Python six.moves.cPickle.dump() Examples

The following are 30 code examples of six.moves.cPickle.dump(), collected from open-source projects. The source file, project, and license for each example are listed above it, so you can trace it back to its original context. You may also want to look at the other functions and classes available in the six.moves.cPickle module.
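For orientation, here is a minimal sketch of the typical dump/load round trip with six.moves.cPickle (the file name is illustrative):

from six.moves import cPickle as pickle

record = {'words': ['foo', 'bar'], 'counts': [3, 1]}

# Open pickle files in binary mode and prefer the highest protocol available.
with open('record.pickle', 'wb') as f:
    pickle.dump(record, f, pickle.HIGHEST_PROTOCOL)

with open('record.pickle', 'rb') as f:
    restored = pickle.load(f)

assert restored == record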
Example #1
Source File: exp.py    From tranX with Apache License 2.0
def test(args):
    test_set = Dataset.from_bin_file(args.test_file)
    assert args.load_model

    print('load model from [%s]' % args.load_model, file=sys.stderr)
    params = torch.load(args.load_model, map_location=lambda storage, loc: storage)
    transition_system = params['transition_system']
    saved_args = params['args']
    saved_args.cuda = args.cuda
    # set the correct domain from saved arg
    args.lang = saved_args.lang

    parser_cls = Registrable.by_name(args.parser)
    parser = parser_cls.load(model_path=args.load_model, cuda=args.cuda)
    parser.eval()
    evaluator = Registrable.by_name(args.evaluator)(transition_system, args=args)
    eval_results, decode_results = evaluation.evaluate(test_set.examples, parser, evaluator, args,
                                                       verbose=args.verbose, return_decode_result=True)
    print(eval_results, file=sys.stderr)
    if args.save_decode_to:
        pickle.dump(decode_results, open(args.save_decode_to, 'wb')) 
Example #2
Source File: 1_prepare_pickle_200.py    From Neural-Network-Programming-with-TensorFlow with MIT License
def maybe_pickle(data_folders, min_num_images_per_class, force=False):
  dataset_names = []
  for folder in data_folders:
    set_filename = folder + '.pickle'
    dataset_names.append(set_filename)
    if os.path.exists(set_filename) and not force:
      # You may override by setting force=True.
      print('%s already present - Skipping pickling.' % set_filename)
    else:
      print('Pickling %s.' % set_filename)
      dataset = load_letter(folder, min_num_images_per_class)
      try:
        with open(set_filename, 'wb') as f:
          pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)
      except Exception as e:
        print('Unable to save data to', set_filename, ':', e)
  
  return dataset_names 
Example #3
Source File: preprocess_easypr.py    From EasyPR-python with Apache License 2.0
def generate_label(cls_dir, labels):
    total_list = []

    cnt = 0
    for label in labels:
        for name in os.listdir(os.path.join(DATA_DIR, cls_dir, label)):
            record = {'name': name, 'label': cnt, 'subdir': label}
            total_list.append(record)
        cnt += 1
    random.shuffle(total_list)
    train_size = int(0.7 * len(total_list))
    print(train_size, len(total_list))

    with open(os.path.join(DATA_DIR, cls_dir, 'train.pickle'), 'wb') as f:
        pickle.dump(total_list[:train_size], f, 2)

    with open(os.path.join(DATA_DIR, cls_dir, 'val.pickle'), 'wb') as f:
        pickle.dump(total_list[train_size:], f, 2) 
Example #4
Source File: httpcache.py    From learn_python3_spider with MIT License
def store_response(self, spider, request, response):
        """Store the given response in the cache."""
        rpath = self._get_request_path(spider, request)
        if not os.path.exists(rpath):
            os.makedirs(rpath)
        metadata = {
            'url': request.url,
            'method': request.method,
            'status': response.status,
            'response_url': response.url,
            'timestamp': time(),
        }
        with self._open(os.path.join(rpath, 'meta'), 'wb') as f:
            f.write(to_bytes(repr(metadata)))
        with self._open(os.path.join(rpath, 'pickled_meta'), 'wb') as f:
            pickle.dump(metadata, f, protocol=2)
        with self._open(os.path.join(rpath, 'response_headers'), 'wb') as f:
            f.write(headers_dict_to_raw(response.headers))
        with self._open(os.path.join(rpath, 'response_body'), 'wb') as f:
            f.write(response.body)
        with self._open(os.path.join(rpath, 'request_headers'), 'wb') as f:
            f.write(headers_dict_to_raw(request.headers))
        with self._open(os.path.join(rpath, 'request_body'), 'wb') as f:
            f.write(request.body) 
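The 'pickled_meta' file written above is what the cache later deserializes; here is a minimal read-back sketch (not the project's actual retrieval code; rpath is assumed to be the same per-request directory used above):

import os
import pickle

with open(os.path.join(rpath, 'pickled_meta'), 'rb') as f:
    metadata = pickle.load(f)  # round-trips the metadata dict written with protocol=2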
Example #5
Source File: 1_prepare_pickle.py    From Neural-Network-Programming-with-TensorFlow with MIT License
def maybe_pickle(data_folders, min_num_images_per_class, force=False):
  dataset_names = []
  for folder in data_folders:
    set_filename = folder + '.pickle'
    dataset_names.append(set_filename)
    if os.path.exists(set_filename) and not force:
      # You may override by setting force=True.
      print('%s already present - Skipping pickling.' % set_filename)
    else:
      print('Pickling %s.' % set_filename)
      dataset = load_letter(folder, min_num_images_per_class)
      try:
        with open(set_filename, 'wb') as f:
          pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)
      except Exception as e:
        print('Unable to save data to', set_filename, ':', e)
  
  return dataset_names 
Example #6
Source File: embeddings.py    From polyglot with GNU General Public License v3.0
def load(fname):
    """Load an embedding dump generated by `save`"""

    content = _open(fname).read()
    if PY2:
      state = pickle.loads(content)
    else:
      state = pickle.loads(content, encoding='latin1')
    voc, vec = state
    if len(voc) == 2:
      words, counts = voc
      word_count = dict(zip(words, counts))
      vocab = CountedVocabulary(word_count=word_count)
    else:
      vocab = OrderedVocabulary(voc)
    return Embedding(vocabulary=vocab, vectors=vec) 
Example #7
Source File: prepro_ngrams_flickr30k.py    From NeuralBabyTalk with MIT License
def main(params):

  info = json.load(open(params['dict_json'], 'r'))
  imgs = json.load(open(params['input_json'], 'r'))

  itow = info['ix_to_word']
  wtoi = {w:i for i,w in itow.items()}
  wtod = {w:i+1 for w,i in info['wtod'].items()} # word to detection
  # dtoi = {w:i+1 for i,w in enumerate(wtod.keys())} # detection to index
  dtoi = wtod
  wtol = info['wtol']
  itod = {i:w for w,i in dtoi.items()}

  # imgs = imgs['images']

  ngram_idxs, ref_len = build_dict(imgs, info, wtoi, wtod, dtoi, wtol, itod, params)

  # cPickle.dump({'document_frequency': ngram_words, 'ref_len': ref_len}, open(params['output_pkl']+'-words.p','w'), protocol=cPickle.HIGHEST_PROTOCOL)
  cPickle.dump({'document_frequency': ngram_idxs, 'ref_len': ref_len}, open(params['output_pkl']+'-idxs.p','w'), protocol=cPickle.HIGHEST_PROTOCOL) 
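One thing to note about the last line above: the output file is opened in text mode ('w') while HIGHEST_PROTOCOL produces binary data. That happens to work on Python 2 under Unix, but raises a TypeError under Python 3. A more portable variant of that call (a sketch, not the project's code) would be:

with open(params['output_pkl'] + '-idxs.p', 'wb') as f:
    cPickle.dump({'document_frequency': ngram_idxs, 'ref_len': ref_len}, f,
                 protocol=cPickle.HIGHEST_PROTOCOL)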
Example #8
Source File: prepro_ngrams_bak.py    From NeuralBabyTalk with MIT License
def main(params):

  det_train_path = 'data/coco/annotations/instances_train2014.json'
  det_val_path = 'data/coco/annotations/instances_val2014.json'

  coco_det_train = COCO(det_train_path)
  coco_det_val = COCO(det_val_path)

  info = json.load(open(params['dict_json'], 'r'))
  imgs = json.load(open(params['input_json'], 'r'))

  itow = info['ix_to_word']
  wtoi = {w:i for i,w in itow.items()}
  wtod = {w:i+1 for w,i in info['wtod'].items()} # word to detection
  dtoi = {w:i+1 for i,w in enumerate(wtod.keys())} # detection to index
  wtol = info['wtol']
  ctol = {c:i+1 for i, c in enumerate(coco_det_train.cats.keys())}

  # imgs = imgs['images']

  ngram_idxs, ref_len = build_dict(imgs, info, wtoi, wtod, dtoi, wtol, ctol, coco_det_train, coco_det_val, params)

  # cPickle.dump({'document_frequency': ngram_words, 'ref_len': ref_len}, open(params['output_pkl']+'-words.p','w'), protocol=cPickle.HIGHEST_PROTOCOL)
  cPickle.dump({'document_frequency': ngram_idxs, 'ref_len': ref_len}, open(params['output_pkl']+'-idxs.p','w'), protocol=cPickle.HIGHEST_PROTOCOL) 
Example #9
Source File: 1_prepare_pickle_200_greyscale.py    From Neural-Network-Programming-with-TensorFlow with MIT License
def maybe_pickle(data_folders, min_num_images_per_class, force=False):
  dataset_names = []
  for folder in data_folders:
    set_filename = folder + '.pickle'
    dataset_names.append(set_filename)
    if os.path.exists(set_filename) and not force:
      # You may override by setting force=True.
      print('%s already present - Skipping pickling.' % set_filename)
    else:
      print('Pickling %s.' % set_filename)
      dataset = load_letter(folder, min_num_images_per_class)
      try:
        with open(set_filename, 'wb') as f:
          pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)
      except Exception as e:
        print('Unable to save data to', set_filename, ':', e)
  
  return dataset_names 
Example #10
Source File: test_pickle.py    From neural-network-animation with MIT License
def recursive_pickle(top_obj):
    """
    Recursively pickle all of the given objects subordinates, starting with
    the deepest first. **Very** handy for debugging pickling issues, but
    also very slow (as it literally pickles each object in turn).

    Handles circular object references gracefully.

    """
    objs = depth_getter(top_obj)
    # sort by depth then by nest_info
    objs = sorted(six.itervalues(objs), key=lambda val: (-val[0], val[2]))

    for _, obj, location in objs:
#        print('trying %s' % location)
        try:
            pickle.dump(obj, BytesIO(), pickle.HIGHEST_PROTOCOL)
        except Exception as err:
            print(obj)
            print('Failed to pickle %s. \n Type: %s. Traceback '
                  'follows:' % (location, type(obj)))
            raise 
Example #11
Source File: data_loader.py    From AI_Poet_Totoro with MIT License
def save_dataset(self, filename):
        """Save the dataset to a file with pickle.

        The data file contains the vocabulary and the dialogue samples.

        Args:
            filename (str): name of the pickle file
        """
        with open(filename, 'wb') as handle:
            data = {
                    'trainingSamples': self.trainingSamples
            }

            if len(self.validationSamples)>0:
                data['validationSamples'] = self.validationSamples
                data['testingSamples'] = self.testingSamples
                data['maxSeqLen'] = self.seq_max_length

            cPickle.dump(data, handle, -1)  # Using the highest protocol available

  # 3. Utility function: read the file back with pickle
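A note on the protocol=-1 used above: -1 is shorthand for the highest protocol the running interpreter supports, so it is equivalent to passing cPickle.HIGHEST_PROTOCOL explicitly. A small self-contained check:

import io
from six.moves import cPickle

payload = {'trainingSamples': [[1, 2], [3, 4]]}
a, b = io.BytesIO(), io.BytesIO()
cPickle.dump(payload, a, -1)
cPickle.dump(payload, b, cPickle.HIGHEST_PROTOCOL)
assert a.getvalue() == b.getvalue()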
Example #12
Source File: read_LaMemDataset.py    From Colorization.tensorflow with MIT License
def read_dataset(data_dir):
    pickle_filename = "lamem.pickle"
    pickle_filepath = os.path.join(data_dir, pickle_filename)
    if not os.path.exists(pickle_filepath):
        utils.maybe_download_and_extract(data_dir, DATA_URL, is_tarfile=True)
        lamem_folder = (DATA_URL.split("/")[-1]).split(os.path.extsep)[0]
        result = {'images': create_image_lists(os.path.join(data_dir, lamem_folder))}
        print ("Pickling ...")
        with open(pickle_filepath, 'wb') as f:
            pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)
    else:
        print ("Found pickle file!")

    with open(pickle_filepath, 'rb') as f:
        result = pickle.load(f)
        training_records = result['images']
        del result

    return training_records 
Example #13
Source File: notmnist_prepare_data.py    From deep-learning-samples with The Unlicense
def maybe_pickle(data_folders, min_num_images_per_class, force=False):
  dataset_names = []
  for folder in data_folders:
    set_filename = folder + '.pickle'
    dataset_names.append(set_filename)
    if os.path.exists(set_filename) and not force:
      # You may override by setting force=True.
      print('%s already present - Skipping pickling.' % set_filename)
    else:
      print('Pickling %s.' % set_filename)
      dataset = load_letter(folder, min_num_images_per_class)
      try:
        with open(set_filename, 'wb') as f:
          pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)
      except Exception as e:
        print('Unable to save data to', set_filename, ':', e)

  return dataset_names 
Example #14
Source File: cmodule.py    From D-VAE with MIT License
def save_pkl(self):
        """
        Dump this object into its `key_pkl` file.

        May raise a cPickle.PicklingError if such an exception is raised at
        pickle time (in which case a warning is also displayed).

        """
        # Note that writing in binary mode is important under Windows.
        try:
            with open(self.key_pkl, 'wb') as f:
                pickle.dump(self, f, protocol=pickle.HIGHEST_PROTOCOL)
        except pickle.PicklingError:
            _logger.warning("Cache leak due to unpickle-able key data %s",
                            self.keys)
            os.remove(self.key_pkl)
            raise 
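For reference, the PicklingError branch above is reached when an object cannot be pickled at all. A minimal way to trigger it (under CPython 3 a module-level lambda raises PicklingError; some other unpicklable objects raise TypeError instead):

import io
from six.moves import cPickle as pickle

try:
    pickle.dump(lambda x: x, io.BytesIO(), pickle.HIGHEST_PROTOCOL)
except pickle.PicklingError as err:
    print('not picklable:', err)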
Example #15
Source File: prepare_notmnist.py    From Neural-Network-Programming-with-TensorFlow with MIT License
def maybe_pickle(data_folders, min_num_images_per_class, force=False):
  dataset_names = []
  for folder in data_folders:
    set_filename = folder + '.pickle'
    dataset_names.append(set_filename)
    if os.path.exists(set_filename) and not force:
      print('%s already present - Skipping pickling.' % set_filename)
    else:
      print('Pickling %s.' % set_filename)
      dataset = load_letter(folder, min_num_images_per_class)
      try:
        with open(set_filename, 'wb') as f:
          #pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)
          print(pickle.HIGHEST_PROTOCOL)
          pickle.dump(dataset, f, 2)
      except Exception as e:
        print('Unable to save data to', set_filename, ':', e)
  
  return dataset_names 
Example #16
Source File: data.py    From shopping-classification with Apache License 2.0
def build_y_vocab(self):
        pool = Pool(opt.num_workers)
        try:
            rets = pool.map_async(build_y_vocab,
                                  [(data_path, 'train')
                                   for data_path in opt.train_data_list]).get(99999999)
            pool.close()
            pool.join()
            y_vocab = set()
            for _y_vocab in rets:
                for k in six.iterkeys(_y_vocab):
                    y_vocab.add(k)
            self.y_vocab = {y: idx for idx, y in enumerate(y_vocab)}
        except KeyboardInterrupt:
            pool.terminate()
            pool.join()
            raise
        self.logger.info('size of y vocab: %s' % len(self.y_vocab))
        cPickle.dump(self.y_vocab, open(self.y_vocab_path, 'wb'), 2) 
Example #17
Source File: data.py    From Text-Generate-RNN with Apache License 2.0
def preprocess(self, input_file, vocab_file, tensor_file):
        def handle(line):
            if len(line) > MAX_LENGTH:
                index_end = line.rfind('。', 0, MAX_LENGTH)
                index_end = index_end if index_end > 0 else MAX_LENGTH
                line = line[:index_end + 1]
            return BEGIN_CHAR + line + END_CHAR

        self.texts = [line.strip().replace('\n', '') for line in
                        open(input_file, encoding='utf-8')]
        self.texts = [handle(line) for line in self.texts if len(line) > MIN_LENGTH]

        words = ['*', ' ']
        for text in self.texts:
            words += [word for word in text]
        self.words = list(set(words))
        self.words_size = len(self.words)

        self.vocab = dict(zip(self.words, range(len(self.words))))
        self.vocab_id = dict(zip(range(len(self.words)), self.words))
        with open(vocab_file, 'wb') as f:
            cPickle.dump(self.words, f)
        self.texts_vector = np.array([
            list(map(self.vocab.get, poetry)) for poetry in self.texts])
        np.save(tensor_file, self.texts_vector) 
Example #18
Source File: predictions2html.py    From ctw-baseline with MIT License
def create_pkl():
    with open(settings.TEST_CLASSIFICATION) as f:
        lines = f.read().splitlines()
    with open(settings.TEST_CLASSIFICATION_GT) as f:
        gt_lines = f.read().splitlines()
    assert len(lines) == len(gt_lines)
    test = []
    for i, line in enumerate(lines):
        anno = json.loads(line.strip())
        gt_anno = json.loads(gt_lines[i].strip())
        image = misc.imread(os.path.join(settings.TEST_IMAGE_DIR, anno['file_name']))
        assert image.shape == (anno['height'], anno['width'], 3)
        assert len(anno['proposals']) == len(gt_anno['ground_truth'])
        for proposal, gt in zip(anno['proposals'], gt_anno['ground_truth']):
            cropped = crop(image, proposal['adjusted_bbox'], 32)
            test.append([cropped, gt])
        if i % 100 == 0:
            print('test', i, '/', len(lines))
    with open(settings.TEST_CLS_CROPPED, 'wb') as f:
        cPickle.dump(test, f) 
Example #19
Source File: cmodule.py    From attention-lvcsr with MIT License
def save_pkl(self):
        """
        Dump this object into its `key_pkl` file.

        May raise a cPickle.PicklingError if such an exception is raised at
        pickle time (in which case a warning is also displayed).

        """
        # Note that writing in binary mode is important under Windows.
        try:
            with open(self.key_pkl, 'wb') as f:
                pickle.dump(self, f, protocol=pickle.HIGHEST_PROTOCOL)
        except pickle.PicklingError:
            _logger.warning("Cache leak due to unpickle-able key data %s",
                            self.keys)
            os.remove(self.key_pkl)
            raise 
Example #20
Source File: test_grids.py    From armi with Apache License 2.0
def test_is_pickleable(self):
        grid = grids.HexGrid.fromPitch(1.0, numRings=3)
        loc = grid[1, 1, 0]
        for protocol in range(cPickle.HIGHEST_PROTOCOL + 1):
            buf = BytesIO()
            cPickle.dump(loc, buf, protocol=protocol)
            buf.seek(0)
            newLoc = cPickle.load(buf)
            assert_allclose(loc.indices, newLoc.indices) 
Example #21
Source File: batch_loader.py    From pytorch_RVAE with MIT License
def preprocess(self, data_files, idx_files, tensor_files):

        data = [open(file, "r").read() for file in data_files]
        merged_data = data[0] + '\n' + data[1]

        self.chars_vocab_size, self.idx_to_char, self.char_to_idx = self.build_character_vocab(merged_data)

        with open(idx_files[1], 'wb') as f:
            cPickle.dump(self.idx_to_char, f)

        data_words = [[line.split() for line in target.split('\n')] for target in data]
        merged_data_words = merged_data.split()

        self.words_vocab_size, self.idx_to_word, self.word_to_idx = self.build_word_vocab(merged_data_words)
        self.max_word_len = np.amax([len(word) for word in self.idx_to_word])
        self.max_seq_len = np.amax([len(line) for target in data_words for line in target])
        self.num_lines = [len(target) for target in data_words]

        with open(idx_files[0], 'wb') as f:
            cPickle.dump(self.idx_to_word, f)

        self.word_tensor = np.array(
            [[list(map(self.word_to_idx.get, line)) for line in target] for target in data_words])
        print(self.word_tensor.shape)
        for i, path in enumerate(tensor_files[0]):
            np.save(path, self.word_tensor[i])

        self.character_tensor = np.array(
            [[list(map(self.encode_characters, line)) for line in target] for target in data_words])
        for i, path in enumerate(tensor_files[1]):
            np.save(path, self.character_tensor[i])

        self.just_words = [word for line in self.word_tensor[0] for word in line] 
Example #22
Source File: io.py    From pcl.pytorch with MIT License
def save_object(obj, file_name):
    """Save a Python object by pickling it."""
    file_name = os.path.abspath(file_name)
    with open(file_name, 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) 
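A hypothetical round trip for the helper above (load_object and the /tmp path are illustrative, not part of the project's snippet):

def load_object(file_name):
    """Load an object previously written by save_object (illustrative counterpart)."""
    with open(os.path.abspath(file_name), 'rb') as f:
        return pickle.load(f)

save_object({'boxes': [[0, 0, 10, 10]]}, '/tmp/detections.pkl')
detections = load_object('/tmp/detections.pkl')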
Example #23
Source File: io.py    From Detectron.pytorch with MIT License
def save_object(obj, file_name):
    """Save a Python object by pickling it."""
    file_name = os.path.abspath(file_name)
    with open(file_name, 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) 
Example #24
Source File: test_cross_section_manager.py    From armi with Apache License 2.0
def test_is_pickleable(self):
        self.bc.weightingParam = "test"
        buf = BytesIO()
        cPickle.dump(self.bc, buf)
        buf.seek(0)
        newBc = cPickle.load(buf)
        self.assertEqual(self.bc.weightingParam, newBc.weightingParam) 
Example #25
Source File: io.py    From FPN-Pytorch with MIT License
def save_object(obj, file_name):
    """Save a Python object by pickling it."""
    file_name = os.path.abspath(file_name)
    with open(file_name, 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) 
Example #26
Source File: test_serialization.py    From attention-lvcsr with MIT License
def test_in_memory():
    skip_if_not_available(datasets=['mnist.hdf5'])
    # Load MNIST and get two batches
    mnist = MNIST(('train',), load_in_memory=True)
    data_stream = DataStream(mnist, iteration_scheme=SequentialScheme(
        examples=mnist.num_examples, batch_size=256))
    epoch = data_stream.get_epoch_iterator()
    for i, (features, targets) in enumerate(epoch):
        if i == 1:
            break
    handle = mnist.open()
    known_features, _ = mnist.get_data(handle, slice(256, 512))
    mnist.close(handle)
    assert numpy.all(features == known_features)

    # Pickle the epoch and make sure that the data wasn't dumped
    with tempfile.NamedTemporaryFile(delete=False) as f:
        filename = f.name
        cPickle.dump(epoch, f)
    assert os.path.getsize(filename) < 1024 * 1024  # Less than 1MB

    # Reload the epoch and make sure that the state was maintained
    del epoch
    with open(filename, 'rb') as f:
        epoch = cPickle.load(f)
    features, targets = next(epoch)
    handle = mnist.open()
    known_features, _ = mnist.get_data(handle, slice(512, 768))
    mnist.close(handle)
    assert numpy.all(features == known_features) 
Example #27
Source File: test_converters.py    From attention-lvcsr with MIT License
def setUp(self):
        numpy.random.seed(9 + 5 + 2015)
        self.train_features_mock = numpy.random.randint(
            0, 256, (10, 3, 32, 32)).astype('uint8')
        self.train_fine_labels_mock = numpy.random.randint(
            0, 100, (10,)).astype('uint8')
        self.train_coarse_labels_mock = numpy.random.randint(
            0, 20, (10,)).astype('uint8')
        self.test_features_mock = numpy.random.randint(
            0, 256, (10, 3, 32, 32)).astype('uint8')
        self.test_fine_labels_mock = numpy.random.randint(
            0, 100, (10,)).astype('uint8')
        self.test_coarse_labels_mock = numpy.random.randint(
            0, 20, (10,)).astype('uint8')
        self.tempdir = tempfile.mkdtemp()
        cwd = os.getcwd()
        os.chdir(self.tempdir)
        os.mkdir('cifar-100-python')
        filename = os.path.join('cifar-100-python', 'train')
        with open(filename, 'wb') as f:
            cPickle.dump({'data': self.train_features_mock.reshape((10, -1)),
                          'fine_labels': self.train_fine_labels_mock,
                          'coarse_labels': self.train_coarse_labels_mock}, f)
        filename = os.path.join('cifar-100-python', 'test')
        with open(filename, 'wb') as f:
            cPickle.dump({'data': self.test_features_mock.reshape((10, -1)),
                          'fine_labels': self.test_fine_labels_mock,
                          'coarse_labels': self.test_coarse_labels_mock}, f)
        with tarfile.open('cifar-100-python.tar.gz', 'w:gz') as tar_file:
            tar_file.add('cifar-100-python')
        os.chdir(cwd) 
Example #28
Source File: test_converters.py    From attention-lvcsr with MIT License
def setUp(self):
        numpy.random.seed(9 + 5 + 2015)
        self.train_features_mock = [
            numpy.random.randint(0, 256, (10, 3, 32, 32)).astype('uint8')
            for i in range(5)]
        self.train_targets_mock = [
            numpy.random.randint(0, 10, (10,)).astype('uint8')
            for i in range(5)]
        self.test_features_mock = numpy.random.randint(
            0, 256, (10, 3, 32, 32)).astype('uint8')
        self.test_targets_mock = numpy.random.randint(
            0, 10, (10,)).astype('uint8')
        self.tempdir = tempfile.mkdtemp()
        cwd = os.getcwd()
        os.chdir(self.tempdir)
        os.mkdir('cifar-10-batches-py')
        for i, (x, y) in enumerate(zip(self.train_features_mock,
                                       self.train_targets_mock)):
            filename = os.path.join(
                'cifar-10-batches-py', 'data_batch_{}'.format(i + 1))
            with open(filename, 'wb') as f:
                cPickle.dump({'data': x, 'labels': y}, f)
        filename = os.path.join('cifar-10-batches-py', 'test_batch')
        with open(filename, 'wb') as f:
            cPickle.dump({'data': self.test_features_mock,
                          'labels': self.test_targets_mock},
                         f)
        with tarfile.open('cifar-10-python.tar.gz', 'w:gz') as tar_file:
            tar_file.add('cifar-10-batches-py')
        os.chdir(cwd) 
Example #29
Source File: callcache.py    From attention-lvcsr with MIT License
def persist(self, filename=None):
        if filename is None:
            filename = self.filename
        with open(filename, 'w') as f:
            pickle.dump(self.cache, f) 
Example #30
Source File: data_loader.py    From quantified-self with MIT License
def preprocess(self, input_file, vocab_file, tensor_file):
        with codecs.open(input_file, "r", encoding=self.encoding) as f:
            data = f.read()
        counter = collections.Counter(data)
        count_pairs = sorted(counter.items(), key=lambda x: -x[1])
        self.chars, _ = zip(*count_pairs)
        self.vocab_size = len(self.chars)
        self.vocab = dict(zip(self.chars, range(len(self.chars))))
        with open(vocab_file, "wb") as f:
            cPickle.dump(self.chars, f)
        self.tensor = np.array(list(map(self.vocab.get, data)))
        np.save(tensor_file, self.tensor)