Python pandas.read_pickle() Examples

The following are 30 code examples of pandas.read_pickle(), drawn from open-source projects; the source file and project are noted above each example. You may also want to check out all available functions/classes of the module pandas, or try the search function.
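As a quick orientation before the project examples, here is a minimal round-trip sketch (the DataFrame contents and the file name round_trip.pkl are made up for illustration): to_pickle() serializes a pandas object to disk and read_pickle() restores it. As the pandas documentation notes, loading pickled data received from untrusted sources can be unsafe.

import pandas as pd

# Hypothetical round trip: serialize a small DataFrame and read it back.
df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
df.to_pickle("round_trip.pkl")                 # write the object to disk
restored = pd.read_pickle("round_trip.pkl")    # read it back
pd.testing.assert_frame_equal(df, restored)    # contents are identical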
Example #1
Source File: test_pickle.py    From recruit with Apache License 2.0 7 votes
def test_write_explicit(self, compression, get_random_path):
        base = get_random_path
        path1 = base + ".compressed"
        path2 = base + ".raw"

        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
            df = tm.makeDataFrame()

            # write to compressed file
            df.to_pickle(p1, compression=compression)

            # decompress
            with tm.decompress_file(p1, compression=compression) as f:
                with open(p2, "wb") as fh:
                    fh.write(f.read())

            # read decompressed file
            df2 = pd.read_pickle(p2, compression=None)

            tm.assert_frame_equal(df, df2) 
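Outside the pandas test harness, the explicit-compression round trip this test exercises looks roughly like the sketch below; the file name and the choice of gzip are illustrative assumptions, not taken from the test.

import pandas as pd

df = pd.DataFrame({"x": range(5)})
# Explicitly choose the codec on both write and read instead of relying on inference.
df.to_pickle("frame.pkl.gz", compression="gzip")
same = pd.read_pickle("frame.pkl.gz", compression="gzip")
pd.testing.assert_frame_equal(df, same)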
Example #2
Source File: abstract.py    From qb with MIT License 6 votes
def load_guesses(directory: str, output_type='char', folds=c.GUESSER_GENERATION_FOLDS) -> pd.DataFrame:
        """
        Loads all the guesses pertaining to a guesser inferred from directory
        :param directory: where to load guesses from
        :param output_type: One of: char, full, first
        :param folds: folds to load, by default all of them
        :return: guesses across all folds for given directory
        """
        assert len(folds) > 0
        guess_df = None
        for fold in folds:
            input_path = AbstractGuesser.guess_path(directory, fold, output_type)
            if guess_df is None:
                guess_df = pd.read_pickle(input_path)
            else:
                new_guesses_df = pd.read_pickle(input_path)
                guess_df = pd.concat([guess_df, new_guesses_df])

        return guess_df 
Example #3
Source File: test_pickle.py    From vnpy_crypto with MIT License 6 votes
def test_write_infer(self, ext, get_random_path):
        base = get_random_path
        path1 = base + ext
        path2 = base + ".raw"
        compression = None
        for c in self._compression_to_extension:
            if self._compression_to_extension[c] == ext:
                compression = c
                break

        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
            df = tm.makeDataFrame()

            # write to compressed file by inferred compression method
            df.to_pickle(p1)

            # decompress
            with tm.decompress_file(p1, compression=compression) as f:
                with open(p2, "wb") as fh:
                    fh.write(f.read())

            # read decompressed file
            df2 = pd.read_pickle(p2, compression=None)

            tm.assert_frame_equal(df, df2) 
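What the test above verifies from the user's perspective is extension-based compression inference: with the default compression='infer', both to_pickle and read_pickle pick the codec from suffixes such as .gz, .bz2, .zip and .xz. A minimal sketch (the file name is illustrative):

import pandas as pd

df = pd.DataFrame({"x": range(5)})
df.to_pickle("frame.pkl.xz")             # codec inferred from the .xz suffix
same = pd.read_pickle("frame.pkl.xz")    # inferred again on read
pd.testing.assert_frame_equal(df, same)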
Example #4
Source File: main.py    From Deep-Learning-with-TensorFlow-Second-Edition with MIT License 6 votes
def user_user_pearson_corr(ratings_df, TRAINED):
    if TRAINED:
        # Use the cached correlation pickle if present; otherwise rebuild it
        # from the pickled user-item table and cache it for next time.
        if os.path.isfile("user_user_corr_train.pkl"):
            df_corr = pd.read_pickle("user_user_corr_train.pkl")
        else:
            df = pd.read_pickle("user_item_table_train.pkl")
            df = df.T
            df_corr = df.corr()
            df_corr.to_pickle("user_user_corr_train.pkl")
    else:
        if os.path.isfile("user_user_corr.pkl"):
            df_corr = pd.read_pickle("user_user_corr.pkl")
        else:
            df = pd.read_pickle("user_item_table.pkl")
            df = df.T
            df_corr = df.corr()
            df_corr.to_pickle("user_user_corr.pkl")
    return df_corr
Example #5
Source File: walker.py    From GraphEmbedding with MIT License 6 votes
def simulate_walks(self, num_walks, walk_length, stay_prob=0.3, workers=1, verbose=0):

        layers_adj = pd.read_pickle(self.temp_path+'layers_adj.pkl')
        layers_alias = pd.read_pickle(self.temp_path+'layers_alias.pkl')
        layers_accept = pd.read_pickle(self.temp_path+'layers_accept.pkl')
        gamma = pd.read_pickle(self.temp_path+'gamma.pkl')
        walks = []
        initialLayer = 0

        nodes = self.idx  # list(self.g.nodes())

        results = Parallel(n_jobs=workers, verbose=verbose, )(
            delayed(self._simulate_walks)(nodes, num, walk_length, stay_prob, layers_adj, layers_accept, layers_alias, gamma) for num in
            partition_num(num_walks, workers))

        walks = list(itertools.chain(*results))
        return walks 
Example #6
Source File: main.py    From Deep-Learning-with-TensorFlow-Second-Edition with MIT License 6 votes
def top_k_similar_items(movies,ratings_df,k,TRAINED=False):
    """
    Returns k similar movies for respective movie
    INPUTS :
        movies : list of numbers or number, list of movie ids
        ratings_df : rating dataframe, store all users rating for respective movies
        k          : natural number
        TRAINED    : TRUE or FALSE, weather use trained user vs movie table or untrained
    OUTPUT:
        list of k similar movies for respected movie
    """
    if TRAINED:
        df=pd.read_pickle("user_item_table_train.pkl")
    else:
        df=pd.read_pickle("user_item_table.pkl")

    corr_matrix=item_item_correlation(df,TRAINED)
    if type(movies) is not list:
        return corr_matrix[movies].sort_values(ascending=False).drop(movies).index.values[0:k]
    else:
        dict={}
        for movie in movies:
            dict.update({movie:corr_matrix[movie].sort_values(ascending=False).drop(movie).index.values[0:k]})
        pd.DataFrame(dict).to_csv("movie_top_k.csv")
        return dict 
Example #7
Source File: test_pickle.py    From vnpy_crypto with MIT License 6 votes
def test_read_explicit(self, compression, get_random_path):
        base = get_random_path
        path1 = base + ".raw"
        path2 = base + ".compressed"

        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
            df = tm.makeDataFrame()

            # write to uncompressed file
            df.to_pickle(p1, compression=None)

            # compress
            self.compress_file(p1, p2, compression=compression)

            # read compressed file
            df2 = pd.read_pickle(p2, compression=compression)

            tm.assert_frame_equal(df, df2) 
Example #8
Source File: pipeline.py    From xbbg with Apache License 2.0 6 votes
def daily_stats(data: (pd.Series, pd.DataFrame), **kwargs) -> pd.DataFrame:
    """
    Daily stats for given data

    Examples:
        >>> pd.set_option('precision', 2)
        >>> (
        ...     pd.concat([
        ...         pd.read_pickle('xbbg/tests/data/sample_rms_ib0.pkl'),
        ...         pd.read_pickle('xbbg/tests/data/sample_rms_ib1.pkl'),
        ...     ], sort=False)
        ...     .pipe(get_series, col='close')
        ...     .pipe(daily_stats)
        ... )['RMS FP Equity'].iloc[:, :5]
                                   count    mean   std    min    10%
        2020-01-16 00:00:00+00:00  434.0  711.16  1.11  708.6  709.6
        2020-01-17 00:00:00+00:00  437.0  721.53  1.66  717.0  719.0
    """
    if data.empty: return pd.DataFrame()
    if 'percentiles' not in kwargs: kwargs['percentiles'] = [.1, .25, .5, .75, .9]
    return data.groupby(data.index.floor('d')).describe(**kwargs) 
Example #9
Source File: test_pickle.py    From recruit with Apache License 2.0 6 votes
def test_write_infer(self, ext, get_random_path):
        base = get_random_path
        path1 = base + ext
        path2 = base + ".raw"
        compression = None
        for c in self._compression_to_extension:
            if self._compression_to_extension[c] == ext:
                compression = c
                break

        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
            df = tm.makeDataFrame()

            # write to compressed file by inferred compression method
            df.to_pickle(p1)

            # decompress
            with tm.decompress_file(p1, compression=compression) as f:
                with open(p2, "wb") as fh:
                    fh.write(f.read())

            # read decompressed file
            df2 = pd.read_pickle(p2, compression=None)

            tm.assert_frame_equal(df, df2) 
Example #10
Source File: testing.py    From vnpy_crypto with MIT License 6 votes
def round_trip_pickle(obj, path=None):
    """
    Pickle an object and then read it again.

    Parameters
    ----------
    obj : pandas object
        The object to pickle and then re-read.
    path : str, default None
        The path where the pickled object is written and then read.

    Returns
    -------
    round_trip_pickled_object : pandas object
        The original object that was pickled and then re-read.
    """

    if path is None:
        path = u('__{random_bytes}__.pickle'.format(random_bytes=rands(10)))
    with ensure_clean(path) as path:
        pd.to_pickle(obj, path)
        return pd.read_pickle(path) 
Example #11
Source File: test_multi.py    From vnpy_crypto with MIT License 6 votes
def test_legacy_pickle(self, datapath):
        if PY3:
            pytest.skip("testing for legacy pickles not "
                        "supported on py3")

        path = datapath('indexes', 'data', 'multiindex_v1.pickle')
        obj = pd.read_pickle(path)

        obj2 = MultiIndex.from_tuples(obj.values)
        assert obj.equals(obj2)

        res = obj.get_indexer(obj)
        exp = np.arange(len(obj), dtype=np.intp)
        assert_almost_equal(res, exp)

        res = obj.get_indexer(obj2[::-1])
        exp = obj.get_indexer(obj[::-1])
        exp2 = obj2.get_indexer(obj2[::-1])
        assert_almost_equal(res, exp)
        assert_almost_equal(exp, exp2) 
Example #12
Source File: test_multi.py    From vnpy_crypto with MIT License 6 votes
def test_legacy_v2_unpickle(self, datapath):

        # 0.7.3 -> 0.8.0 format manage
        path = datapath('indexes', 'data', 'mindex_073.pickle')
        obj = pd.read_pickle(path)

        obj2 = MultiIndex.from_tuples(obj.values)
        assert obj.equals(obj2)

        res = obj.get_indexer(obj)
        exp = np.arange(len(obj), dtype=np.intp)
        assert_almost_equal(res, exp)

        res = obj.get_indexer(obj2[::-1])
        exp = obj.get_indexer(obj[::-1])
        exp2 = obj2.get_indexer(obj2[::-1])
        assert_almost_equal(res, exp)
        assert_almost_equal(exp, exp2) 
Example #13
Source File: testing.py    From recruit with Apache License 2.0 6 votes
def round_trip_pickle(obj, path=None):
    """
    Pickle an object and then read it again.

    Parameters
    ----------
    obj : pandas object
        The object to pickle and then re-read.
    path : str, default None
        The path where the pickled object is written and then read.

    Returns
    -------
    round_trip_pickled_object : pandas object
        The original object that was pickled and then re-read.
    """

    if path is None:
        path = u('__{random_bytes}__.pickle'.format(random_bytes=rands(10)))
    with ensure_clean(path) as path:
        pd.to_pickle(obj, path)
        return pd.read_pickle(path) 
Example #14
Source File: EDA.py    From G-Bert with MIT License 6 votes
def split_dataset(data_path='data-multi-visit.pkl'):
    data = pd.read_pickle(data_path)
    sample_id = data['SUBJECT_ID'].unique()

    random_number = [i for i in range(len(sample_id))]
#     shuffle(random_number)

    train_id = sample_id[random_number[:int(len(sample_id)*2/3)]]
    eval_id = sample_id[random_number[int(
        len(sample_id)*2/3): int(len(sample_id)*5/6)]]
    test_id = sample_id[random_number[int(len(sample_id)*5/6):]]

    def ls2file(list_data, file_name):
        with open(file_name, 'w') as fout:
            for item in list_data:
                fout.write(str(item) + '\n')

    ls2file(train_id, 'train-id.txt')
    ls2file(eval_id, 'eval-id.txt')
    ls2file(test_id, 'test-id.txt')

    print('train size: %d, eval size: %d, test size: %d' %
          (len(train_id), len(eval_id), len(test_id))) 
Example #15
Source File: utils.py    From bioconda-utils with MIT License 5 votes
def _load_channel_dataframe_cached(self):
        if self.cache_file is not None and os.path.exists(self.cache_file):
            ts = datetime.datetime.fromtimestamp(os.path.getmtime(self.cache_file))
            seconds = (datetime.datetime.now() - ts).seconds
            if seconds <= self.cache_timeout:
                logger.info("Loading repodata from cache %s", self.cache_file)
                return pd.read_pickle(self.cache_file)
            else:
                logger.info("Repodata cache file too old. Reloading")

        res = self._load_channel_dataframe()

        if self.cache_file is not None:
            res.to_pickle(self.cache_file)
        return res 
Example #16
Source File: test_timeseries_legacy.py    From Computable with MIT License 5 votes
def test_unpickle_legacy_len0_daterange(self):
        pth, _ = os.path.split(os.path.abspath(__file__))
        filepath = os.path.join(pth, 'data', 'series_daterange0.pickle')

        result = pd.read_pickle(filepath)

        ex_index = DatetimeIndex([], freq='B')

        self.assert_(result.index.equals(ex_index))
        tm.assert_isinstance(result.index.freq, offsets.BDay)
        self.assert_(len(result) == 0) 
Example #17
Source File: keras-theano.py    From DeepLearning-IDS with MIT License 5 votes
def loadData(fileName):
    dataFile = os.path.join(dataPath, fileName)
    pickleDump = '{}.pickle'.format(dataFile)
    if os.path.exists(pickleDump):
        df = pd.read_pickle(pickleDump)
    else:
        df = pd.read_csv(dataFile)
        df = df.dropna()
        df = shuffle(df)
        df.to_pickle(pickleDump)
    return df 
Example #18
Source File: keras-tensorflow.py    From DeepLearning-IDS with MIT License 5 votes
def loadData(fileName):
    dataFile = os.path.join(dataPath, fileName)
    pickleDump = '{}.pickle'.format(dataFile)
    if os.path.exists(pickleDump):
        df = pd.read_pickle(pickleDump)
    else:
        df = pd.read_csv(dataFile)
        df = df.dropna()
        df = shuffle(df)
        df.to_pickle(pickleDump)
    return df 
Example #19
Source File: keras-cntk.py    From DeepLearning-IDS with MIT License 5 votes
def loadData(fileName):
    dataFile = os.path.join(dataPath, fileName)
    pickleDump = '{}.pickle'.format(dataFile)
    if os.path.exists(pickleDump):
        df = pd.read_pickle(pickleDump)
    else:
        df = pd.read_csv(dataFile)
        df = df.dropna()
        df = shuffle(df)
        df.to_pickle(pickleDump)
    return df


# k-fold cross validation:
# https://machinelearningmastery.com/evaluate-performance-deep-learning-models-keras/ 
Example #20
Source File: fastai-expriments.py    From DeepLearning-IDS with MIT License 5 votes
def loadData(fileName):
    dataFile = os.path.join(dataPath, fileName)
    pickleDump = '{}.pickle'.format(dataFile)
    if os.path.exists(pickleDump):
        df = pd.read_pickle(pickleDump)
    else:
        df = pd.read_csv(dataFile)
        df = df.dropna()
        df = shuffle(df)
        df.to_pickle(pickleDump)
    return df 
Example #21
Source File: test_pickle.py    From vnpy_crypto with MIT License 5 votes
def test_pickle_path_localpath():
    df = tm.makeDataFrame()
    result = tm.round_trip_localpath(df.to_pickle, pd.read_pickle)
    tm.assert_frame_equal(df, result)


# ---------------------
# test pickle compression
# --------------------- 
Example #22
Source File: test_pickle.py    From vnpy_crypto with MIT License 5 votes
def test_pickle_v0_14_1(datapath):

    cat = pd.Categorical(values=['a', 'b', 'c'], ordered=False,
                         categories=['a', 'b', 'c', 'd'])
    pickle_path = datapath('io', 'data', 'categorical_0_14_1.pickle')
    # This code was executed once on v0.14.1 to generate the pickle:
    #
    # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'],
    #                   name='foobar')
    # with open(pickle_path, 'wb') as f: pickle.dump(cat, f)
    #
    tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path)) 
Example #23
Source File: test_api.py    From vnpy_crypto with MIT License 5 votes
def _pickle_roundtrip(self, obj):

        with ensure_clean() as path:
            obj.to_pickle(path)
            unpickled = pd.read_pickle(path)
            return unpickled 
Example #24
Source File: test_io.py    From vnpy_crypto with MIT License 5 votes
def _pickle_roundtrip_name(self, obj):

        with ensure_clean() as path:
            obj.to_pickle(path)
            unpickled = pd.read_pickle(path)
            return unpickled 
Example #25
Source File: response_matrix.py    From ocelot with GNU General Public License v3.0 5 votes
def load(self, filename):
        self.df = pd.read_pickle(filename)
        self.df2data()
        return 1 
Example #26
Source File: VideoFeatures.py    From videofeatures with MIT License 5 votes
def loadFeatures(self, feature_df_path=None):
    """
    loads features from pd dataframe and returns them as a matrix
    :param feature_df_path: path to pandas dataframe that holds features
    :return: (features, labels) - features as ndarray of shape (n_videos, n_frames, n_descriptors_per_image, n_dim_descriptor) and labels (list) of videos
    """

    if feature_df_path is None:
      feature_df_path = self.getDumpFileName('features')

    assert os.path.isfile(feature_df_path)
    feature_df = pd.read_pickle(feature_df_path)

    assert 'features' in feature_df and 'labels' in feature_df

    # stack video features to a 2d matrix
    features = np.concatenate(feature_df['features'], axis=0)

    labels = list(feature_df['labels'])

    if features.ndim == 3: # assume only one feature vector is given -> insert dimension
      features = features.reshape((features.shape[0], features.shape[1], 1, features.shape[2]))

    self.logger.info(
      'Loaded {} features from {}.  Features have shape {}'.format(self.extractor.__class__.__name__, feature_df_path,
                                                                   np.shape(features)))

    assert features.ndim == 4 and len(labels) == features.shape[0]
    return features, labels 
Example #27
Source File: scientific-hypothesis.py    From escape-from-automanual-testing with GNU Affero General Public License v3.0 5 votes
def test_dataframe_round_trip(df):
    with BytesIO() as f:
        df.to_pickle(f, compression=None)
        contents = f.getvalue()
    with BytesIO(contents) as f:
        new = pd.read_pickle(f, compression=None)
    # Pandas ships testing helper functions too!
    pd.testing.assert_frame_equal(df, new) 
Example #28
Source File: trainer.py    From pykg2vec with MIT License 5 votes
def export_embeddings(self):
        """
            Export embeddings in tsv and pandas pickled format.
            With tsvs (both label, vector files), you can:
            1) Use those pretrained embeddings for your applications.
            2) Visualize the embeddings on this website to gain insights. (https://projector.tensorflow.org/)

            Pandas dataframes can be read with pd.read_pickle('desired_file.pickle')
        """
        save_path = self.config.path_embeddings / self.model.model_name
        save_path.mkdir(parents=True, exist_ok=True)
        
        idx2ent = self.config.knowledge_graph.read_cache_data('idx2entity')
        idx2rel = self.config.knowledge_graph.read_cache_data('idx2relation')

        with open(str(save_path / "ent_labels.tsv"), 'w') as l_export_file:
            for label in idx2ent.values():
                l_export_file.write(label + "\n")

        with open(str(save_path / "rel_labels.tsv"), 'w') as l_export_file:
            for label in idx2rel.values():
                l_export_file.write(label + "\n")

        for named_embedding in self.model.parameter_list:
            all_ids = list(range(0, int(named_embedding.weight.shape[0])))

            stored_name = named_embedding.name

            if len(named_embedding.shape) == 2:
                all_embs = named_embedding.weight.detach().cpu().numpy()
                with open(str(save_path / ("%s.tsv" % stored_name)), 'w') as v_export_file:
                    for idx in all_ids:
                        v_export_file.write("\t".join([str(x) for x in all_embs[idx]]) + "\n") 
Example #29
Source File: struc2vec.py    From GraphEmbedding with MIT License 5 votes
def train(self, embed_size=128, window_size=5, workers=3, iter=5):

        # pd.read_pickle(self.temp_path+'walks.pkl')
        sentences = self.sentences

        print("Learning representation...")
        model = Word2Vec(sentences, size=embed_size, window=window_size, min_count=0, hs=1, sg=1, workers=workers,
                         iter=iter)
        print("Learning representation done!")
        self.w2v_model = model

        return model 
Example #30
Source File: struc2vec.py    From GraphEmbedding with MIT License 5 votes
def prepare_biased_walk(self,):

        sum_weights = {}
        sum_edges = {}
        average_weight = {}
        gamma = {}
        layer = 0
        while (os.path.exists(self.temp_path+'norm_weights_distance-layer-' + str(layer)+'.pkl')):
            probs = pd.read_pickle(
                self.temp_path+'norm_weights_distance-layer-' + str(layer)+'.pkl')
            for v, list_weights in probs.items():
                sum_weights.setdefault(layer, 0)
                sum_edges.setdefault(layer, 0)
                sum_weights[layer] += sum(list_weights)
                sum_edges[layer] += len(list_weights)

            average_weight[layer] = sum_weights[layer] / sum_edges[layer]

            gamma.setdefault(layer, {})

            for v, list_weights in probs.items():
                num_neighbours = 0
                for w in list_weights:
                    if (w > average_weight[layer]):
                        num_neighbours += 1
                gamma[layer][v] = num_neighbours

            layer += 1

        pd.to_pickle(average_weight, self.temp_path + 'average_weight')
        pd.to_pickle(gamma, self.temp_path + 'gamma.pkl')