Python pandas.SparseDataFrame() Examples

The following are 30 code examples of pandas.SparseDataFrame(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the pandas module, or try the search function.

Example #1

Source File: test_frame.py From recruit with Apache License 2.0

6 votes

def test_comparison_op_scalar(self):
        """Scalar comparisons on a sparse frame agree with the dense result.

        Regression test for GH 13001.
        """
        columns = {
            'A': [nan, nan, 0, 1],
            'B': [0, 1, 2, nan],
            'C': [1., 2., 3., 4.],
            'D': [nan, nan, nan, nan],
        }
        dense = pd.DataFrame(columns)
        sp_frame = dense.to_sparse()

        # A comparison changes the internal representation, so each result
        # is densified before being checked against the dense comparison.
        greater = sp_frame > 1
        assert isinstance(greater, pd.SparseDataFrame)
        tm.assert_frame_equal(greater.to_dense(), dense > 1)

        nonzero = sp_frame != 0
        assert isinstance(nonzero, pd.SparseDataFrame)
        tm.assert_frame_equal(nonzero.to_dense(), dense != 0)

Example #2

Source File: test_frame.py From recruit with Apache License 2.0

6 votes

def test_notna(self):
        """Check ``SparseDataFrame.notna`` for NaN and non-NaN fill values.

        Regression test for GH 8276.
        """
        nan_filled = pd.SparseDataFrame({'A': [np.nan, np.nan, 1, 2, np.nan],
                                         'B': [0, np.nan, np.nan, 2, np.nan]})
        observed = nan_filled.notna()

        wanted = pd.SparseDataFrame(
            {'A': [False, False, True, True, False],
             'B': [True, False, False, True, False]},
            default_fill_value=False)
        # Built with a False fill value; the private default fill value is
        # then overwritten with NaN before the sparse comparison.
        wanted._default_fill_value = np.nan
        tm.assert_sp_frame_equal(observed, wanted)

        # if fill_value is not nan, True can be included in sp_values
        zero_filled = pd.SparseDataFrame({'A': [0, 0, 1, 2, np.nan],
                                          'B': [0, np.nan, 0, 2, np.nan]},
                                         default_fill_value=0.)
        observed = zero_filled.notna()
        assert isinstance(observed, pd.SparseDataFrame)
        dense_wanted = pd.DataFrame({'A': [True, True, True, True, False],
                                     'B': [True, False, True, True, False]})
        tm.assert_frame_equal(observed.to_dense(), dense_wanted)

Example #3

Source File: test_frame.py From recruit with Apache License 2.0

6 votes

def test_isna(self):
        """Check ``SparseDataFrame.isna`` for NaN and non-NaN fill values.

        Regression test for GH 8276.
        """
        with_nan_fill = pd.SparseDataFrame(
            {'A': [np.nan, np.nan, 1, 2, np.nan],
             'B': [0, np.nan, np.nan, 2, np.nan]})
        mask = with_nan_fill.isna()

        sparse_expected = pd.SparseDataFrame(
            {'A': [True, True, False, False, True],
             'B': [False, True, True, False, True]},
            default_fill_value=True)
        # The expectation is created with a True fill value, then its private
        # default fill value is replaced by NaN before comparing.
        sparse_expected._default_fill_value = np.nan
        tm.assert_sp_frame_equal(mask, sparse_expected)

        # if fill_value is not nan, True can be included in sp_values
        with_zero_fill = pd.SparseDataFrame(
            {'A': [0, 0, 1, 2, np.nan],
             'B': [0, np.nan, 0, 2, np.nan]},
            default_fill_value=0.)
        mask = with_zero_fill.isna()
        assert isinstance(mask, pd.SparseDataFrame)
        dense_expected = pd.DataFrame(
            {'A': [False, False, False, False, True],
             'B': [False, True, False, False, True]})
        tm.assert_frame_equal(mask.to_dense(), dense_expected)

Example #4

Source File: test_series.py From vnpy_crypto with MIT License

6 votes

def test_to_frame(self):
        """Compare ``SparseSeries.to_frame`` with directly built frames.

        Regression test for GH 9850.
        """
        values = [1, 2, 0, nan, 4, nan, 0]

        # series created without an explicit fill value
        series = pd.SparseSeries(values, name='x')
        tm.assert_sp_frame_equal(series.to_frame(),
                                 pd.SparseDataFrame({'x': values}))
        tm.assert_sp_frame_equal(series.to_frame(name='y'),
                                 pd.SparseDataFrame({'y': values}))

        # series created with fill_value=0
        series = pd.SparseSeries(values, name='x', fill_value=0)
        tm.assert_sp_frame_equal(
            series.to_frame(),
            pd.SparseDataFrame({'x': values}, default_fill_value=0))
        tm.assert_frame_equal(series.to_frame(name='y').to_dense(),
                              pd.DataFrame({'y': values}))

Example #5

Source File: test_combine_concat.py From vnpy_crypto with MIT License

6 votes

def test_concat_sparse_dense_rows(self, fill_value, sparse_idx, dense_idx):
        """Row-wise concat of a dense and a sparse frame yields a sparse frame.

        One of ``self.dense1``/``self.dense2`` is sparsified with
        ``fill_value``; the result is checked against the all-dense concat
        in both argument orders.
        """
        candidates = [self.dense1, self.dense2]
        stays_dense = candidates[dense_idx]
        goes_sparse = candidates[sparse_idx]
        mixed = [stays_dense, goes_sparse.to_sparse(fill_value=fill_value)]
        all_dense = [stays_dense, goes_sparse]

        # step 1 keeps the order (dense + sparse), step -1 flips it
        # (sparse + dense)
        for step in (1, -1):
            combined = pd.concat(mixed[::step])
            reference = pd.concat(all_dense[::step])

            assert isinstance(combined, pd.SparseDataFrame)
            tm.assert_frame_equal(combined.to_dense(), reference)

Example #6

Source File: test_frame.py From recruit with Apache License 2.0

6 votes

def test_constructor_ndarray(self, float_frame):
        """Build a SparseDataFrame from ndarrays and check length validation."""
        # no index or columns: only construction itself is exercised
        SparseDataFrame(float_frame.values)

        # 1d
        column_a = SparseDataFrame(float_frame['A'].values,
                                   index=float_frame.index,
                                   columns=['A'])
        tm.assert_sp_frame_equal(column_a,
                                 float_frame.reindex(columns=['A']))

        # raise on level argument
        with pytest.raises(TypeError):
            float_frame.reindex(columns=['A'], level=1)

        # wrong length index / columns
        with pytest.raises(ValueError, match="^Index length"):
            SparseDataFrame(float_frame.values, index=float_frame.index[:-1])

        with pytest.raises(ValueError, match="^Column length"):
            SparseDataFrame(float_frame.values,
                            columns=float_frame.columns[:-1])

    # GH 9272

Example #7

Source File: test_combine_concat.py From recruit with Apache License 2.0

6 votes

def test_concat_sparse_dense_rows(self, fill_value, sparse_idx, dense_idx):
        """Concatenate a dense frame with a sparsified one, in both orders.

        Each concat must return a ``pd.SparseDataFrame`` whose dense form
        equals the concat of the two dense frames in the same order.
        """
        pool = [self.dense1, self.dense2]
        with_sparse = [pool[dense_idx],
                       pool[sparse_idx].to_sparse(fill_value=fill_value)]
        dense_only = [pool[dense_idx], pool[sparse_idx]]

        # Pair each ordering of the mixed inputs with the same ordering of
        # the dense inputs: first as given, then reversed.
        orderings = zip((with_sparse, with_sparse[::-1]),
                        (dense_only, dense_only[::-1]))
        for sparse_frame, dense_frame in orderings:
            outcome = pd.concat(sparse_frame)
            target = pd.concat(dense_frame)

            assert isinstance(outcome, pd.SparseDataFrame)
            tm.assert_frame_equal(outcome.to_dense(), target)

Example #8

Source File: test_series.py From recruit with Apache License 2.0

6 votes

def test_to_frame(self):
        """``SparseSeries.to_frame`` is checked with and without ``name``.

        Regression test for GH 9850; the second half repeats the checks for
        a series created with ``fill_value=0``.
        """
        sparse_series = pd.SparseSeries([1, 2, 0, nan, 4, nan, 0], name='x')

        expected = pd.SparseDataFrame({'x': [1, 2, 0, nan, 4, nan, 0]})
        tm.assert_sp_frame_equal(sparse_series.to_frame(), expected)

        expected = pd.SparseDataFrame({'y': [1, 2, 0, nan, 4, nan, 0]})
        tm.assert_sp_frame_equal(sparse_series.to_frame(name='y'), expected)

        sparse_series = pd.SparseSeries([1, 2, 0, nan, 4, nan, 0],
                                        name='x', fill_value=0)

        expected = pd.SparseDataFrame({'x': [1, 2, 0, nan, 4, nan, 0]},
                                      default_fill_value=0)
        tm.assert_sp_frame_equal(sparse_series.to_frame(), expected)

        # the renamed frame is compared in dense form
        expected = pd.DataFrame({'y': [1, 2, 0, nan, 4, nan, 0]})
        renamed = sparse_series.to_frame(name='y')
        tm.assert_frame_equal(renamed.to_dense(), expected)

Example #9

Source File: test_frame.py From recruit with Apache License 2.0

6 votes

def test_constructor_from_series(self):
        """Construct a SparseDataFrame from a single SparseSeries (GH 2873).

        The second half only prepares sparse inputs for list-of-series
        construction; those constructor calls remain commented out because
        they are noted as failing.
        """
        # GH 2873
        x = Series(np.random.randn(10000), name='a')
        x = x.to_sparse(fill_value=0)
        assert isinstance(x, SparseSeries)
        df = SparseDataFrame(x)
        assert isinstance(df, SparseDataFrame)

        x = Series(np.random.randn(10000), name='a')
        y = Series(np.random.randn(10000), name='b')
        x2 = x.astype(float)
        # np.nan is used rather than the np.NaN alias, which NumPy 2.0 removed
        x2.loc[:9998] = np.nan
        # TODO: x_sparse is unused...fix
        x_sparse = x2.to_sparse(fill_value=np.nan)  # noqa

        # Currently fails too with weird ufunc error
        # df1 = SparseDataFrame([x_sparse, y])

        y.loc[:9998] = 0
        # TODO: y_sparse is unused...fix
        y_sparse = y.to_sparse(fill_value=0)  # noqa
        # without sparse value raises error
        # df2 = SparseDataFrame([x2_sparse, y])

Example #10

Source File: test_to_from_scipy.py From recruit with Apache License 2.0

6 votes

def test_from_scipy_correct_ordering(spmatrix):
    """Frames built from ``spmatrix(arr)`` and from ``arr`` must match.

    Regression test for GH 16179.
    """
    dense = np.arange(1, 5).reshape(2, 2)
    try:
        matrix = spmatrix(dense)
        assert matrix.dtype == dense.dtype
    except (TypeError, AssertionError):
        # The conversion failed or changed the dtype, so this
        # spmatrix/dtype pairing is treated as unsupported and the
        # remaining checks are skipped.
        return

    from_matrix = SparseDataFrame(matrix)
    from_array = SparseDataFrame(dense)
    tm.assert_sp_frame_equal(from_matrix, from_array)
    tm.assert_frame_equal(from_matrix.to_dense(), from_array.to_dense())

Example #11

Source File: test_frame.py From recruit with Apache License 2.0

6 votes

def test_astype_bool(self):
        """Cast an int64 sparse frame to ``SparseDtype(bool, False)``.

        The builtin ``bool`` is used for the dtypes instead of the ``np.bool``
        alias, which was deprecated in NumPy 1.20 and removed in 1.24.
        """
        sparse = pd.SparseDataFrame({'A': SparseArray([0, 2, 0, 4],
                                                      fill_value=0,
                                                      dtype=np.int64),
                                     'B': SparseArray([0, 5, 0, 7],
                                                      fill_value=0,
                                                      dtype=np.int64)},
                                    default_fill_value=0)
        assert sparse['A'].dtype == SparseDtype(np.int64)
        assert sparse['B'].dtype == SparseDtype(np.int64)

        res = sparse.astype(SparseDtype(bool, False))
        exp = pd.SparseDataFrame({'A': SparseArray([False, True, False, True],
                                                   dtype=bool,
                                                   fill_value=False,
                                                   kind='integer'),
                                  'B': SparseArray([False, True, False, True],
                                                   dtype=bool,
                                                   fill_value=False,
                                                   kind='integer')},
                                 default_fill_value=False)
        tm.assert_sp_frame_equal(res, exp)
        assert res['A'].dtype == SparseDtype(bool)
        assert res['B'].dtype == SparseDtype(bool)

Example #12

Source File: test_frame.py From recruit with Apache License 2.0

6 votes

def test_dense_to_sparse(self):
        """``DataFrame.to_sparse`` is checked for ``kind`` and ``fill_value``."""
        dense = DataFrame({'A': [nan, nan, nan, 1, 2],
                           'B': [1, 2, nan, nan, nan]})

        # no arguments: NaN default fill value and a BlockIndex per column
        block_sparse = dense.to_sparse()
        assert isinstance(block_sparse, SparseDataFrame)
        assert np.isnan(block_sparse.default_fill_value)
        assert isinstance(block_sparse['A'].sp_index, BlockIndex)
        tm.assert_frame_equal(block_sparse.to_dense(), dense)

        # kind='integer': the column index becomes an IntIndex
        int_sparse = dense.to_sparse(kind='integer')
        assert isinstance(int_sparse['A'].sp_index, IntIndex)

        # explicit zero fill value on float data
        zeros = DataFrame({'A': [0, 0, 0, 1, 2],
                           'B': [1, 2, 0, 0, 0]}, dtype=float)
        zero_sparse = zeros.to_sparse(fill_value=0)
        assert zero_sparse.default_fill_value == 0
        tm.assert_frame_equal(zero_sparse.to_dense(), zeros)

Example #13

Source File: InferenceLightGBM.py From KDDCup2019_admin with MIT License

6 votes

def get_node_id_feature_sparse(self, X):
        """Map ``self.get_feaure`` over the rows of ``X`` and one-hot encode.

        Args:
            X: object exposing ``.values``; each element of
                ``np.array(X.values)`` is passed to ``self.get_feaure``
                (presumably a pandas DataFrame -- confirm against callers).

        Returns:
            The ``pd.get_dummies`` encoding of the per-row results, wrapped
            in a ``pd.SparseDataFrame`` and cast to ``"float"``.
        """
        pool = ThreadPool(40)
        try:
            results = pool.map(self.get_feaure, np.array(X.values))
        finally:
            # Always release the worker threads, even when mapping fails;
            # previously the pool was never closed or joined.
            pool.close()
            pool.join()

        results = pd.DataFrame(list(results))

        print(results.columns)
        print("-------------")
        results = pd.SparseDataFrame(pd.get_dummies(results)).astype("float")

        print(results)
        print(results.columns)
        return results

Example #14

Source File: conftest.py From recruit with Apache License 2.0

5 votes

def float_frame():
    """
    Sparse float DataFrame fixture with a DatetimeIndex

    It has columns ['A', 'B', 'C', 'D'], and some entries are missing
    """
    # 'block' is already the default kind; it is spelled out explicitly
    frame_kwargs = {'index': dates, 'default_kind': 'block'}
    return SparseDataFrame(data, **frame_kwargs)

Example #15

Source File: test_series.py From vnpy_crypto with MIT License

5 votes

def test_concat_axis1(self):
        """Column-wise concat of two SparseSeries matches the dense concat."""
        x_vals = np.array([1, 2, np.nan, np.nan, 0, np.nan])
        y_vals = np.array([3, np.nan, 4, 0, 0])

        sparse_parts = [pd.SparseSeries(x_vals, name='x'),
                        pd.SparseSeries(y_vals, name='y')]
        dense_parts = [pd.Series(x_vals, name='x'),
                       pd.Series(y_vals, name='y')]

        result = pd.concat(sparse_parts, axis=1)
        # the dense concat is sparsified to serve as the expectation
        expected = pd.SparseDataFrame(pd.concat(dense_parts, axis=1))
        tm.assert_sp_frame_equal(result, expected)

Example #16

Source File: conftest.py From recruit with Apache License 2.0

5 votes

def float_frame_int_kind():
    """
    Sparse float DataFrame fixture with a DatetimeIndex

    Built with default_kind='integer'; columns are ['A', 'B', 'C', 'D']
    and some entries are missing.
    """
    sparse = SparseDataFrame(data, index=dates, default_kind='integer')
    return sparse

Example #17

Source File: conftest.py From recruit with Apache License 2.0

5 votes

def float_string_frame():
    """
    Sparse DataFrame fixture mixing floats and strings, with a DatetimeIndex

    Columns are ['A', 'B', 'C', 'D', 'foo']; some entries are missing
    """
    frame = SparseDataFrame(data, index=dates)
    # one 'bar' label per entry of ``dates`` fills the string column
    labels = ['bar'] * len(dates)
    frame['foo'] = SparseArray(labels)
    return frame

Example #18

Source File: test_reshape.py From vnpy_crypto with MIT License

5 votes

def sparse_df():
    """Return a SparseDataFrame with a 1 at each of (0, 0), (1, 1), (2, 2)."""
    diagonal = {i: {i: 1} for i in range(3)}  # eye
    return pd.SparseDataFrame(diagonal)

Example #19

Source File: test_combine_concat.py From vnpy_crypto with MIT License

5 votes

def test_concat_axis1(self):
        """``pd.concat(..., axis=1)`` of SparseSeries equals the dense concat.

        The dense concat is converted to a ``pd.SparseDataFrame`` before the
        sparse-frame comparison.
        """
        named_values = [
            ('x', np.array([1, 2, np.nan, np.nan, 0, np.nan])),
            ('y', np.array([3, np.nan, 4, 0, 0])),
        ]

        sparse_columns = [pd.SparseSeries(vals, name=label)
                          for label, vals in named_values]
        res = pd.concat(sparse_columns, axis=1)

        dense_columns = [pd.Series(vals, name=label)
                         for label, vals in named_values]
        exp = pd.SparseDataFrame(pd.concat(dense_columns, axis=1))
        tm.assert_sp_frame_equal(res, exp)

Example #20

Source File: test_format.py From vnpy_crypto with MIT License

5 votes

def test_sparse_repr_after_set(self):
        """Exercise ``repr`` on a sparse frame after a chained assignment.

        Regression test for GH 15488. A copy of the frame is taken first, a
        value is then written through chained indexing, ``repr`` is called,
        and finally the frame is compared with the earlier copy.
        """
        # GH 15488
        sdf = pd.SparseDataFrame([[np.nan, 1], [2, np.nan]])
        res = sdf.copy()

        # Ignore the warning: the 'mode.chained_assignment' option is set to
        # None for the duration of the chained write below.
        with pd.option_context('mode.chained_assignment', None):
            sdf[0][1] = 2  # This line triggers the bug

        # repr is called only for its side effects; its value is discarded.
        repr(sdf)
        tm.assert_sp_frame_equal(sdf, res)

Example #21

Source File: conftest.py From recruit with Apache License 2.0

5 votes

def empty_frame():
    """
    Fixture returning a SparseDataFrame constructed with no arguments
    """
    frame = SparseDataFrame()
    return frame

Example #22

Source File: test_apply.py From recruit with Apache License 2.0

5 votes

def test_apply_keep_sparse_dtype():
    """``apply(np.exp)`` agrees for a sparse frame and a DataFrame built from it.

    Regression test for GH 23744.
    """
    values = np.array([[0, 1, 0], [0, 0, 0], [0, 0, 1]])
    sparse = SparseDataFrame(values, columns=['b', 'a', 'c'],
                             default_fill_value=1)
    plain = DataFrame(sparse)

    from_sparse = sparse.apply(np.exp)
    from_plain = plain.apply(np.exp)
    tm.assert_frame_equal(from_sparse, from_plain)

Example #23

Source File: test_apply.py From recruit with Apache License 2.0

5 votes

def test_applymap(frame):
    """Smoke test: ``applymap`` on the fixture returns a SparseDataFrame."""
    def double(value):
        return value * 2

    doubled = frame.applymap(double)
    assert isinstance(doubled, SparseDataFrame)

Example #24

Source File: test_apply.py From recruit with Apache License 2.0

5 votes

def fill_frame(frame):
    """Rebuild ``frame`` with NaNs replaced by 2, using 2 as the fill value."""
    dense = frame.values.copy()
    nan_mask = np.isnan(dense)
    dense[nan_mask] = 2

    return SparseDataFrame(dense, index=frame.index,
                           columns=['A', 'B', 'C', 'D'],
                           default_fill_value=2)

Example #25

Source File: test_apply.py From recruit with Apache License 2.0

5 votes

def frame(dates):
    """Return a SparseDataFrame with columns A-D indexed by ``dates``.

    Columns A, B and D each contain runs of NaN; column C is
    ``np.arange(10)`` as float64.
    """
    columns = dict(
        A=[np.nan, np.nan, np.nan, 0, 1, 2, 3, 4, 5, 6],
        B=[0, 1, 2, np.nan, np.nan, np.nan, 3, 4, 5, 6],
        C=np.arange(10, dtype=np.float64),
        D=[0, 1, 2, 3, 4, 5, np.nan, np.nan, np.nan, np.nan],
    )
    return SparseDataFrame(columns, index=dates)

Example #26

Source File: test_apply.py From recruit with Apache License 2.0

5 votes

def empty():
    """Return a SparseDataFrame constructed with no arguments."""
    frame = SparseDataFrame()
    return frame

Example #27

Source File: test_frame.py From recruit with Apache License 2.0

5 votes

def test_dropna(self, inplace, how):
        """``dropna(axis=1)`` removes the NaN-only column ``F1``.

        Tests regression #21172.
        """
        source = pd.SparseDataFrame(
            {"F1": [float('nan'), float('nan')], "F2": [0, 1]}
        )
        wanted = pd.SparseDataFrame({"F2": [0, 1]})

        returned = source.dropna(axis=1, inplace=inplace, how=how)
        # With inplace=True the source frame itself is compared instead of
        # the value returned by dropna.
        actual = source if inplace else returned
        tm.assert_sp_frame_equal(wanted, actual)

Example #28

Source File: test_frame.py From recruit with Apache License 2.0

5 votes

def test_quantile_multi(self):
        """Several quantiles of a sparse frame match the dense result.

        Regression test for GH 17386.
        """
        rows = [[1, 1], [2, 10], [3, 100], [nan, nan]]
        quantiles = [0.1, 0.5]

        result = SparseDataFrame(rows).quantile(quantiles)

        dense_expected = DataFrame(rows).quantile(quantiles)
        sparse_expected = SparseDataFrame(dense_expected)

        # the result is compared both as a plain frame and as a sparse frame
        tm.assert_frame_equal(result, dense_expected)
        tm.assert_sp_frame_equal(result, sparse_expected)

Example #29

Source File: test_frame.py From recruit with Apache License 2.0

5 votes

def test_numpy_cumsum(self, float_frame):
        """``np.cumsum`` on a sparse frame; ``dtype`` and ``out`` must raise."""
        cumulative = np.cumsum(float_frame)
        reference = SparseDataFrame(float_frame.to_dense().cumsum())
        tm.assert_sp_frame_equal(cumulative, reference)

        with pytest.raises(ValueError,
                           match="the 'dtype' parameter is not supported"):
            np.cumsum(float_frame, dtype=np.int64)

        with pytest.raises(ValueError,
                           match="the 'out' parameter is not supported"):
            np.cumsum(float_frame, out=cumulative)

Example #30

Source File: test_frame.py From recruit with Apache License 2.0

5 votes

def test_cumsum(self, float_frame):
        """``cumsum`` with no axis, ``axis=None`` and ``axis=0`` matches dense."""
        reference = SparseDataFrame(float_frame.to_dense().cumsum())

        # every call variant is expected to give the same sparse frame
        for axis_kwargs in ({}, {'axis': None}, {'axis': 0}):
            tm.assert_sp_frame_equal(float_frame.cumsum(**axis_kwargs),
                                     reference)