Python pandas.SparseDataFrame() Examples

The following are 30 code examples of pandas.SparseDataFrame(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: test_frame.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_comparison_op_scalar(self):
        """Compare a sparse frame with scalars and check the dense result.

        Regression test for GH 13001.
        """
        dense = pd.DataFrame({'A': [nan, nan, 0, 1],
                              'B': [0, 1, 2, nan],
                              'C': [1., 2., 3., 4.],
                              'D': [nan, nan, nan, nan]})
        sp_frame = dense.to_sparse()

        # A comparison changes the internal representation, so each result
        # is densified before it is checked against the dense comparison.
        greater = sp_frame > 1
        assert isinstance(greater, pd.SparseDataFrame)
        tm.assert_frame_equal(greater.to_dense(), dense > 1)

        nonzero = sp_frame != 0
        assert isinstance(nonzero, pd.SparseDataFrame)
        tm.assert_frame_equal(nonzero.to_dense(), dense != 0)
Example #2
Source File: test_frame.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_notna(self):
        """Check ``notna`` on sparse frames with NaN and non-NaN fill values.

        Regression test for GH 8276.
        """
        nan_filled = pd.SparseDataFrame(
            {'A': [np.nan, np.nan, 1, 2, np.nan],
             'B': [0, np.nan, np.nan, 2, np.nan]})

        # The expected frame is built with a False fill value and then has
        # its private default fill value reset to NaN before the comparison.
        expected = pd.SparseDataFrame(
            {'A': [False, False, True, True, False],
             'B': [True, False, False, True, False]},
            default_fill_value=False)
        expected._default_fill_value = np.nan
        result = nan_filled.notna()
        tm.assert_sp_frame_equal(result, expected)

        # With a fill value other than NaN, True may show up in sp_values,
        # so this case is compared through the dense representation.
        zero_filled = pd.SparseDataFrame(
            {'A': [0, 0, 1, 2, np.nan],
             'B': [0, np.nan, 0, 2, np.nan]},
            default_fill_value=0.)
        result = zero_filled.notna()
        assert isinstance(result, pd.SparseDataFrame)
        dense_expected = pd.DataFrame(
            {'A': [True, True, True, True, False],
             'B': [True, False, True, True, False]})
        tm.assert_frame_equal(result.to_dense(), dense_expected)
Example #3
Source File: test_frame.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_isna(self):
        """Check ``isna`` on sparse frames with NaN and non-NaN fill values.

        Regression test for GH 8276.
        """
        nan_filled = pd.SparseDataFrame(
            {'A': [np.nan, np.nan, 1, 2, np.nan],
             'B': [0, np.nan, np.nan, 2, np.nan]})

        # The expected frame is built with a True fill value and then has
        # its private default fill value reset to NaN before the comparison.
        expected = pd.SparseDataFrame(
            {'A': [True, True, False, False, True],
             'B': [False, True, True, False, True]},
            default_fill_value=True)
        expected._default_fill_value = np.nan
        result = nan_filled.isna()
        tm.assert_sp_frame_equal(result, expected)

        # With a fill value other than NaN, True may show up in sp_values,
        # so this case is compared through the dense representation.
        zero_filled = pd.SparseDataFrame(
            {'A': [0, 0, 1, 2, np.nan],
             'B': [0, np.nan, 0, 2, np.nan]},
            default_fill_value=0.)
        result = zero_filled.isna()
        assert isinstance(result, pd.SparseDataFrame)
        dense_expected = pd.DataFrame(
            {'A': [False, False, False, False, True],
             'B': [False, True, False, False, True]})
        tm.assert_frame_equal(result.to_dense(), dense_expected)
Example #4
Source File: test_series.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_to_frame(self):
        """Convert a SparseSeries to a frame, with and without renaming.

        Regression test for GH 9850.
        """
        raw = [1, 2, 0, nan, 4, nan, 0]

        # Default fill value: the column takes the series name unless
        # ``name`` is passed.
        series = pd.SparseSeries(raw, name='x')
        tm.assert_sp_frame_equal(series.to_frame(),
                                 pd.SparseDataFrame({'x': raw}))
        tm.assert_sp_frame_equal(series.to_frame(name='y'),
                                 pd.SparseDataFrame({'y': raw}))

        # Explicit fill value of 0 on the series.
        series = pd.SparseSeries(raw, name='x', fill_value=0)
        sparse_expected = pd.SparseDataFrame({'x': raw},
                                             default_fill_value=0)
        tm.assert_sp_frame_equal(series.to_frame(), sparse_expected)

        dense_expected = pd.DataFrame({'y': raw})
        renamed = series.to_frame(name='y')
        tm.assert_frame_equal(renamed.to_dense(), dense_expected)
Example #5
Source File: test_combine_concat.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_concat_sparse_dense_rows(self, fill_value, sparse_idx, dense_idx):
        """Row-wise concat of a dense and a sparse frame yields a sparse frame.

        ``sparse_idx`` and ``dense_idx`` select which of ``self.dense1`` /
        ``self.dense2`` is converted to sparse and which stays dense.
        """
        sources = [self.dense1, self.dense2]
        mixed_inputs = [sources[dense_idx],
                        sources[sparse_idx].to_sparse(fill_value=fill_value)]
        dense_inputs = [sources[dense_idx], sources[sparse_idx]]

        # step 1 keeps the order above and step -1 flips it, so both
        # dense + sparse and sparse + dense are exercised.
        for step in (1, -1):
            res = pd.concat(mixed_inputs[::step])
            exp = pd.concat(dense_inputs[::step])

            assert isinstance(res, pd.SparseDataFrame)
            tm.assert_frame_equal(res.to_dense(), exp)
Example #6
Source File: test_frame.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_constructor_ndarray(self, float_frame):
        """Build SparseDataFrames from ndarrays and check length validation."""
        # No index or columns: only checks that construction succeeds.
        SparseDataFrame(float_frame.values)

        # 1d input with an explicit index and a single column label
        column_a = SparseDataFrame(float_frame['A'].values,
                                   index=float_frame.index,
                                   columns=['A'])
        tm.assert_sp_frame_equal(column_a,
                                 float_frame.reindex(columns=['A']))

        # reindex raises when given the ``level`` argument
        with pytest.raises(TypeError):
            float_frame.reindex(columns=['A'], level=1)

        # index shorter than the data
        with pytest.raises(ValueError, match="^Index length"):
            SparseDataFrame(float_frame.values, index=float_frame.index[:-1])

        # columns shorter than the data
        with pytest.raises(ValueError, match="^Column length"):
            SparseDataFrame(float_frame.values,
                            columns=float_frame.columns[:-1])

    # GH 9272 
Example #7
Source File: test_combine_concat.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_concat_sparse_dense_rows(self, fill_value, sparse_idx, dense_idx):
        """Row-wise concat of a dense and a sparse frame yields a sparse frame.

        ``sparse_idx`` and ``dense_idx`` select which of ``self.dense1`` /
        ``self.dense2`` is converted to sparse and which stays dense.
        """
        sources = [self.dense1, self.dense2]
        mixed_inputs = [sources[dense_idx],
                        sources[sparse_idx].to_sparse(fill_value=fill_value)]
        dense_inputs = [sources[dense_idx], sources[sparse_idx]]

        # step 1 keeps the order above and step -1 flips it, so both
        # dense + sparse and sparse + dense are exercised.
        for step in (1, -1):
            res = pd.concat(mixed_inputs[::step])
            exp = pd.concat(dense_inputs[::step])

            assert isinstance(res, pd.SparseDataFrame)
            tm.assert_frame_equal(res.to_dense(), exp)
Example #8
Source File: test_series.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_to_frame(self):
        """Convert a SparseSeries to a frame, with and without renaming.

        Regression test for GH 9850.
        """
        raw = [1, 2, 0, nan, 4, nan, 0]

        # Default fill value: the column takes the series name unless
        # ``name`` is passed.
        series = pd.SparseSeries(raw, name='x')
        tm.assert_sp_frame_equal(series.to_frame(),
                                 pd.SparseDataFrame({'x': raw}))
        tm.assert_sp_frame_equal(series.to_frame(name='y'),
                                 pd.SparseDataFrame({'y': raw}))

        # Explicit fill value of 0 on the series.
        series = pd.SparseSeries(raw, name='x', fill_value=0)
        sparse_expected = pd.SparseDataFrame({'x': raw},
                                             default_fill_value=0)
        tm.assert_sp_frame_equal(series.to_frame(), sparse_expected)

        dense_expected = pd.DataFrame({'y': raw})
        renamed = series.to_frame(name='y')
        tm.assert_frame_equal(renamed.to_dense(), dense_expected)
Example #9
Source File: test_frame.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_constructor_from_series(self):
        """Construct a SparseDataFrame from a SparseSeries (GH 2873).

        Only the first section makes assertions. The remaining statements
        build a mostly-NaN and a mostly-zero sparse series for two
        constructor calls that are still disabled (see the commented-out
        lines below).
        """
        # GH 2873
        x = Series(np.random.randn(10000), name='a')
        x = x.to_sparse(fill_value=0)
        assert isinstance(x, SparseSeries)
        df = SparseDataFrame(x)
        assert isinstance(df, SparseDataFrame)

        x = Series(np.random.randn(10000), name='a')
        y = Series(np.random.randn(10000), name='b')
        x2 = x.astype(float)
        # Use ``np.nan``: the ``np.NaN`` alias was removed in NumPy 2.0.
        x2.loc[:9998] = np.nan
        # TODO: x_sparse is unused...fix
        x_sparse = x2.to_sparse(fill_value=np.nan)  # noqa

        # Currently fails too with weird ufunc error
        # df1 = SparseDataFrame([x_sparse, y])

        y.loc[:9998] = 0
        # TODO: y_sparse is unused...fix
        y_sparse = y.to_sparse(fill_value=0)  # noqa
        # without sparse value raises error
        # df2 = SparseDataFrame([x2_sparse, y])
Example #10
Source File: test_to_from_scipy.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_from_scipy_correct_ordering(spmatrix):
    """A frame built from a scipy sparse matrix equals one built densely.

    Regression test for GH 16179.
    """
    dense = np.arange(1, 5).reshape(2, 2)
    try:
        sparse_matrix = spmatrix(dense)
        assert sparse_matrix.dtype == dense.dtype
    except (TypeError, AssertionError):
        # Conversion failed or changed the dtype: this spmatrix type and
        # dtype combination is not currently supported in NumPy, so the
        # thorough check is skipped.
        return

    from_sparse = SparseDataFrame(sparse_matrix)
    from_dense = SparseDataFrame(dense)
    tm.assert_sp_frame_equal(from_sparse, from_dense)
    tm.assert_frame_equal(from_sparse.to_dense(), from_dense.to_dense())
Example #11
Source File: test_frame.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_astype_bool(self):
        """Cast an int64 sparse frame to a boolean sparse dtype.

        The builtin ``bool`` is used instead of ``np.bool``: that alias was
        deprecated in NumPy 1.20 and removed in 1.24, and it was always the
        same object as the builtin.
        """
        sparse = pd.SparseDataFrame({'A': SparseArray([0, 2, 0, 4],
                                                      fill_value=0,
                                                      dtype=np.int64),
                                     'B': SparseArray([0, 5, 0, 7],
                                                      fill_value=0,
                                                      dtype=np.int64)},
                                    default_fill_value=0)
        assert sparse['A'].dtype == SparseDtype(np.int64)
        assert sparse['B'].dtype == SparseDtype(np.int64)

        res = sparse.astype(SparseDtype(bool, False))
        exp = pd.SparseDataFrame({'A': SparseArray([False, True, False, True],
                                                   dtype=bool,
                                                   fill_value=False,
                                                   kind='integer'),
                                  'B': SparseArray([False, True, False, True],
                                                   dtype=bool,
                                                   fill_value=False,
                                                   kind='integer')},
                                 default_fill_value=False)
        tm.assert_sp_frame_equal(res, exp)
        assert res['A'].dtype == SparseDtype(bool)
        assert res['B'].dtype == SparseDtype(bool)
Example #12
Source File: test_frame.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_dense_to_sparse(self):
        """Round-trip dense frames through ``to_sparse`` / ``to_dense``."""
        # NaN-filled input with the default arguments
        dense = DataFrame({'A': [nan, nan, nan, 1, 2],
                           'B': [1, 2, nan, nan, nan]})
        as_sparse = dense.to_sparse()
        assert isinstance(as_sparse, SparseDataFrame)
        assert np.isnan(as_sparse.default_fill_value)
        assert isinstance(as_sparse['A'].sp_index, BlockIndex)
        tm.assert_frame_equal(as_sparse.to_dense(), dense)

        # kind='integer' switches the column's sparse index type
        as_sparse = dense.to_sparse(kind='integer')
        assert isinstance(as_sparse['A'].sp_index, IntIndex)

        # zero-filled float input with an explicit fill value
        dense = DataFrame({'A': [0, 0, 0, 1, 2],
                           'B': [1, 2, 0, 0, 0]}, dtype=float)
        as_sparse = dense.to_sparse(fill_value=0)
        assert as_sparse.default_fill_value == 0
        tm.assert_frame_equal(as_sparse.to_dense(), dense)
Example #13
Source File: InferenceLightGBM.py    From KDDCup2019_admin with MIT License 6 votes vote down vote up
def get_node_id_feature_sparse(self,X):
        """Return one-hot encoded ``self.get_feaure`` results as a sparse frame.

        ``self.get_feaure`` is mapped over ``np.array(X.values)`` on a pool
        of 40 threads. The collected results are put into a DataFrame,
        expanded with ``pd.get_dummies``, wrapped in a ``pd.SparseDataFrame``
        and cast to float. Column labels and the final frame are printed
        along the way.

        NOTE(review): presumably ``X`` is a DataFrame with one sample per
        row -- confirm against the caller.
        """
        pool = ThreadPool(40)
        try:
            # pool.map blocks until every item is done and returns a list.
            results = pool.map(self.get_feaure, np.array(X.values))
        finally:
            # Always release the worker threads, even if a worker raised;
            # the pool was previously never closed.
            pool.close()
            pool.join()

        results = pd.DataFrame(results)

        print(results.columns)
        print("-------------")
        results = pd.SparseDataFrame(pd.get_dummies(results)).astype("float")

        print(results)

        print(results.columns)
        return results
Example #14
Source File: conftest.py    From recruit with Apache License 2.0 5 votes vote down vote up
def float_frame():
    """
    Fixture for sparse DataFrame of floats with DatetimeIndex

    Columns are ['A', 'B', 'C', 'D']; some entries are missing
    """
    # 'block' is already the default kind; it is passed explicitly here.
    sparse_frame = SparseDataFrame(data, index=dates, default_kind='block')
    return sparse_frame
Example #15
Source File: test_series.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_concat_axis1(self):
        """Column-wise concat of two SparseSeries of different lengths."""
        left_values = np.array([1, 2, np.nan, np.nan, 0, np.nan])
        right_values = np.array([3, np.nan, 4, 0, 0])

        # Expected: the same concat done on dense Series, then made sparse.
        dense_columns = [pd.Series(left_values, name='x'),
                         pd.Series(right_values, name='y')]
        expected = pd.SparseDataFrame(pd.concat(dense_columns, axis=1))

        sparse_columns = [pd.SparseSeries(left_values, name='x'),
                          pd.SparseSeries(right_values, name='y')]
        result = pd.concat(sparse_columns, axis=1)

        tm.assert_sp_frame_equal(result, expected)
Example #16
Source File: conftest.py    From recruit with Apache License 2.0 5 votes vote down vote up
def float_frame_int_kind():
    """
    Fixture for sparse DataFrame of floats with DatetimeIndex

    Built with default_kind='integer'; columns are ['A', 'B', 'C', 'D'].
    Some entries are missing.
    """
    sparse_frame = SparseDataFrame(data, index=dates, default_kind='integer')
    return sparse_frame
Example #17
Source File: conftest.py    From recruit with Apache License 2.0 5 votes vote down vote up
def float_string_frame():
    """
    Fixture for sparse DataFrame of floats and strings with DatetimeIndex

    Columns are ['A', 'B', 'C', 'D', 'foo']; some entries are missing
    """
    mixed = SparseDataFrame(data, index=dates)
    # Add a string column holding 'bar' once per entry of ``dates``.
    n_rows = len(dates)
    mixed['foo'] = SparseArray(['bar'] * n_rows)
    return mixed
Example #18
Source File: test_reshape.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def sparse_df():
    """Return a sparse frame with 1 at positions (0, 0), (1, 1), (2, 2)."""
    # eye: column i only has an entry at row i
    diagonal = {i: {i: 1} for i in range(3)}
    return pd.SparseDataFrame(diagonal)
Example #19
Source File: test_combine_concat.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_concat_axis1(self):
        """Column-wise concat of two SparseSeries of different lengths."""
        left_values = np.array([1, 2, np.nan, np.nan, 0, np.nan])
        right_values = np.array([3, np.nan, 4, 0, 0])

        # Expected: the same concat done on dense Series, then made sparse.
        dense_columns = [pd.Series(left_values, name='x'),
                         pd.Series(right_values, name='y')]
        expected = pd.SparseDataFrame(pd.concat(dense_columns, axis=1))

        sparse_columns = [pd.SparseSeries(left_values, name='x'),
                          pd.SparseSeries(right_values, name='y')]
        result = pd.concat(sparse_columns, axis=1)

        tm.assert_sp_frame_equal(result, expected)
Example #20
Source File: test_format.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_sparse_repr_after_set(self):
        """Call ``repr`` on a sparse frame after a chained assignment.

        Regression test for GH 15488.
        """
        frame = pd.SparseDataFrame([[np.nan, 1], [2, np.nan]])
        snapshot = frame.copy()

        # Silence the chained-assignment warning for the write below.
        with pd.option_context('mode.chained_assignment', None):
            frame[0][1] = 2  # This line triggers the bug

        repr(frame)
        tm.assert_sp_frame_equal(frame, snapshot)
Example #21
Source File: conftest.py    From recruit with Apache License 2.0 5 votes vote down vote up
def empty_frame():
    """
    Fixture returning a SparseDataFrame constructed with no arguments
    """
    blank = SparseDataFrame()
    return blank
Example #22
Source File: test_apply.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_apply_keep_sparse_dtype():
    """``apply(np.exp)`` gives equal frames for sparse and DataFrame input.

    Regression test for GH 23744.
    """
    cells = np.array([[0, 1, 0], [0, 0, 0], [0, 0, 1]])
    sparse_input = SparseDataFrame(cells, columns=['b', 'a', 'c'],
                                   default_fill_value=1)
    frame_input = DataFrame(sparse_input)

    from_sparse = sparse_input.apply(np.exp)
    from_frame = frame_input.apply(np.exp)
    tm.assert_frame_equal(from_sparse, from_frame)
Example #23
Source File: test_apply.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_applymap(frame):
    """Smoke test: ``applymap`` on a sparse frame returns a sparse frame."""
    def double(value):
        return value * 2

    doubled = frame.applymap(double)
    assert isinstance(doubled, SparseDataFrame)
Example #24
Source File: test_apply.py    From recruit with Apache License 2.0 5 votes vote down vote up
def fill_frame(frame):
    """Return a sparse frame built from ``frame`` with NaNs replaced by 2.

    The result uses 2 as its default fill value, keeps ``frame``'s index
    and labels the columns 'A' to 'D'.
    """
    # Work on a copy so the values of the incoming frame stay untouched.
    filled = frame.values.copy()
    nan_mask = np.isnan(filled)
    filled[nan_mask] = 2

    return SparseDataFrame(filled,
                           index=frame.index,
                           columns=list('ABCD'),
                           default_fill_value=2)
Example #25
Source File: test_apply.py    From recruit with Apache License 2.0 5 votes vote down vote up
def frame(dates):
    """Return a ten-row sparse frame of floats indexed by ``dates``.

    Columns 'A', 'B' and 'D' each contain a run of NaNs; 'C' is dense.
    """
    columns = {
        'A': [np.nan, np.nan, np.nan, 0, 1, 2, 3, 4, 5, 6],
        'B': [0, 1, 2, np.nan, np.nan, np.nan, 3, 4, 5, 6],
        'C': np.arange(10, dtype=np.float64),
        'D': [0, 1, 2, 3, 4, 5, np.nan, np.nan, np.nan, np.nan],
    }
    sparse_frame = SparseDataFrame(columns, index=dates)
    return sparse_frame
Example #26
Source File: test_apply.py    From recruit with Apache License 2.0 5 votes vote down vote up
def empty():
    """Return a SparseDataFrame constructed with no arguments."""
    blank = SparseDataFrame()
    return blank
Example #27
Source File: test_frame.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_dropna(self, inplace, how):
        """Drop an all-NaN column from a sparse frame (regression #21172)."""
        source = pd.SparseDataFrame(
            {"F1": [float('nan'), float('nan')], "F2": [0, 1]}
        )
        expected = pd.SparseDataFrame({"F2": [0, 1]})

        returned = source.dropna(axis=1, inplace=inplace, how=how)
        # With inplace=True the input frame itself is the one checked,
        # not the return value of dropna.
        actual = source if inplace else returned
        tm.assert_sp_frame_equal(expected, actual)
Example #28
Source File: test_frame.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_quantile_multi(self):
        """Compute several quantiles at once on a sparse frame (GH 17386)."""
        rows = [[1, 1], [2, 10], [3, 100], [nan, nan]]
        quantiles = [0.1, 0.5]

        result = SparseDataFrame(rows).quantile(quantiles)

        # The dense computation is the reference, in dense and sparse form.
        dense_expected = DataFrame(rows).quantile(quantiles)
        sparse_expected = SparseDataFrame(dense_expected)

        tm.assert_frame_equal(result, dense_expected)
        tm.assert_sp_frame_equal(result, sparse_expected)
Example #29
Source File: test_frame.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_numpy_cumsum(self, float_frame):
        """``np.cumsum`` on a sparse frame; ``dtype`` and ``out`` must raise."""
        expected = SparseDataFrame(float_frame.to_dense().cumsum())
        result = np.cumsum(float_frame)
        tm.assert_sp_frame_equal(result, expected)

        dtype_msg = "the 'dtype' parameter is not supported"
        with pytest.raises(ValueError, match=dtype_msg):
            np.cumsum(float_frame, dtype=np.int64)

        out_msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=out_msg):
            np.cumsum(float_frame, out=result)
Example #30
Source File: test_frame.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_cumsum(self, float_frame):
        """``cumsum`` matches the dense result for several ``axis`` spellings."""
        expected = SparseDataFrame(float_frame.to_dense().cumsum())

        # No axis argument, axis=None and axis=0 are all checked against
        # the same expected frame.
        for kwargs in ({}, {'axis': None}, {'axis': 0}):
            result = float_frame.cumsum(**kwargs)
            tm.assert_sp_frame_equal(result, expected)