Python pandas.SparseDtype() Examples

The following are 27 code examples of pandas.SparseDtype(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: filtering_fe_autotype.py    From dash-docs with MIT License 6 votes vote down vote up
def table_type(df_column):
    """Map a column's pandas extension dtype to a column type name.

    Note - this only works with Pandas >= 1.0.0

    Args:
        df_column: Object exposing a ``dtype`` attribute (e.g. a Series).

    Returns:
        One of the strings ``'datetime'``, ``'text'``, ``'numeric'`` or
        ``'any'``. ``'any'`` is the fallback for dtypes not listed below
        and for Python 2.
    """
    if sys.version_info < (3, 0):  # Pandas 1.0.0 does not support Python 2
        return 'any'

    dtype = df_column.dtype

    if isinstance(dtype, pd.DatetimeTZDtype):
        # Must be a plain string like the other branches; a trailing comma
        # here would turn the result into the 1-tuple ('datetime',).
        return 'datetime'

    text_dtypes = (
        pd.StringDtype,
        pd.BooleanDtype,
        pd.CategoricalDtype,
        pd.PeriodDtype,
    )
    if isinstance(dtype, text_dtypes):
        return 'text'

    numeric_dtypes = (
        pd.SparseDtype,
        pd.IntervalDtype,
        pd.Int8Dtype,
        pd.Int16Dtype,
        pd.Int32Dtype,
        pd.Int64Dtype,
    )
    if isinstance(dtype, numeric_dtypes):
        return 'numeric'

    return 'any'
Example #2
Source File: test_sparse.py    From coffeegrindsize with MIT License 6 votes vote down vote up
def test_where_series(self, data, na_value):
    """Check ``Series.where`` on the fixture data, with and without ``other``.

    Without ``other`` the expected Series is built with
    ``SparseDtype('float', 0.0)``; with ``other`` it keeps ``data.dtype``.
    """
    assert data[0] != data[1]
    array_type = type(data)
    a, b = data[:2]

    def as_series(values, dtype):
        # Build a Series backed by the same array type as the fixture.
        return pd.Series(array_type._from_sequence(values, dtype=dtype))

    ser = as_series([a, a, b, b], data.dtype)

    # No replacement given: the expectation holds na_value where cond is False.
    result = ser.where(np.array([True, True, False, False]))
    expected = as_series([a, a, na_value, na_value], SparseDtype('float', 0.0))
    self.assert_series_equal(result, expected)

    # Replacement given: the expectation takes values from `other` instead.
    other = array_type._from_sequence([a, b, a, b], dtype=data.dtype)
    result = ser.where(np.array([True, False, True, True]), other)
    expected = as_series([a, b, b, b], data.dtype)
    self.assert_series_equal(result, expected)
Example #3
Source File: test_sparse.py    From coffeegrindsize with MIT License 6 votes vote down vote up
def test_fillna_frame(self, data_missing):
    """Check ``DataFrame.fillna`` on a frame holding the fixture array.

    Have to override to specify that fill_value will change.
    """
    replacement = data_missing[1]

    frame = pd.DataFrame({
        "A": data_missing,
        "B": [1, 2]
    })
    result = frame.fillna(replacement)

    # When the array's fill_value is NA, the expected dtype carries the
    # replacement as its fill value; otherwise the dtype is unchanged.
    expected_dtype = (
        SparseDtype(data_missing.dtype, replacement)
        if pd.isna(data_missing.fill_value)
        else data_missing.dtype
    )
    filled = data_missing._from_sequence([replacement, replacement],
                                         dtype=expected_dtype)
    expected = pd.DataFrame({
        "A": filled,
        "B": [1, 2],
    })

    self.assert_frame_equal(result, expected)
Example #4
Source File: test_sparse.py    From coffeegrindsize with MIT License 6 votes vote down vote up
def test_isna(self, data_missing):
    """Check ``isna`` on the array, on a Series, and on an emptied Series."""
    fill_is_na = pd.isna(data_missing.dtype.fill_value)
    expected_dtype = SparseDtype(bool, fill_is_na)
    expected_mask = SparseArray([True, False], dtype=expected_dtype)

    # Array-level check.
    self.assert_equal(pd.isna(data_missing), expected_mask)

    # Series-level check.
    self.assert_series_equal(pd.Series(data_missing).isna(),
                             pd.Series(expected_mask))

    # GH 21189
    emptied = pd.Series(data_missing).drop([0, 1])
    self.assert_series_equal(emptied.isna(),
                             pd.Series([], dtype=expected_dtype))
Example #5
Source File: test_utils.py    From scprep with GNU General Public License v3.0 6 votes vote down vote up
def test_SparseDataFrame():
    """Check ``scprep.utils.SparseDataFrame`` against a float-sparse cast.

    The reference is the loaded data cast to
    ``pd.SparseDtype(float, fill_value=0.0)``.
    """
    dense = data.load_10X(sparse=False)
    reference = dense.astype(pd.SparseDtype(float, fill_value=0.0))
    row_labels, column_labels = dense.index, dense.columns

    def test_fun(X):
        converted = scprep.utils.SparseDataFrame(
            X, index=row_labels, columns=column_labels
        )
        utils.assert_matrix_class_equivalent(converted, reference)

    matrix.test_all_matrix_types(dense, test_fun)
    matrix.test_pandas_matrix_types(
        dense,
        utils.assert_transform_equivalent,
        Y=reference,
        transform=scprep.utils.SparseDataFrame,
    )
Example #6
Source File: test_utils.py    From scprep with GNU General Public License v3.0 6 votes vote down vote up
def test_is_sparse_dataframe():
    """Check ``scprep.utils.is_sparse_dataframe`` on sparse and other inputs.

    A float-sparse cast of the loaded data must be reported as sparse; the
    matrix types collected below must not.
    """
    dense = data.load_10X(sparse=False)
    sparse_frame = dense.astype(pd.SparseDtype(float, fill_value=0.0))
    assert scprep.utils.is_sparse_dataframe(sparse_frame)

    def test_fun(X):
        assert not scprep.utils.is_sparse_dataframe(X)

    non_sparse_types = (
        matrix._scipy_matrix_types
        + matrix._numpy_matrix_types
        + matrix._pandas_dense_matrix_types
    )
    if matrix._pandas_0:
        non_sparse_types.append(matrix.SparseDataFrame_deprecated)
    matrix.test_matrix_types(dense, test_fun, non_sparse_types)
Example #7
Source File: test_sparse.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_where_series(self, data, na_value):
    """Check ``Series.where`` on the fixture data, with and without ``other``.

    Without ``other`` the expected Series is built with
    ``SparseDtype('float', 0.0)``; with ``other`` it keeps ``data.dtype``.
    """
    assert data[0] != data[1]
    array_type = type(data)
    a, b = data[:2]

    def as_series(values, dtype):
        # Build a Series backed by the same array type as the fixture.
        return pd.Series(array_type._from_sequence(values, dtype=dtype))

    ser = as_series([a, a, b, b], data.dtype)

    # No replacement given: the expectation holds na_value where cond is False.
    result = ser.where(np.array([True, True, False, False]))
    expected = as_series([a, a, na_value, na_value], SparseDtype('float', 0.0))
    self.assert_series_equal(result, expected)

    # Replacement given: the expectation takes values from `other` instead.
    other = array_type._from_sequence([a, b, a, b], dtype=data.dtype)
    result = ser.where(np.array([True, False, True, True]), other)
    expected = as_series([a, b, b, b], data.dtype)
    self.assert_series_equal(result, expected)
Example #8
Source File: test_sparse.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_fillna_frame(self, data_missing):
    """Check ``DataFrame.fillna`` on a frame holding the fixture array.

    Have to override to specify that fill_value will change.
    """
    replacement = data_missing[1]

    frame = pd.DataFrame({
        "A": data_missing,
        "B": [1, 2]
    })
    result = frame.fillna(replacement)

    # When the array's fill_value is NA, the expected dtype carries the
    # replacement as its fill value; otherwise the dtype is unchanged.
    expected_dtype = (
        SparseDtype(data_missing.dtype, replacement)
        if pd.isna(data_missing.fill_value)
        else data_missing.dtype
    )
    filled = data_missing._from_sequence([replacement, replacement],
                                         dtype=expected_dtype)
    expected = pd.DataFrame({
        "A": filled,
        "B": [1, 2],
    })

    self.assert_frame_equal(result, expected)
Example #9
Source File: test_sparse.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_isna(self, data_missing):
    """Check ``isna`` on the array, on a Series, and on an emptied Series."""
    fill_is_na = pd.isna(data_missing.dtype.fill_value)
    expected_dtype = SparseDtype(bool, fill_is_na)
    expected_mask = SparseArray([True, False], dtype=expected_dtype)

    # Array-level check.
    self.assert_equal(pd.isna(data_missing), expected_mask)

    # Series-level check.
    self.assert_series_equal(pd.Series(data_missing).isna(),
                             pd.Series(expected_mask))

    # GH 21189
    emptied = pd.Series(data_missing).drop([0, 1])
    self.assert_series_equal(emptied.isna(),
                             pd.Series([], dtype=expected_dtype))
Example #10
Source File: test_sparse.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_isna(self, data_missing):
    """Check ``isna`` on the array, on a Series, and on an emptied Series."""
    fill_is_na = pd.isna(data_missing.dtype.fill_value)
    expected_dtype = SparseDtype(bool, fill_is_na)
    expected_mask = SparseArray([True, False], dtype=expected_dtype)

    # Array-level check.
    self.assert_equal(pd.isna(data_missing), expected_mask)

    # Series-level check.
    self.assert_series_equal(pd.Series(data_missing).isna(),
                             pd.Series(expected_mask))

    # GH 21189
    emptied = pd.Series(data_missing).drop([0, 1])
    self.assert_series_equal(emptied.isna(),
                             pd.Series([], dtype=expected_dtype))
Example #11
Source File: test_sparse.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_fillna_frame(self, data_missing):
    """Check ``DataFrame.fillna`` on a frame holding the fixture array.

    Have to override to specify that fill_value will change.
    """
    replacement = data_missing[1]

    frame = pd.DataFrame({
        "A": data_missing,
        "B": [1, 2]
    })
    result = frame.fillna(replacement)

    # When the array's fill_value is NA, the expected dtype carries the
    # replacement as its fill value; otherwise the dtype is unchanged.
    expected_dtype = (
        SparseDtype(data_missing.dtype, replacement)
        if pd.isna(data_missing.fill_value)
        else data_missing.dtype
    )
    filled = data_missing._from_sequence([replacement, replacement],
                                         dtype=expected_dtype)
    expected = pd.DataFrame({
        "A": filled,
        "B": [1, 2],
    })

    self.assert_frame_equal(result, expected)
Example #12
Source File: test_sparse.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_where_series(self, data, na_value):
    """Check ``Series.where`` on the fixture data, with and without ``other``.

    Without ``other`` the expected Series is built with
    ``SparseDtype('float', 0.0)``; with ``other`` it keeps ``data.dtype``.
    """
    assert data[0] != data[1]
    array_type = type(data)
    a, b = data[:2]

    def as_series(values, dtype):
        # Build a Series backed by the same array type as the fixture.
        return pd.Series(array_type._from_sequence(values, dtype=dtype))

    ser = as_series([a, a, b, b], data.dtype)

    # No replacement given: the expectation holds na_value where cond is False.
    result = ser.where(np.array([True, True, False, False]))
    expected = as_series([a, a, na_value, na_value], SparseDtype('float', 0.0))
    self.assert_series_equal(result, expected)

    # Replacement given: the expectation takes values from `other` instead.
    other = array_type._from_sequence([a, b, a, b], dtype=data.dtype)
    result = ser.where(np.array([True, False, True, True]), other)
    expected = as_series([a, b, b, b], data.dtype)
    self.assert_series_equal(result, expected)
Example #13
Source File: test_subclass.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_subclass_sparse_slice(self):
    """Check that slicing a ``SubclassedSparseSeries`` keeps values and dtype.

    Runs the same ``.loc``, ``.iloc`` and ``[]`` slice checks for an int64
    series and then for a float64 series.
    """
    cases = (
        ([1, 2, 3, 4, 5], np.int64),          # int64
        ([1., 2., 3., 4., 5.], np.float64),   # float64
    )
    for values, numpy_type in cases:
        s = tm.SubclassedSparseSeries(values)

        # .loc[1:3] is expected to yield labels 1, 2 and 3.
        exp = tm.SubclassedSparseSeries(values[1:4], index=[1, 2, 3])
        tm.assert_sp_series_equal(s.loc[1:3], exp)
        assert s.loc[1:3].dtype == SparseDtype(numpy_type)

        # .iloc[1:3] is expected to yield labels 1 and 2.
        exp = tm.SubclassedSparseSeries(values[1:3], index=[1, 2])
        tm.assert_sp_series_equal(s.iloc[1:3], exp)
        assert s.iloc[1:3].dtype == SparseDtype(numpy_type)

        # Plain [1:3] is expected to match the .iloc result.
        exp = tm.SubclassedSparseSeries(values[1:3], index=[1, 2])
        tm.assert_sp_series_equal(s[1:3], exp)
        assert s[1:3].dtype == SparseDtype(numpy_type)
Example #14
Source File: test_sparse.py    From recruit with Apache License 2.0 5 votes vote down vote up
def _check_unsupported(self, data):
        if data.dtype == SparseDtype(int, 0):
            pytest.skip("Can't store nan in int array.") 
Example #15
Source File: test_sparse.py    From coffeegrindsize with MIT License 5 votes vote down vote up
def _check_unsupported(self, data):
        if data.dtype == SparseDtype(int, 0):
            pytest.skip("Can't store nan in int array.") 
Example #16
Source File: test_sparse.py    From coffeegrindsize with MIT License 5 votes vote down vote up
def dtype():
    """Return a ``SparseDtype`` built with no arguments."""
    default_sparse_dtype = SparseDtype()
    return default_sparse_dtype
Example #17
Source File: test_encoders.py    From dask-ml with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_basic_dataframe(sparse, method, dask_data, dtype):
    """Compare dask-ml's ``OneHotEncoder`` with scikit-learn's on a dataframe.

    Both encoders get the same ``sparse`` and ``dtype`` settings. ``method``
    selects fit-then-transform (``"fit"``) or ``fit_transform`` (anything
    else).
    """
    encoder_options = {"sparse": sparse, "dtype": dtype}
    a = sklearn.preprocessing.OneHotEncoder(**encoder_options)
    b = dask_ml.preprocessing.OneHotEncoder(**encoder_options)

    if method != "fit":
        expected = a.fit_transform(df)
        result = b.fit_transform(dask_data)
    else:
        a.fit(df)
        b.fit(dask_data)
        expected = a.transform(df)
        result = b.transform(dask_data)

    # Fitted attributes left out of the estimator comparison.
    ignored_attributes = {
        "n_values_",
        "feature_indices_",
        "active_features_",
        "dtypes_",
        "drop_idx_",
    }
    assert_estimator_equal(a, b, exclude=ignored_attributes)

    assert isinstance(result, type(dask_data))
    assert len(result.columns) == expected.shape[1]

    expected_dtype = dtype
    if sparse and PANDAS_VERSION >= packaging.version.parse("0.24.0"):
        # pandas sparse ExtensionDtype interface
        expected_dtype = pd.SparseDtype(dtype, dtype(0))
    assert (result.dtypes == expected_dtype).all()

    da.utils.assert_eq(result.values, expected)
Example #18
Source File: test_patch.py    From scprep with GNU General Public License v3.0 5 votes vote down vote up
def test_fill_value():
    """Check ``CustomBlock.fill_value`` for a UInt16 and a sparse float Series.

    The UInt16 case must give an NA fill value; the
    ``SparseDtype(float, 0.0)`` case must not.
    """
    placement = slice(1, 2)

    nullable_values = pd.Series(np.arange(3), dtype=pd.UInt16Dtype())
    nullable_block = CustomBlock(nullable_values, placement=placement)
    assert pd.isna(nullable_block.fill_value)

    sparse_values = pd.Series(np.arange(3), dtype=pd.SparseDtype(float, 0.0))
    sparse_block = CustomBlock(sparse_values, placement=placement)
    assert not pd.isna(sparse_block.fill_value)
Example #19
Source File: test_sparse.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def _check_unsupported(self, data):
        if data.dtype == SparseDtype(int, 0):
            pytest.skip("Can't store nan in int array.") 
Example #20
Source File: matrix.py    From scprep with GNU General Public License v3.0 5 votes vote down vote up
def SparseDataFrame(X, default_fill_value=0.0):
    """Return ``X`` as a DataFrame cast to a float ``pd.SparseDtype``.

    Args:
        X: A scipy sparse matrix, a DataFrame, or anything accepted by
            ``pd.DataFrame``.
        default_fill_value: Fill value of the resulting sparse dtype.

    Returns:
        A DataFrame whose columns use
        ``pd.SparseDtype(float, fill_value=default_fill_value)``.
    """
    if sparse.issparse(X):
        frame = pd.DataFrame.sparse.from_spmatrix(X)
        frame.sparse.fill_value = default_fill_value
    elif not is_SparseDataFrame(X) and isinstance(X, pd.DataFrame):
        # Already a DataFrame that is_SparseDataFrame rejects: use as-is.
        frame = X
    else:
        frame = pd.DataFrame(X)

    target_dtype = pd.SparseDtype(float, fill_value=default_fill_value)
    return frame.astype(target_dtype)
Example #21
Source File: matrix.py    From scprep with GNU General Public License v3.0 5 votes vote down vote up
def SparseSeries(X, default_fill_value=0.0):
    """Build a ``pd.Series`` from ``X`` and cast it to a float sparse dtype.

    Args:
        X: Data accepted by ``pd.Series``.
        default_fill_value: Fill value of the resulting sparse dtype.

    Returns:
        The Series cast to
        ``pd.SparseDtype(float, fill_value=default_fill_value)``.
    """
    series = pd.Series(X)
    sparse_dtype = pd.SparseDtype(float, fill_value=default_fill_value)
    return series.astype(sparse_dtype)
Example #22
Source File: utils.py    From scprep with GNU General Public License v3.0 5 votes vote down vote up
def dataframe_to_sparse(x, fill_value=0.0):
    """Cast ``x`` to a float sparse dtype via ``x.astype``.

    Args:
        x: Object with an ``astype`` method (e.g. a DataFrame).
        fill_value: Fill value of the resulting sparse dtype.

    Returns:
        The result of casting ``x`` to
        ``pd.SparseDtype(float, fill_value=fill_value)``.
    """
    sparse_dtype = pd.SparseDtype(float, fill_value=fill_value)
    return x.astype(sparse_dtype)
Example #23
Source File: utils.py    From anndata with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def ensure_df_homogeneous(
    df: pd.DataFrame, name: str
) -> Union[np.ndarray, sparse.csr_matrix]:
    """Convert ``df`` to a single array, warning if its dtypes differ.

    Args:
        df: Frame to convert.
        name: Label used in the warning message.

    Returns:
        A CSR matrix when every column dtype is a ``pd.SparseDtype``,
        otherwise the result of ``df.to_numpy()``.
    """
    # TODO: rename this function, I would not expect this to return a non-dataframe
    every_column_sparse = all(
        isinstance(column_dtype, pd.SparseDtype) for column_dtype in df.dtypes
    )
    arr = df.sparse.to_coo().tocsr() if every_column_sparse else df.to_numpy()

    # Anything other than exactly one distinct column dtype triggers the warning.
    distinct_dtypes = df.dtypes.nunique()
    if distinct_dtypes != 1:
        warnings.warn(f"{name} converted to numpy array with dtype {arr.dtype}")
    return arr
Example #24
Source File: test_sparse.py    From recruit with Apache License 2.0 5 votes vote down vote up
def dtype():
    """Return a ``SparseDtype`` built with no arguments."""
    default_sparse_dtype = SparseDtype()
    return default_sparse_dtype
Example #25
Source File: test_subclass.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_subclass_sparse_slice(self):
    """Check that slicing a ``SubclassedSparseSeries`` keeps values and dtype.

    Runs the same ``.loc``, ``.iloc`` and ``[]`` slice checks for an int64
    series and then for a float64 series.
    """
    cases = (
        ([1, 2, 3, 4, 5], np.int64),          # int64
        ([1., 2., 3., 4., 5.], np.float64),   # float64
    )
    for values, numpy_type in cases:
        s = tm.SubclassedSparseSeries(values)

        # .loc[1:3] is expected to yield labels 1, 2 and 3.
        exp = tm.SubclassedSparseSeries(values[1:4], index=[1, 2, 3])
        tm.assert_sp_series_equal(s.loc[1:3], exp)
        assert s.loc[1:3].dtype == SparseDtype(numpy_type)

        # .iloc[1:3] is expected to yield labels 1 and 2.
        exp = tm.SubclassedSparseSeries(values[1:3], index=[1, 2])
        tm.assert_sp_series_equal(s.iloc[1:3], exp)
        assert s.iloc[1:3].dtype == SparseDtype(numpy_type)

        # Plain [1:3] is expected to match the .iloc result.
        exp = tm.SubclassedSparseSeries(values[1:3], index=[1, 2])
        tm.assert_sp_series_equal(s[1:3], exp)
        assert s[1:3].dtype == SparseDtype(numpy_type)
Example #26
Source File: test_sparse.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def dtype():
    """Return a ``SparseDtype`` built with no arguments."""
    default_sparse_dtype = SparseDtype()
    return default_sparse_dtype
Example #27
Source File: test_dtypes.py    From pandera with MIT License 4 votes vote down vote up
def test_pandas_extension_types():
    """Test pandas extension data type happy path."""
    # pylint: disable=no-member
    # Each case is (dtype, sample data, extra SeriesSchema kwargs or None).
    cases = [
        (
            pd.CategoricalDtype(),
            pd.Series(["a", "a", "b", "b", "c", "c"], dtype="category"),
            None,
        ),
        (
            pd.DatetimeTZDtype(tz='UTC'),
            pd.Series(
                pd.date_range(start="20200101", end="20200301"),
                dtype="datetime64[ns, utc]",
            ),
            None,
        ),
        (
            pd.Int64Dtype(),
            pd.Series(range(10), dtype="Int64"),
            None,
        ),
        (
            pd.StringDtype(),
            pd.Series(["foo", "bar", "baz"], dtype="string"),
            None,
        ),
        (
            pd.PeriodDtype(freq='D'),
            pd.Series(pd.period_range('1/1/2019', '1/1/2020', freq='D')),
            None,
        ),
        (
            pd.SparseDtype("float"),
            pd.Series(range(100))
            .where(lambda s: s < 5, other=np.nan)
            .astype("Sparse[float]"),
            {"nullable": True},
        ),
        (
            pd.BooleanDtype(),
            pd.Series([1, 0, 0, 1, 1], dtype="boolean"),
            None,
        ),
        (
            pd.IntervalDtype(subtype="int64"),
            pd.Series(pd.IntervalIndex.from_breaks([0, 1, 2, 3, 4])),
            None,
        ),
    ]
    for dtype, data, series_kwargs in cases:
        # A case without extra kwargs passes nothing beyond the dtype.
        extra_kwargs = series_kwargs or {}
        series_schema = SeriesSchema(pandas_dtype=dtype, **extra_kwargs)
        validated = series_schema.validate(data)
        assert isinstance(validated, pd.Series)