Python pandas.util.testing.assert_series_equal() Examples

The following are 30 code examples of pandas.util.testing.assert_series_equal(), drawn from open-source projects. Follow the link above each example to go to the original project or source file. You may also want to check out all available functions/classes of the module pandas.util.testing.
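For orientation, here is a minimal sketch of a typical call (not taken from any of the projects below). Note that pandas.util.testing has been deprecated since pandas 1.0 in favor of pandas.testing, which exposes the same assert_series_equal function.

import numpy as np
import pandas as pd
import pandas.util.testing as tm  # deprecated alias; pandas.testing is the public home since pandas 1.0

# assert_series_equal raises AssertionError when the two Series differ in
# values, index, dtype, or name; keyword arguments relax individual checks.
left = pd.Series([1.0, np.nan, 3.0], name='x')
right = pd.Series([1.0, np.nan, 3.0], name='x')
tm.assert_series_equal(left, right)  # passes silently

# ignore the Series name when comparing
tm.assert_series_equal(left, right.rename('y'), check_names=False)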
Example #1
Source File: test_window.py    From recruit with Apache License 2.0
def test_rolling_skew_edge_cases(self):

        all_nan = Series([np.NaN] * 5)

        # yields all NaN (0 variance)
        d = Series([1] * 5)
        x = d.rolling(window=5).skew()
        tm.assert_series_equal(all_nan, x)

        # yields all NaN (window too small)
        d = Series(np.random.randn(5))
        x = d.rolling(window=2).skew()
        tm.assert_series_equal(all_nan, x)

        # yields [NaN, NaN, NaN, 0.177994, 1.548824]
        d = Series([-1.50837035, -0.1297039, 0.19501095,
                    1.73508164, 0.41941401])
        expected = Series([np.NaN, np.NaN, np.NaN, 0.177994, 1.548824])
        x = d.rolling(window=4).skew()
        tm.assert_series_equal(expected, x) 
Example #2
Source File: test_window.py    From recruit with Apache License 2.0
def test_rolling_cov_offset(self):
        # GH16058

        idx = pd.date_range('2017-01-01', periods=24, freq='1h')
        ss = Series(np.arange(len(idx)), index=idx)

        result = ss.rolling('2h').cov()
        expected = Series([np.nan] + [0.5] * (len(idx) - 1), index=idx)
        tm.assert_series_equal(result, expected)

        expected2 = ss.rolling(2, min_periods=1).cov()
        tm.assert_series_equal(result, expected2)

        result = ss.rolling('3h').cov()
        expected = Series([np.nan, 0.5] + [1.0] * (len(idx) - 2), index=idx)
        tm.assert_series_equal(result, expected)

        expected2 = ss.rolling(3, min_periods=1).cov()
        tm.assert_series_equal(result, expected2) 
Example #3
Source File: test_window.py    From recruit with Apache License 2.0
def test_cmov_window_special_linear_range(self, win_types_special):
        # GH 8238
        kwds = {
            'kaiser': {'beta': 1.},
            'gaussian': {'std': 1.},
            'general_gaussian': {'power': 2., 'width': 2.},
            'slepian': {'width': 0.5}}

        vals = np.array(range(10), dtype=np.float)
        xp = vals.copy()
        xp[:2] = np.nan
        xp[-2:] = np.nan
        xp = Series(xp)

        rs = Series(vals).rolling(
            5, win_type=win_types_special, center=True).mean(
            **kwds[win_types_special])
        tm.assert_series_equal(xp, rs) 
Example #4
Source File: test_window.py    From recruit with Apache License 2.0
def test_groupby_monotonic(self):

        # GH 15130
        # we don't need to validate monotonicity when grouping

        data = [
            ['David', '1/1/2015', 100], ['David', '1/5/2015', 500],
            ['David', '5/30/2015', 50], ['David', '7/25/2015', 50],
            ['Ryan', '1/4/2014', 100], ['Ryan', '1/19/2015', 500],
            ['Ryan', '3/31/2016', 50], ['Joe', '7/1/2015', 100],
            ['Joe', '9/9/2015', 500], ['Joe', '10/15/2015', 50]]

        df = DataFrame(data=data, columns=['name', 'date', 'amount'])
        df['date'] = pd.to_datetime(df['date'])

        expected = df.set_index('date').groupby('name').apply(
            lambda x: x.rolling('180D')['amount'].sum())
        result = df.groupby('name').rolling('180D', on='date')['amount'].sum()
        tm.assert_series_equal(result, expected) 
Example #5
Source File: test_window.py    From recruit with Apache License 2.0
def test_expanding_corr_cov(self):
        g = self.frame.groupby('A')
        r = g.expanding()

        for f in ['corr', 'cov']:
            result = getattr(r, f)(self.frame)

            def func(x):
                return getattr(x.expanding(), f)(self.frame)
            expected = g.apply(func)
            tm.assert_frame_equal(result, expected)

            result = getattr(r.B, f)(pairwise=True)

            def func(x):
                return getattr(x.B.expanding(), f)(pairwise=True)
            expected = g.apply(func)
            tm.assert_series_equal(result, expected) 
Example #6
Source File: test_series.py    From recruit with Apache License 2.0
def test_to_dense_fill_value(self):
        s = pd.Series([1, np.nan, np.nan, 3, np.nan])
        res = SparseSeries(s).to_dense()
        tm.assert_series_equal(res, s)

        res = SparseSeries(s, fill_value=0).to_dense()
        tm.assert_series_equal(res, s)

        s = pd.Series([1, np.nan, 0, 3, 0])
        res = SparseSeries(s).to_dense()
        tm.assert_series_equal(res, s)

        res = SparseSeries(s, fill_value=0).to_dense()
        tm.assert_series_equal(res, s)

        s = pd.Series([np.nan, np.nan, np.nan, np.nan, np.nan])
        res = SparseSeries(s).to_dense()
        tm.assert_series_equal(res, s)

        s = pd.Series([np.nan, np.nan, np.nan, np.nan, np.nan])
        res = SparseSeries(s, fill_value=0).to_dense()
        tm.assert_series_equal(res, s) 
Example #7
Source File: test_window.py    From recruit with Apache License 2.0
def test_rolling_corr_cov(self):
        g = self.frame.groupby('A')
        r = g.rolling(window=4)

        for f in ['corr', 'cov']:
            result = getattr(r, f)(self.frame)

            def func(x):
                return getattr(x.rolling(4), f)(self.frame)
            expected = g.apply(func)
            tm.assert_frame_equal(result, expected)

            result = getattr(r.B, f)(pairwise=True)

            def func(x):
                return getattr(x.B.rolling(4), f)(pairwise=True)
            expected = g.apply(func)
            tm.assert_series_equal(result, expected) 
Example #8
Source File: test_window.py    From recruit with Apache License 2.0
def test_getitem(self):
        g = self.frame.groupby('A')
        g_mutated = self.frame.groupby('A', mutated=True)

        expected = g_mutated.B.apply(lambda x: x.rolling(2).mean())

        result = g.rolling(2).mean().B
        tm.assert_series_equal(result, expected)

        result = g.rolling(2).B.mean()
        tm.assert_series_equal(result, expected)

        result = g.B.rolling(2).mean()
        tm.assert_series_equal(result, expected)

        result = self.frame.B.groupby(self.frame.A).rolling(2).mean()
        tm.assert_series_equal(result, expected) 
Example #9
Source File: test_window.py    From recruit with Apache License 2.0
def test_rolling_median_resample(self):

        indices = [datetime(1975, 1, i) for i in range(1, 6)]
        # So that we can have 3 datapoints on last day (4, 10, and 20)
        indices.append(datetime(1975, 1, 5, 1))
        indices.append(datetime(1975, 1, 5, 2))
        series = Series(list(range(0, 5)) + [10, 20], index=indices)
        # Use floats instead of ints as values
        series = series.map(lambda x: float(x))
        # Sort chronologically
        series = series.sort_index()

        # Default how should be median
        expected = Series([0.0, 1.0, 2.0, 3.0, 10],
                          index=[datetime(1975, 1, i, 0) for i in range(1, 6)])
        x = series.resample('D').median().rolling(window=1).median()
        tm.assert_series_equal(expected, x) 
Example #10
Source File: test_window.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_rolling_min_resample(self):

        indices = [datetime(1975, 1, i) for i in range(1, 6)]
        # So that we can have 3 datapoints on last day (4, 10, and 20)
        indices.append(datetime(1975, 1, 5, 1))
        indices.append(datetime(1975, 1, 5, 2))
        series = Series(list(range(0, 5)) + [10, 20], index=indices)
        # Use floats instead of ints as values
        series = series.map(lambda x: float(x))
        # Sort chronologically
        series = series.sort_index()

        # Default how should be min
        expected = Series([0.0, 1.0, 2.0, 3.0, 4.0],
                          index=[datetime(1975, 1, i, 0) for i in range(1, 6)])
        r = series.resample('D').min().rolling(window=1)
        tm.assert_series_equal(expected, r.min()) 
Example #11
Source File: test_series.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_dropna(self):
        sp = SparseSeries([0, 0, 0, nan, nan, 5, 6], fill_value=0)

        sp_valid = sp.dropna()

        expected = sp.to_dense().dropna()
        expected = expected[expected != 0]
        exp_arr = pd.SparseArray(expected.values, fill_value=0, kind='block')
        tm.assert_sp_array_equal(sp_valid.values, exp_arr)
        tm.assert_index_equal(sp_valid.index, expected.index)
        assert len(sp_valid.sp_values) == 2

        result = self.bseries.dropna()
        expected = self.bseries.to_dense().dropna()
        assert not isinstance(result, SparseSeries)
        tm.assert_series_equal(result, expected) 
Example #12
Source File: test_window.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_expanding_cov_diff_index(self):
        # GH 7512
        s1 = Series([1, 2, 3], index=[0, 1, 2])
        s2 = Series([1, 3], index=[0, 2])
        result = s1.expanding().cov(s2)
        expected = Series([None, None, 2.0])
        tm.assert_series_equal(result, expected)

        s2a = Series([1, None, 3], index=[0, 1, 2])
        result = s1.expanding().cov(s2a)
        tm.assert_series_equal(result, expected)

        s1 = Series([7, 8, 10], index=[0, 1, 3])
        s2 = Series([7, 9, 10], index=[0, 2, 3])
        result = s1.expanding().cov(s2)
        expected = Series([None, None, None, 4.5])
        tm.assert_series_equal(result, expected) 
Example #13
Source File: test_window.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_rolling_max_gh6297(self):
        """Replicate result expected in GH #6297"""

        indices = [datetime(1975, 1, i) for i in range(1, 6)]
        # So that we can have 2 datapoints on one of the days
        indices.append(datetime(1975, 1, 3, 6, 0))
        series = Series(range(1, 7), index=indices)
        # Use floats instead of ints as values
        series = series.map(lambda x: float(x))
        # Sort chronologically
        series = series.sort_index()

        expected = Series([1.0, 2.0, 6.0, 4.0, 5.0],
                          index=[datetime(1975, 1, i, 0) for i in range(1, 6)])
        x = series.resample('D').max().rolling(window=1).max()
        tm.assert_series_equal(expected, x) 
Example #14
Source File: test_series.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_value_counts_dup(self):
        vals = [1, 2, nan, 0, nan, 1, 2, nan, nan, 1, 2, 0, 1, 1]

        # numeric op may cause sp_values to include the same value as
        # fill_value
        dense = pd.Series(vals, name='xx') / 0.
        sparse = pd.SparseSeries(vals, name='xx') / 0.
        tm.assert_series_equal(sparse.value_counts(),
                               dense.value_counts())
        tm.assert_series_equal(sparse.value_counts(dropna=False),
                               dense.value_counts(dropna=False))

        vals = [1, 2, 0, 0, 0, 1, 2, 0, 0, 1, 2, 0, 1, 1]

        dense = pd.Series(vals, name='xx') * 0.
        sparse = pd.SparseSeries(vals, name='xx') * 0.
        tm.assert_series_equal(sparse.value_counts(),
                               dense.value_counts())
        tm.assert_series_equal(sparse.value_counts(dropna=False),
                               dense.value_counts(dropna=False)) 
Example #15
Source File: test_decimal.py    From recruit with Apache License 2.0 6 votes vote down vote up
def assert_frame_equal(self, left, right, *args, **kwargs):
        # TODO(EA): select_dtypes
        tm.assert_index_equal(
            left.columns, right.columns,
            exact=kwargs.get('check_column_type', 'equiv'),
            check_names=kwargs.get('check_names', True),
            check_exact=kwargs.get('check_exact', False),
            check_categorical=kwargs.get('check_categorical', True),
            obj='{obj}.columns'.format(obj=kwargs.get('obj', 'DataFrame')))

        decimals = (left.dtypes == 'decimal').index

        for col in decimals:
            self.assert_series_equal(left[col], right[col],
                                     *args, **kwargs)

        left = left.drop(columns=decimals)
        right = right.drop(columns=decimals)
        tm.assert_frame_equal(left, right, *args, **kwargs) 
Example #16
Source File: test_decimal.py    From recruit with Apache License 2.0 6 votes vote down vote up
def assert_series_equal(self, left, right, *args, **kwargs):
        def convert(x):
            # need to convert array([Decimal(NaN)], dtype='object') to np.NaN
            # because Series[object].isnan doesn't recognize decimal(NaN) as
            # NA.
            try:
                return math.isnan(x)
            except TypeError:
                return False

        if left.dtype == 'object':
            left_na = left.apply(convert)
        else:
            left_na = left.isna()
        if right.dtype == 'object':
            right_na = right.apply(convert)
        else:
            right_na = right.isna()

        tm.assert_series_equal(left_na, right_na)
        return tm.assert_series_equal(left[~left_na],
                                      right[~right_na],
                                      *args, **kwargs) 
Example #17
Source File: test_json.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_custom_asserts(self):
        # This would always trigger the KeyError from trying to put
        # an array of equal-length UserDicts inside an ndarray.
        data = JSONArray([collections.UserDict({'a': 1}),
                          collections.UserDict({'b': 2}),
                          collections.UserDict({'c': 3})])
        a = pd.Series(data)
        self.assert_series_equal(a, a)
        self.assert_frame_equal(a.to_frame(), a.to_frame())

        b = pd.Series(data.take([0, 0, 1]))
        with pytest.raises(AssertionError):
            self.assert_series_equal(a, b)

        with pytest.raises(AssertionError):
            self.assert_frame_equal(a.to_frame(), b.to_frame()) 
Example #18
Source File: test_series.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_value_counts_int(self):
        vals = [1, 2, 0, 1, 2, 1, 2, 0, 1, 1]
        dense = pd.Series(vals, name='xx')

        # fill_value is np.nan, but should not be included in the result
        sparse = pd.SparseSeries(vals, name='xx')
        tm.assert_series_equal(sparse.value_counts(),
                               dense.value_counts())
        tm.assert_series_equal(sparse.value_counts(dropna=False),
                               dense.value_counts(dropna=False))

        sparse = pd.SparseSeries(vals, name='xx', fill_value=0)
        tm.assert_series_equal(sparse.value_counts(),
                               dense.value_counts())
        tm.assert_series_equal(sparse.value_counts(dropna=False),
                               dense.value_counts(dropna=False)) 
Example #19
Source File: test_sparse.py    From recruit with Apache License 2.0 6 votes vote down vote up
def _compare_other(self, s, data, op_name, other):
        op = self.get_op_from_name(op_name)

        # array
        result = pd.Series(op(data, other))
        # hard to test the fill value, since we don't know what expected
        # is in general.
        # Rely on tests in `tests/sparse` to validate that.
        assert isinstance(result.dtype, SparseDtype)
        assert result.dtype.subtype == np.dtype('bool')

        with np.errstate(all='ignore'):
            expected = pd.Series(
                pd.SparseArray(op(np.asarray(data), np.asarray(other)),
                               fill_value=result.values.fill_value)
            )

        tm.assert_series_equal(result, expected)

        # series
        s = pd.Series(data)
        result = op(s, other)
        tm.assert_series_equal(result, expected) 
Example #20
Source File: test_sparse.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_where_series(self, data, na_value):
        assert data[0] != data[1]
        cls = type(data)
        a, b = data[:2]

        ser = pd.Series(cls._from_sequence([a, a, b, b], dtype=data.dtype))

        cond = np.array([True, True, False, False])
        result = ser.where(cond)

        new_dtype = SparseDtype('float', 0.0)
        expected = pd.Series(cls._from_sequence([a, a, na_value, na_value],
                                                dtype=new_dtype))
        self.assert_series_equal(result, expected)

        other = cls._from_sequence([a, b, a, b], dtype=data.dtype)
        cond = np.array([True, False, True, True])
        result = ser.where(cond, other)
        expected = pd.Series(cls._from_sequence([a, b, b, b],
                                                dtype=data.dtype))
        self.assert_series_equal(result, expected) 
Example #21
Source File: test_sparse.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_combine_le(self, data_repeated):
        # Series[SparseArray].__le__ returns Series[Sparse[bool]]
        # rather than Series[bool]
        orig_data1, orig_data2 = data_repeated(2)
        s1 = pd.Series(orig_data1)
        s2 = pd.Series(orig_data2)
        result = s1.combine(s2, lambda x1, x2: x1 <= x2)
        expected = pd.Series(pd.SparseArray([
            a <= b for (a, b) in
            zip(list(orig_data1), list(orig_data2))
        ], fill_value=False))
        self.assert_series_equal(result, expected)

        val = s1.iloc[0]
        result = s1.combine(val, lambda x1, x2: x1 <= x2)
        expected = pd.Series(pd.SparseArray([
            a <= val for a in list(orig_data1)
        ], fill_value=False))
        self.assert_series_equal(result, expected) 
Example #22
Source File: test_sparse.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_isna(self, data_missing):
        expected_dtype = SparseDtype(bool,
                                     pd.isna(data_missing.dtype.fill_value))
        expected = SparseArray([True, False], dtype=expected_dtype)

        result = pd.isna(data_missing)
        self.assert_equal(result, expected)

        result = pd.Series(data_missing).isna()
        expected = pd.Series(expected)
        self.assert_series_equal(result, expected)

        # GH 21189
        result = pd.Series(data_missing).drop([0, 1]).isna()
        expected = pd.Series([], dtype=expected_dtype)
        self.assert_series_equal(result, expected) 
Example #23
Source File: test_series.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_isna(self):
        # GH 8276
        s = pd.SparseSeries([np.nan, np.nan, 1, 2, np.nan], name='xxx')

        res = s.isna()
        exp = pd.SparseSeries([True, True, False, False, True], name='xxx',
                              fill_value=True)
        tm.assert_sp_series_equal(res, exp)

        # if fill_value is not nan, True can be included in sp_values
        s = pd.SparseSeries([np.nan, 0., 1., 2., 0.], name='xxx',
                            fill_value=0.)
        res = s.isna()
        assert isinstance(res, pd.SparseSeries)
        exp = pd.Series([True, False, False, False, False], name='xxx')
        tm.assert_series_equal(res.to_dense(), exp) 
Example #24
Source File: test_window.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_rolling_kurt_edge_cases(self):

        all_nan = Series([np.NaN] * 5)

        # yields all NaN (0 variance)
        d = Series([1] * 5)
        x = d.rolling(window=5).kurt()
        tm.assert_series_equal(all_nan, x)

        # yields all NaN (window too small)
        d = Series(np.random.randn(5))
        x = d.rolling(window=3).kurt()
        tm.assert_series_equal(all_nan, x)

        # yields [NaN, NaN, NaN, 1.224307, 2.671499]
        d = Series([-1.50837035, -0.1297039, 0.19501095,
                    1.73508164, 0.41941401])
        expected = Series([np.NaN, np.NaN, np.NaN, 1.224307, 2.671499])
        x = d.rolling(window=4).kurt()
        tm.assert_series_equal(expected, x) 
Example #25
Source File: test_window.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_rolling_functions_window_non_shrinkage(self, f):
        # GH 7764
        s = Series(range(4))
        s_expected = Series(np.nan, index=s.index)
        df = DataFrame([[1, 5], [3, 2], [3, 9], [-1, 0]], columns=['A', 'B'])
        df_expected = DataFrame(np.nan, index=df.index, columns=df.columns)

        try:
            s_result = f(s)
            tm.assert_series_equal(s_result, s_expected)

            df_result = f(df)
            tm.assert_frame_equal(df_result, df_expected)
        except ImportError:

            # scipy needed for rolling_window
            pytest.skip("scipy not available") 
Example #26
Source File: test_series.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_numpy_cumsum(self):
        result = np.cumsum(self.bseries)
        expected = SparseSeries(self.bseries.to_dense().cumsum())
        tm.assert_sp_series_equal(result, expected)

        result = np.cumsum(self.zbseries)
        expected = self.zbseries.to_dense().cumsum().to_sparse()
        tm.assert_series_equal(result, expected)

        msg = "the 'dtype' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.cumsum(self.bseries, dtype=np.int64)

        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.cumsum(self.zbseries, out=result) 
Example #27
Source File: test_window.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_rolling_max_resample(self):

        indices = [datetime(1975, 1, i) for i in range(1, 6)]
        # So that we can have 3 datapoints on last day (4, 10, and 20)
        indices.append(datetime(1975, 1, 5, 1))
        indices.append(datetime(1975, 1, 5, 2))
        series = Series(list(range(0, 5)) + [10, 20], index=indices)
        # Use floats instead of ints as values
        series = series.map(lambda x: float(x))
        # Sort chronologically
        series = series.sort_index()

        # Default how should be max
        expected = Series([0.0, 1.0, 2.0, 3.0, 20.0],
                          index=[datetime(1975, 1, i, 0) for i in range(1, 6)])
        x = series.resample('D').max().rolling(window=1).max()
        tm.assert_series_equal(expected, x)

        # Now specify median (10.0)
        expected = Series([0.0, 1.0, 2.0, 3.0, 10.0],
                          index=[datetime(1975, 1, i, 0) for i in range(1, 6)])
        x = series.resample('D').median().rolling(window=1).max()
        tm.assert_series_equal(expected, x)

        # Now specify mean (4+10+20)/3
        v = (4.0 + 10.0 + 20.0) / 3.0
        expected = Series([0.0, 1.0, 2.0, 3.0, v],
                          index=[datetime(1975, 1, i, 0) for i in range(1, 6)])
        x = series.resample('D').mean().rolling(window=1).max()
        tm.assert_series_equal(expected, x) 
Example #28
Source File: test_window.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_frame_on2(self):

        # using multiple aggregation columns
        df = DataFrame({'A': [0, 1, 2, 3, 4],
                        'B': [0, 1, 2, np.nan, 4],
                        'C': Index([Timestamp('20130101 09:00:00'),
                                    Timestamp('20130101 09:00:02'),
                                    Timestamp('20130101 09:00:03'),
                                    Timestamp('20130101 09:00:05'),
                                    Timestamp('20130101 09:00:06')])},
                       columns=['A', 'C', 'B'])

        expected1 = DataFrame({'A': [0., 1, 3, 3, 7],
                               'B': [0, 1, 3, np.nan, 4],
                               'C': df['C']},
                              columns=['A', 'C', 'B'])

        result = df.rolling('2s', on='C').sum()
        expected = expected1
        tm.assert_frame_equal(result, expected)

        expected = Series([0, 1, 3, np.nan, 4], name='B')
        result = df.rolling('2s', on='C').B.sum()
        tm.assert_series_equal(result, expected)

        expected = expected1[['A', 'B', 'C']]
        result = df.rolling('2s', on='C')[['A', 'B', 'C']].sum()
        tm.assert_frame_equal(result, expected) 
Example #29
Source File: test_series.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_astype_all(self):
        orig = pd.Series(np.array([1, 2, 3]))
        s = SparseSeries(orig)

        types = [np.float64, np.float32, np.int64,
                 np.int32, np.int16, np.int8]
        for typ in types:
            dtype = SparseDtype(typ)
            res = s.astype(dtype)
            assert res.dtype == dtype
            tm.assert_series_equal(res.to_dense(), orig.astype(typ)) 
Example #30
Source File: test_series.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_reindex(self):
        def _compare_with_series(sps, new_index):
            spsre = sps.reindex(new_index)

            series = sps.to_dense()
            seriesre = series.reindex(new_index)
            seriesre = seriesre.to_sparse(fill_value=sps.fill_value)

            tm.assert_sp_series_equal(spsre, seriesre)
            tm.assert_series_equal(spsre.to_dense(), seriesre.to_dense())

        _compare_with_series(self.bseries, self.bseries.index[::2])
        _compare_with_series(self.bseries, list(self.bseries.index[::2]))
        _compare_with_series(self.bseries, self.bseries.index[:10])
        _compare_with_series(self.bseries, self.bseries.index[5:])

        _compare_with_series(self.zbseries, self.zbseries.index[::2])
        _compare_with_series(self.zbseries, self.zbseries.index[:10])
        _compare_with_series(self.zbseries, self.zbseries.index[5:])

        # special cases
        same_index = self.bseries.reindex(self.bseries.index)
        tm.assert_sp_series_equal(self.bseries, same_index)
        assert same_index is not self.bseries

        # corner cases
        sp = SparseSeries([], index=[])
        # TODO: sp_zero is not used anywhere...remove?
        sp_zero = SparseSeries([], index=[], fill_value=0)  # noqa
        _compare_with_series(sp, np.arange(10))

        # copy=True gives an independent copy; copy=False shares the data
        reindexed = self.bseries.reindex(self.bseries.index, copy=True)
        reindexed.sp_values[:] = 1.
        assert (self.bseries.sp_values != 1.).all()

        reindexed = self.bseries.reindex(self.bseries.index, copy=False)
        reindexed.sp_values[:] = 1.
        tm.assert_numpy_array_equal(self.bseries.sp_values, np.repeat(1., 10))