Python pandas.util.testing.assert_categorical_equal() Examples

The following are 30 code examples of pandas.util.testing.assert_categorical_equal(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas.util.testing, or try the search function.
Example #1
Source File: test_analytics.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_shift(self):
        # GH 9416
        cat = Categorical(['a', 'b', 'c', 'd', 'a'])

        # shift forward
        sp1 = cat.shift(1)
        xp1 = Categorical([np.nan, 'a', 'b', 'c', 'd'])
        tm.assert_categorical_equal(sp1, xp1)
        tm.assert_categorical_equal(cat[:-1], sp1[1:])

        # shift back
        sn2 = cat.shift(-2)
        xp2 = Categorical(['c', 'd', 'a', np.nan, np.nan],
                          categories=['a', 'b', 'c', 'd'])
        tm.assert_categorical_equal(sn2, xp2)
        tm.assert_categorical_equal(cat[2:], sn2[:-2])

        # shift by zero
        tm.assert_categorical_equal(cat, cat.shift(0)) 
Example #2
Source File: test_indexing.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_setitem(self):

        # int/positional
        c = self.factor.copy()
        c[0] = 'b'
        assert c[0] == 'b'
        c[-1] = 'a'
        assert c[-1] == 'a'

        # boolean
        c = self.factor.copy()
        indexer = np.zeros(len(c), dtype='bool')
        indexer[0] = True
        indexer[-1] = True
        c[indexer] = 'c'
        expected = Categorical(['c', 'b', 'b', 'a', 'a', 'c', 'c', 'c'],
                               ordered=True)

        tm.assert_categorical_equal(c, expected) 
Example #3
Source File: test_union_categoricals.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_union_categoricals_ordered(self):
        """Union of ordered categoricals: mismatches raise, matches combine."""
        ordered = Categorical([1, 2, 3], ordered=True)
        unordered = Categorical([1, 2, 3], ordered=False)

        # mixing an ordered and an unordered input is rejected
        ordered_msg = 'Categorical.ordered must be the same'
        with pytest.raises(TypeError, match=ordered_msg):
            union_categoricals([ordered, unordered])

        # an ordered categorical unioned with itself stays ordered
        combined = union_categoricals([ordered, ordered])
        tm.assert_categorical_equal(
            combined, Categorical([1, 2, 3, 1, 2, 3], ordered=True))

        # a missing value in one input is carried into the result
        with_nan = Categorical([1, 2, 3, np.nan], ordered=True)
        partial = Categorical([3, 2], categories=[1, 2, 3], ordered=True)
        combined = union_categoricals([with_nan, partial])
        tm.assert_categorical_equal(
            combined, Categorical([1, 2, 3, np.nan, 3, 2], ordered=True))

        # the same categories listed in a different order are rejected
        ascending = Categorical([1, 2, 3], ordered=True)
        descending = Categorical([1, 2, 3], categories=[3, 2, 1],
                                 ordered=True)
        categories_msg = "to union ordered Categoricals, all categories must be the same"
        with pytest.raises(TypeError, match=categories_msg):
            union_categoricals([ascending, descending])
Example #4
Source File: test_union_categoricals.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_union_categorical_unwrap(self):
        """Union a Categorical with Series / CategoricalIndex inputs (GH 14173).

        Every combination is expected to give the same plain Categorical;
        a bare list among the inputs is expected to raise ``TypeError``.
        """
        left = Categorical(['a', 'b'])
        right = pd.Series(['b', 'c'], dtype='category')
        combined = union_categoricals([left, right])
        wanted = Categorical(['a', 'b', 'b', 'c'])
        tm.assert_categorical_equal(combined, wanted)

        # right-hand side wrapped in a CategoricalIndex
        right = CategoricalIndex(right)
        tm.assert_categorical_equal(union_categoricals([left, right]), wanted)

        # left-hand side wrapped in a Series as well
        left = Series(left)
        tm.assert_categorical_equal(union_categoricals([left, right]), wanted)

        with pytest.raises(TypeError):
            union_categoricals([left, ['a', 'b', 'c']])
Example #5
Source File: test_analytics.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_describe_categorical_columns(self):
        # GH 11558
        columns = pd.CategoricalIndex(['int1', 'int2', 'obj'],
                                      ordered=True, name='XXX')
        df = DataFrame({'int1': [10, 20, 30, 40, 50],
                        'int2': [10, 20, 30, 40, 50],
                        'obj': ['A', 0, None, 'X', 1]},
                       columns=columns)
        result = df.describe()

        exp_columns = pd.CategoricalIndex(['int1', 'int2'],
                                          categories=['int1', 'int2', 'obj'],
                                          ordered=True, name='XXX')
        expected = DataFrame({'int1': [5, 30, df.int1.std(),
                                       10, 20, 30, 40, 50],
                              'int2': [5, 30, df.int2.std(),
                                       10, 20, 30, 40, 50]},
                             index=['count', 'mean', 'std', 'min', '25%',
                                    '50%', '75%', 'max'],
                             columns=exp_columns)
        tm.assert_frame_equal(result, expected)
        tm.assert_categorical_equal(result.columns.values,
                                    expected.columns.values) 
Example #6
Source File: test_dtypes.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_astype_category(self, dtype_ordered, cat_ordered):
        # GH 10696/18593
        data = list('abcaacbab')
        cat = Categorical(data, categories=list('bac'), ordered=cat_ordered)

        # standard categories
        dtype = CategoricalDtype(ordered=dtype_ordered)
        result = cat.astype(dtype)
        expected = Categorical(
            data, categories=cat.categories, ordered=dtype_ordered)
        tm.assert_categorical_equal(result, expected)

        # non-standard categories
        dtype = CategoricalDtype(list('adc'), dtype_ordered)
        result = cat.astype(dtype)
        expected = Categorical(data, dtype=dtype)
        tm.assert_categorical_equal(result, expected)

        if dtype_ordered is False:
            # dtype='category' can't specify ordered, so only test once
            result = cat.astype('category')
            expected = cat
            tm.assert_categorical_equal(result, expected) 
Example #7
Source File: test_categorical.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_unstack_categorical():
    """Unstack a groupby count over a categorical key (GH11558).

    The data is the example taken from the original issue.
    """
    frame = pd.DataFrame({'a': range(10),
                          'medium': ['A', 'B'] * 5,
                          'artist': list('XYXXY') * 2})
    frame['medium'] = frame['medium'].astype('category')

    counts = frame.groupby(['artist', 'medium'], observed=False)['a'].count()
    wide = counts.unstack()
    summary = wide.describe()

    # the summary columns are expected to be an unordered CategoricalIndex
    wanted_columns = pd.CategoricalIndex(['A', 'B'], ordered=False,
                                         name='medium')
    tm.assert_index_equal(summary.columns, wanted_columns)
    tm.assert_categorical_equal(summary.columns.values,
                                wanted_columns.values)

    # adding the two medium columns gives one total per artist
    totals = wide['A'] + wide['B']
    wanted_totals = pd.Series(
        [6, 4], index=pd.Index(['X', 'Y'], name='artist'))
    tm.assert_series_equal(totals, wanted_totals)
Example #8
Source File: test_constructors.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_constructor_with_generator(self):
        # This was raising an Error in isna(single_val).any() because isna
        # returned a scalar for a generator
        xrange = range

        exp = Categorical([0, 1, 2])
        cat = Categorical((x for x in [0, 1, 2]))
        tm.assert_categorical_equal(cat, exp)
        cat = Categorical(xrange(3))
        tm.assert_categorical_equal(cat, exp)

        # This uses xrange internally
        from pandas.core.index import MultiIndex
        MultiIndex.from_product([range(5), ['a', 'b', 'c']])

        # check that categories accept generators and sequences
        cat = Categorical([0, 1, 2], categories=(x for x in [0, 1, 2]))
        tm.assert_categorical_equal(cat, exp)
        cat = Categorical([0, 1, 2], categories=xrange(3))
        tm.assert_categorical_equal(cat, exp) 
Example #9
Source File: test_apply.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_map_categorical(self):
        """``Series.map`` over an ordered categorical Series.

        Checks three things: a mapping that keeps the categories distinct
        gives a categorical result, a mapping that sends every value to the
        same label gives an object-dtype result, and ``na_action='ignore'``
        is expected to raise ``NotImplementedError``.
        """
        values = pd.Categorical(list('ABBABCD'), categories=list('DCBA'),
                                ordered=True)
        s = pd.Series(values, name='XX', index=list('abcdefg'))

        result = s.map(lambda x: x.lower())
        exp_values = pd.Categorical(list('abbabcd'), categories=list('dcba'),
                                    ordered=True)
        exp = pd.Series(exp_values, name='XX', index=list('abcdefg'))
        tm.assert_series_equal(result, exp)
        tm.assert_categorical_equal(result.values, exp_values)

        result = s.map(lambda x: 'A')
        exp = pd.Series(['A'] * 7, name='XX', index=list('abcdefg'))
        tm.assert_series_equal(result, exp)
        # Compare against the builtin ``object``: the ``np.object`` alias was
        # deprecated in NumPy 1.20 and is absent from later releases.
        assert result.dtype == object

        with pytest.raises(NotImplementedError):
            s.map(lambda x: x, na_action='ignore')
Example #10
Source File: test_analytics.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_unique_index_series(self):
        """``unique`` on a Categorical and on an Index / Series wrapping it."""

        def check(cat, wanted, also_wrapped):
            # the bare categorical first, then the Index and Series wrappers
            tm.assert_categorical_equal(cat.unique(), wanted)
            also_wrapped(cat, wanted)

        def wrapped(cat, wanted):
            tm.assert_index_equal(Index(cat).unique(), Index(wanted))
            tm.assert_categorical_equal(Series(cat).unique(), wanted)

        # Categorical.unique sorts categories by appearance order
        # if ordered=False
        check(Categorical([3, 1, 2, 2, 1], categories=[3, 2, 1]),
              Categorical([3, 1, 2], categories=[3, 1, 2]),
              wrapped)

        check(Categorical([1, 1, 2, 2], categories=[3, 2, 1]),
              Categorical([1, 2], categories=[1, 2]),
              wrapped)

        # Categorical.unique keeps categories order if ordered=True
        check(Categorical([3, 1, 2, 2, 1], categories=[3, 2, 1],
                          ordered=True),
              Categorical([3, 1, 2], categories=[3, 2, 1], ordered=True),
              wrapped)
Example #11
Source File: test_qcut.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_qcut():
    """Quartile-bin random data with ``qcut`` and compare against ``cut``."""
    samples = np.random.randn(1000)

    # The bins are stored as an Index that has been rounded, so the edges
    # are only compared approximately.
    labels, bins = qcut(samples, 4, retbins=True)
    ex_bins = quantile(samples, [0, .25, .5, .75, 1.])

    left_edges = labels.categories.left.values
    assert np.allclose(left_edges, ex_bins[:-1], atol=1e-2)

    right_edges = labels.categories.right.values
    assert np.allclose(right_edges, ex_bins[1:], atol=1e-2)

    # cutting on the quantile edges directly should give the same labels
    tm.assert_categorical_equal(
        labels, cut(samples, ex_bins, include_lowest=True))
Example #12
Source File: test_analytics.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_apply_categorical(self):
        values = pd.Categorical(list('ABBABCD'), categories=list('DCBA'),
                                ordered=True)
        s = pd.Series(values, name='XX', index=list('abcdefg'))
        result = s.apply(lambda x: x.lower())

        # should be categorical dtype when the number of categories are
        # the same
        values = pd.Categorical(list('abbabcd'), categories=list('dcba'),
                                ordered=True)
        exp = pd.Series(values, name='XX', index=list('abcdefg'))
        tm.assert_series_equal(result, exp)
        tm.assert_categorical_equal(result.values, exp.values)

        result = s.apply(lambda x: 'A')
        exp = pd.Series(['A'] * 7, name='XX', index=list('abcdefg'))
        tm.assert_series_equal(result, exp)
        assert result.dtype == np.object 
Example #13
Source File: test_api.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_remove_categories(self):
        """``remove_categories`` with and without ``inplace``."""
        cat = Categorical(["a", "b", "c", "a"], ordered=True)
        untouched = cat.copy()
        trimmed = Categorical(["a", "b", np.nan, "a"],
                              categories=["a", "b"], ordered=True)

        # first inplace == False, passing the removal as a scalar and then
        # as a list: the input must stay as it was both times
        for removal in ("c", ["c"]):
            outcome = cat.remove_categories(removal)
            tm.assert_categorical_equal(cat, untouched)
            tm.assert_categorical_equal(outcome, trimmed)

        # inplace == True: the input itself changes and nothing is returned
        outcome = cat.remove_categories("c", inplace=True)
        tm.assert_categorical_equal(cat, trimmed)
        assert outcome is None

        # removal is not in categories
        with pytest.raises(ValueError):
            cat.remove_categories(["c"])
Example #14
Source File: test_cut.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_no_right():
    """``cut`` into four bins with ``right=False`` (left-closed intervals)."""
    values = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1, 2.575])
    binned, edges = cut(values, 4, right=False, retbins=True)

    # expected bin of every value, as positions into the left-closed
    # intervals built from the rounded edges
    positions = [0, 0, 0, 2, 3, 0, 1]
    left_closed = IntervalIndex.from_breaks(edges.round(3), closed="left")
    wanted = Categorical(left_closed.take(positions), ordered=True)

    tm.assert_categorical_equal(binned, wanted)
    tm.assert_almost_equal(
        edges, np.array([0.2, 2.575, 4.95, 7.325, 9.7095]))
Example #15
Source File: test_cut.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_right():
    """``cut`` into four bins with ``right=True``."""
    values = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1, 2.575])
    binned, edges = cut(values, 4, right=True, retbins=True)

    # build the ordered categorical over all intervals first, then pick
    # the expected bin of every value by position
    all_bins = Categorical(IntervalIndex.from_breaks(edges.round(3)),
                           ordered=True)
    wanted = all_bins.take([0, 0, 0, 2, 3, 0, 0])

    tm.assert_categorical_equal(binned, wanted)
    tm.assert_almost_equal(
        edges, np.array([0.1905, 2.575, 4.95, 7.325, 9.7]))
Example #16
Source File: test_common.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_read_fspath_all(self, reader, module, path, datapath):
        """Reading via a ``CustomFSPath`` wrapper must match reading the path.

        The test is skipped when ``module`` cannot be imported.
        """
        pytest.importorskip(module)
        path = datapath(*path)

        via_wrapper = reader(CustomFSPath(path))
        direct = reader(path)

        # the '.pickle' case is compared as a categorical, every other
        # case as a frame
        if not path.endswith('.pickle'):
            tm.assert_frame_equal(via_wrapper, direct)
        else:
            tm.assert_categorical_equal(via_wrapper, direct)
Example #17
Source File: test_construct_from_scalar.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_cast_1d_array_like_from_scalar_categorical():
    """Build a categorical array from a scalar and a dtype (see gh-19565).

    The Categorical result from a scalar did not maintain the categories
    and ordering of the passed dtype.
    """
    categories = ["a", "b", "c"]
    target_dtype = CategoricalDtype(categories=categories, ordered=False)
    wanted = Categorical(["a", "a"], categories=categories)

    built = construct_1d_arraylike_from_scalar(
        "a", len(wanted), target_dtype)
    tm.assert_categorical_equal(
        built, wanted, check_category_order=True, check_dtype=True)
Example #18
Source File: test_algos.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_order_of_appearance(self):
        # 9346
        # light testing of guarantee of order of appearance
        # these also are the doc-examples
        result = pd.unique(Series([2, 1, 3, 3]))
        tm.assert_numpy_array_equal(result,
                                    np.array([2, 1, 3], dtype='int64'))

        result = pd.unique(Series([2] + [1] * 5))
        tm.assert_numpy_array_equal(result,
                                    np.array([2, 1], dtype='int64'))

        result = pd.unique(Series([Timestamp('20160101'),
                                   Timestamp('20160101')]))
        expected = np.array(['2016-01-01T00:00:00.000000000'],
                            dtype='datetime64[ns]')
        tm.assert_numpy_array_equal(result, expected)

        result = pd.unique(Index(
            [Timestamp('20160101', tz='US/Eastern'),
             Timestamp('20160101', tz='US/Eastern')]))
        expected = DatetimeIndex(['2016-01-01 00:00:00'],
                                 dtype='datetime64[ns, US/Eastern]',
                                 freq=None)
        tm.assert_index_equal(result, expected)

        result = pd.unique(list('aabc'))
        expected = np.array(['a', 'b', 'c'], dtype=object)
        tm.assert_numpy_array_equal(result, expected)

        result = pd.unique(Series(Categorical(list('aabc'))))
        expected = Categorical(list('abc'))
        tm.assert_categorical_equal(result, expected) 
Example #19
Source File: test_algos.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_categorical(self):
        """Mode of a Categorical via ``algos.mode`` and ``Categorical.mode``."""

        def check(cat, wanted):
            # both entry points are expected to agree with ``wanted``
            tm.assert_categorical_equal(algos.mode(cat), wanted)
            tm.assert_categorical_equal(cat.mode(), wanted)

        # every value occurs once: the expectation is the input itself
        distinct = Categorical([1, 2])
        check(distinct, distinct)

        # mixed-type values with a single most frequent entry
        check(Categorical([1, 'a', 'a']),
              Categorical(['a'], categories=[1, 'a']))

        # two values tied for most frequent
        check(Categorical([1, 1, 2, 3, 3]),
              Categorical([1, 3], categories=[1, 2, 3]))
Example #20
Source File: test_categorical.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_describe_categorical_columns():
    """Groupby ``describe`` on a frame with categorical columns (GH 11558)."""
    cats = pd.CategoricalIndex(
        ['qux', 'foo', 'baz', 'bar'],
        categories=['foo', 'bar', 'baz', 'qux'],
        ordered=True)
    frame = DataFrame(np.random.randn(20, 4), columns=cats)
    described = frame.groupby([1, 2, 3, 4] * 5).describe()

    # after stacking, the columns are expected to equal the original
    # categorical index, both as an index and as raw categorical values
    tm.assert_index_equal(described.stack().columns, cats)
    restacked_values = described.stack().columns.values
    tm.assert_categorical_equal(restacked_values, cats.values)
Example #21
Source File: test_union_categoricals.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_union_categorical_same_category(self):
        # check fastpath
        c1 = Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4])
        c2 = Categorical([3, 2, 1, np.nan], categories=[1, 2, 3, 4])
        res = union_categoricals([c1, c2])
        exp = Categorical([1, 2, 3, 4, 3, 2, 1, np.nan],
                          categories=[1, 2, 3, 4])
        tm.assert_categorical_equal(res, exp)

        c1 = Categorical(['z', 'z', 'z'], categories=['x', 'y', 'z'])
        c2 = Categorical(['x', 'x', 'x'], categories=['x', 'y', 'z'])
        res = union_categoricals([c1, c2])
        exp = Categorical(['z', 'z', 'z', 'x', 'x', 'x'],
                          categories=['x', 'y', 'z'])
        tm.assert_categorical_equal(res, exp) 
Example #22
Source File: test_cut.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_bins():
    """``cut`` into three bins and check both labels and bin edges."""
    values = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1])
    binned, edges = cut(values, 3, retbins=True)

    # expected bin of every value, as positions into the intervals built
    # from the rounded edges
    positions = [0, 0, 0, 1, 2, 0]
    chosen = IntervalIndex.from_breaks(edges.round(3)).take(positions)
    wanted = Categorical(chosen, ordered=True)

    tm.assert_categorical_equal(binned, wanted)
    tm.assert_almost_equal(
        edges, np.array([0.1905, 3.36666667, 6.53333333, 9.7]))
Example #23
Source File: test_qcut.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_qcut_index():
    """``qcut`` of two points into two quantile bins."""
    binned = qcut([0, 2], 2)

    # the lower bin is expected to start just below the minimum value
    lower, upper = Interval(-0.001, 1), Interval(1, 2)
    wanted = Categorical([lower, upper], ordered=True)
    tm.assert_categorical_equal(binned, wanted)
Example #24
Source File: test_astype.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_astype_category(self):
        obj = pd.period_range("2000", periods=2)
        result = obj.astype('category')
        expected = pd.CategoricalIndex([pd.Period('2000-01-01', freq="D"),
                                        pd.Period('2000-01-02', freq="D")])
        tm.assert_index_equal(result, expected)

        result = obj._data.astype('category')
        expected = expected.values
        tm.assert_categorical_equal(result, expected) 
Example #25
Source File: test_astype.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_astype_category(self, tz):
        obj = pd.date_range("2000", periods=2, tz=tz)
        result = obj.astype('category')
        expected = pd.CategoricalIndex([pd.Timestamp('2000-01-01', tz=tz),
                                        pd.Timestamp('2000-01-02', tz=tz)])
        tm.assert_index_equal(result, expected)

        result = obj._data.astype('category')
        expected = expected.values
        tm.assert_categorical_equal(result, expected) 
Example #26
Source File: test_category.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_map_with_nan(self, data, f):  # GH 24241
        values = pd.Categorical(data)
        result = values.map(f)
        if data[1] == 1:
            expected = pd.Categorical([False, False, np.nan])
            tm.assert_categorical_equal(result, expected)
        else:
            expected = pd.Index([False, False, np.nan])
            tm.assert_index_equal(result, expected) 
Example #27
Source File: test_astype.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_astype_category(self):
        obj = pd.timedelta_range("1H", periods=2, freq='H')

        result = obj.astype('category')
        expected = pd.CategoricalIndex([pd.Timedelta('1H'),
                                        pd.Timedelta('2H')])
        tm.assert_index_equal(result, expected)

        result = obj._data.astype('category')
        expected = expected.values
        tm.assert_categorical_equal(result, expected) 
Example #28
Source File: test_constructors.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_from_inferred_categories_coerces(self):
        cats = ['1', '2', 'bad']
        codes = np.array([0, 0, 1, 2], dtype='i8')
        dtype = CategoricalDtype([1, 2])
        result = Categorical._from_inferred_categories(cats, codes, dtype)
        expected = Categorical([1, 1, 2, np.nan])
        tm.assert_categorical_equal(result, expected) 
Example #29
Source File: test_constructors.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_from_inferred_categories_dtype(self):
        cats = ['a', 'b', 'd']
        codes = np.array([0, 1, 0, 2], dtype='i8')
        dtype = CategoricalDtype(['c', 'b', 'a'], ordered=True)
        result = Categorical._from_inferred_categories(cats, codes, dtype)
        expected = Categorical(['a', 'b', 'a', 'd'],
                               categories=['c', 'b', 'a'],
                               ordered=True)
        tm.assert_categorical_equal(result, expected) 
Example #30
Source File: test_constructors.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_from_inferred_categories_sorts(self, dtype):
        cats = ['b', 'a']
        codes = np.array([0, 1, 1, 1], dtype='i8')
        result = Categorical._from_inferred_categories(cats, codes, dtype)
        expected = Categorical.from_codes([1, 0, 0, 0], ['a', 'b'])
        tm.assert_categorical_equal(result, expected)