Python pandas.core.dtypes.dtypes.CategoricalDtype() Examples

The following are 30 code examples of pandas.core.dtypes.dtypes.CategoricalDtype(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas.core.dtypes.dtypes, or try the search function.
Example #1
Source File: dtypes.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_categoricaldtype_coerces_datetime(self):
    """Read date strings with a datetime-valued ``CategoricalDtype``.

    Two cases are checked through ``self.read_csv``: categories given as a
    date range, and a single ``Timestamp`` category that two differently
    formatted strings are expected to map onto.
    """
    # categories supplied as a date range
    year_starts = pd.date_range('2017', '2019', freq='AS')
    dtype = {'b': CategoricalDtype(year_starts)}
    data = "b\n2017-01-01\n2018-01-01\n2019-01-01"
    expected = pd.DataFrame({'b': Categorical(dtype['b'].categories)})
    tm.assert_frame_equal(self.read_csv(StringIO(data), dtype=dtype),
                          expected)

    # one Timestamp category, two spellings of the same instant
    dtype = {'b': CategoricalDtype([pd.Timestamp("2014")])}
    data = "b\n2014-01-01\n2014-01-01T00:00:00"
    repeated = [pd.Timestamp('2014'), pd.Timestamp('2014')]
    expected = pd.DataFrame({'b': Categorical(repeated)})
    tm.assert_frame_equal(self.read_csv(StringIO(data), dtype=dtype),
                          expected)
Example #2
Source File: test_concat.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_categorical_index_preserver(self):
    """Concatenate frames indexed by a categorical column.

    Frames sharing the same categorical index are expected to concatenate
    into a frame with that index; frames whose index categories differ are
    expected to raise ``TypeError``.
    """
    numbers = Series(np.arange(6, dtype='int64'))
    letters = Series(list('aabbca'))

    indexed = DataFrame({
        'A': numbers,
        'B': letters.astype(CategoricalDtype(list('cab'))),
    })
    df2 = indexed.set_index('B')
    result = pd.concat([df2, df2])

    doubled_letters = pd.concat([letters, letters])
    doubled = DataFrame({
        'A': pd.concat([numbers, numbers]),
        'B': doubled_letters.astype(CategoricalDtype(list('cab'))),
    })
    expected = doubled.set_index('B')
    tm.assert_frame_equal(result, expected)

    # mismatched categories
    other_index = Categorical(letters, categories=list('abe'))
    df3 = DataFrame({'A': numbers, 'B': other_index}).set_index('B')
    msg = "categories must match existing categories when appending"
    with pytest.raises(TypeError, match=msg):
        pd.concat([df2, df3])
Example #3
Source File: dtypes.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_categorical_categoricaldtype_chunksize(self):
        # GH 10153
        data = """a,b
1,a
1,b
1,b
2,c"""
        cats = ['a', 'b', 'c']
        expecteds = [pd.DataFrame({'a': [1, 1],
                                   'b': Categorical(['a', 'b'],
                                                    categories=cats)}),
                     pd.DataFrame({'a': [1, 2],
                                   'b': Categorical(['b', 'c'],
                                                    categories=cats)},
                                  index=[2, 3])]
        dtype = CategoricalDtype(cats)
        actuals = self.read_csv(StringIO(data), dtype={'b': dtype},
                                chunksize=2)

        for actual, expected in zip(actuals, expecteds):
            tm.assert_frame_equal(actual, expected) 
Example #4
Source File: test_categorical.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_getitem_category_type(self):
        # GH 14580
        # test iloc() on Series with Categorical data

        s = Series([1, 2, 3]).astype('category')

        # get slice
        result = s.iloc[0:2]
        expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
        tm.assert_series_equal(result, expected)

        # get list of indexes
        result = s.iloc[[0, 1]]
        expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
        tm.assert_series_equal(result, expected)

        # get boolean array
        result = s.iloc[[True, False, False]]
        expected = Series([1]).astype(CategoricalDtype([1, 2, 3]))
        tm.assert_series_equal(result, expected) 
Example #5
Source File: test_dtypes.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_astype_category(self, dtype_ordered, cat_ordered):
        # GH 10696/18593
        data = list('abcaacbab')
        cat = Categorical(data, categories=list('bac'), ordered=cat_ordered)

        # standard categories
        dtype = CategoricalDtype(ordered=dtype_ordered)
        result = cat.astype(dtype)
        expected = Categorical(
            data, categories=cat.categories, ordered=dtype_ordered)
        tm.assert_categorical_equal(result, expected)

        # non-standard categories
        dtype = CategoricalDtype(list('adc'), dtype_ordered)
        result = cat.astype(dtype)
        expected = Categorical(data, dtype=dtype)
        tm.assert_categorical_equal(result, expected)

        if dtype_ordered is False:
            # dtype='category' can't specify ordered, so only test once
            result = cat.astype('category')
            expected = cat
            tm.assert_categorical_equal(result, expected) 
Example #6
Source File: common.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_astype_category(self, copy, name, ordered):
        # GH 18630
        index = self.create_index()
        if name:
            index = index.rename(name)

        # standard categories
        dtype = CategoricalDtype(ordered=ordered)
        result = index.astype(dtype, copy=copy)
        expected = CategoricalIndex(index.values, name=name, ordered=ordered)
        tm.assert_index_equal(result, expected)

        # non-standard categories
        dtype = CategoricalDtype(index.unique().tolist()[:-1], ordered)
        result = index.astype(dtype, copy=copy)
        expected = CategoricalIndex(index.values, name=name, dtype=dtype)
        tm.assert_index_equal(result, expected)

        if ordered is False:
            # dtype='category' defaults to ordered=False, so only test once
            result = index.astype('category', copy=copy)
            expected = CategoricalIndex(index.values, name=name)
            tm.assert_index_equal(result, expected) 
Example #7
Source File: test_dtypes.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_astype_category(self, dtype_ordered, cat_ordered):
        # GH 10696/18593
        data = list('abcaacbab')
        cat = Categorical(data, categories=list('bac'), ordered=cat_ordered)

        # standard categories
        dtype = CategoricalDtype(ordered=dtype_ordered)
        result = cat.astype(dtype)
        expected = Categorical(
            data, categories=cat.categories, ordered=dtype_ordered)
        tm.assert_categorical_equal(result, expected)

        # non-standard categories
        dtype = CategoricalDtype(list('adc'), dtype_ordered)
        result = cat.astype(dtype)
        expected = Categorical(data, dtype=dtype)
        tm.assert_categorical_equal(result, expected)

        if dtype_ordered is False:
            # dtype='category' can't specify ordered, so only test once
            result = cat.astype('category')
            expected = cat
            tm.assert_categorical_equal(result, expected) 
Example #8
Source File: dtypes.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_categorical_categoricaldtype(self, categories, ordered):
        data = """a,b
1,a
1,b
1,b
2,c"""
        expected = pd.DataFrame({
            "a": [1, 1, 1, 2],
            "b": Categorical(['a', 'b', 'b', 'c'],
                             categories=categories,
                             ordered=ordered)
        })
        dtype = {"b": CategoricalDtype(categories=categories,
                                       ordered=ordered)}
        result = self.read_csv(StringIO(data), dtype=dtype)
        tm.assert_frame_equal(result, expected) 
Example #9
Source File: common.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_astype_category(self, copy, name, ordered):
        # GH 18630
        index = self.create_index()
        if name:
            index = index.rename(name)

        # standard categories
        dtype = CategoricalDtype(ordered=ordered)
        result = index.astype(dtype, copy=copy)
        expected = CategoricalIndex(index.values, name=name, ordered=ordered)
        tm.assert_index_equal(result, expected)

        # non-standard categories
        dtype = CategoricalDtype(index.unique().tolist()[:-1], ordered)
        result = index.astype(dtype, copy=copy)
        expected = CategoricalIndex(index.values, name=name, dtype=dtype)
        tm.assert_index_equal(result, expected)

        if ordered is False:
            # dtype='category' defaults to ordered=False, so only test once
            result = index.astype('category', copy=copy)
            expected = CategoricalIndex(index.values, name=name)
            tm.assert_index_equal(result, expected) 
Example #10
Source File: test_dtypes.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_categorical_category_dtype(all_parsers, categories, ordered):
    parser = all_parsers
    data = """a,b
1,a
1,b
1,b
2,c"""
    expected = DataFrame({
        "a": [1, 1, 1, 2],
        "b": Categorical(["a", "b", "b", "c"],
                         categories=categories,
                         ordered=ordered)
    })

    dtype = {"b": CategoricalDtype(categories=categories,
                                   ordered=ordered)}
    result = parser.read_csv(StringIO(data), dtype=dtype)
    tm.assert_frame_equal(result, expected) 
Example #11
Source File: test_dtypes.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_categorical_dtype_chunksize_explicit_categories(all_parsers):
    # see gh-10153
    parser = all_parsers
    data = """a,b
1,a
1,b
1,b
2,c"""
    cats = ["a", "b", "c"]
    expecteds = [DataFrame({"a": [1, 1],
                            "b": Categorical(["a", "b"],
                                             categories=cats)}),
                 DataFrame({"a": [1, 2],
                            "b": Categorical(["b", "c"],
                                             categories=cats)},
                           index=[2, 3])]
    dtype = CategoricalDtype(cats)
    actuals = parser.read_csv(StringIO(data), dtype={"b": dtype}, chunksize=2)

    for actual, expected in zip(actuals, expecteds):
        tm.assert_frame_equal(actual, expected) 
Example #12
Source File: test_concat.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_categorical_index_preserver(self):
    """Concatenate frames that use a categorical column as their index.

    Identically categorised frames are expected to concatenate cleanly;
    frames with different index categories are expected to raise
    ``TypeError``.
    """
    a = Series(np.arange(6, dtype='int64'))
    b = Series(list('aabbca'))

    with_cat_index = DataFrame({
        'A': a,
        'B': b.astype(CategoricalDtype(list('cab'))),
    })
    df2 = with_cat_index.set_index('B')
    result = pd.concat([df2, df2])

    twice = DataFrame({
        'A': pd.concat([a, a]),
        'B': pd.concat([b, b]).astype(CategoricalDtype(list('cab'))),
    })
    tm.assert_frame_equal(result, twice.set_index('B'))

    # mismatched categories
    mismatched = Categorical(b, categories=list('abe'))
    df3 = DataFrame({'A': a, 'B': mismatched}).set_index('B')
    with pytest.raises(TypeError):
        pd.concat([df2, df3])
Example #13
Source File: test_dtypes.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_astype_category(self, dtype_ordered, cat_ordered):
        # GH 10696/18593
        data = list('abcaacbab')
        cat = Categorical(data, categories=list('bac'), ordered=cat_ordered)

        # standard categories
        dtype = CategoricalDtype(ordered=dtype_ordered)
        result = cat.astype(dtype)
        expected = Categorical(
            data, categories=cat.categories, ordered=dtype_ordered)
        tm.assert_categorical_equal(result, expected)

        # non-standard categories
        dtype = CategoricalDtype(list('adc'), dtype_ordered)
        result = cat.astype(dtype)
        expected = Categorical(data, dtype=dtype)
        tm.assert_categorical_equal(result, expected)

        if dtype_ordered is False:
            # dtype='category' can't specify ordered, so only test once
            result = cat.astype('category')
            expected = cat
            tm.assert_categorical_equal(result, expected) 
Example #14
Source File: common.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_astype_category(self, copy, name, ordered):
        # GH 18630
        index = self.create_index()
        if name:
            index = index.rename(name)

        # standard categories
        dtype = CategoricalDtype(ordered=ordered)
        result = index.astype(dtype, copy=copy)
        expected = CategoricalIndex(index.values, name=name, ordered=ordered)
        tm.assert_index_equal(result, expected)

        # non-standard categories
        dtype = CategoricalDtype(index.unique().tolist()[:-1], ordered)
        result = index.astype(dtype, copy=copy)
        expected = CategoricalIndex(index.values, name=name, dtype=dtype)
        tm.assert_index_equal(result, expected)

        if ordered is False:
            # dtype='category' defaults to ordered=False, so only test once
            result = index.astype('category', copy=copy)
            expected = CategoricalIndex(index.values, name=name)
            tm.assert_index_equal(result, expected) 
Example #15
Source File: test_constructors.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_constructor_with_dtype(self, ordered):
        categories = ['b', 'a', 'c']
        dtype = CategoricalDtype(categories, ordered=ordered)
        result = Categorical(['a', 'b', 'a', 'c'], dtype=dtype)
        expected = Categorical(['a', 'b', 'a', 'c'], categories=categories,
                               ordered=ordered)
        tm.assert_categorical_equal(result, expected)
        assert result.ordered is ordered 
Example #16
Source File: test_constructors.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_constructor_from_categorical_with_unknown_dtype(self):
        dtype = CategoricalDtype(None, ordered=True)
        values = Categorical(['a', 'b', 'd'])
        result = Categorical(values, dtype=dtype)
        # We use values.categories, not dtype.categories
        expected = Categorical(['a', 'b', 'd'], categories=['a', 'b', 'd'],
                               ordered=True)
        tm.assert_categorical_equal(result, expected) 
Example #17
Source File: test_constructors.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_from_codes_with_nan_code(self):
    """``from_codes`` rejects codes containing NaN via either keyword."""
    # GH21767
    codes = [1, 2, np.nan]
    dtype = CategoricalDtype(categories=['a', 'b', 'c'])
    msg = "codes need to be array-like integers"

    # same failure expected through ``categories=`` and through ``dtype=``
    for kwargs in ({'categories': dtype.categories}, {'dtype': dtype}):
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes(codes, **kwargs)
Example #18
Source File: test_constructors.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_from_inferred_categories_coerces(self):
        cats = ['1', '2', 'bad']
        codes = np.array([0, 0, 1, 2], dtype='i8')
        dtype = CategoricalDtype([1, 2])
        result = Categorical._from_inferred_categories(cats, codes, dtype)
        expected = Categorical([1, 1, 2, np.nan])
        tm.assert_categorical_equal(result, expected) 
Example #19
Source File: test_constructors.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_from_codes(self):
    """Exercise ``Categorical.from_codes`` validation and a valid round trip.

    Invalid codes are checked through both the ``categories=`` and the
    ``dtype=`` keyword; invalid categories are checked through
    ``categories=`` only.
    """

    def assert_rejected(codes, dtype, msg):
        # the same ValueError is expected via categories= and via dtype=
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes(codes, categories=dtype.categories)
        with pytest.raises(ValueError, match=msg):
            Categorical.from_codes(codes, dtype=dtype)

    # too few categories
    dtype = CategoricalDtype(categories=[1, 2])
    assert_rejected([1, 2], dtype, "codes need to be between ")

    # no int codes
    assert_rejected(["a"], dtype, "codes need to be array-like integers")

    # no unique categories
    msg = "Categorical categories must be unique"
    with pytest.raises(ValueError, match=msg):
        Categorical.from_codes([0, 1, 2], categories=["a", "a", "b"])

    # NaN categories included
    msg = "Categorial categories cannot be null"
    with pytest.raises(ValueError, match=msg):
        Categorical.from_codes([0, 1, 2], categories=["a", "b", np.nan])

    # too negative
    dtype = CategoricalDtype(categories=["a", "b", "c"])
    assert_rejected([-2, 1, 2], dtype,
                    r"codes need to be between -1 and len\(categories\)-1")

    # valid codes give the same categorical through either keyword
    exp = Categorical(["a", "b", "c"], ordered=False)
    tm.assert_categorical_equal(
        exp, Categorical.from_codes([0, 1, 2], categories=dtype.categories))
    tm.assert_categorical_equal(
        exp, Categorical.from_codes([0, 1, 2], dtype=dtype))
Example #20
Source File: test_constructors.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_from_inferred_categories_dtype(self):
        cats = ['a', 'b', 'd']
        codes = np.array([0, 1, 0, 2], dtype='i8')
        dtype = CategoricalDtype(['c', 'b', 'a'], ordered=True)
        result = Categorical._from_inferred_categories(cats, codes, dtype)
        expected = Categorical(['a', 'b', 'a', 'd'],
                               categories=['c', 'b', 'a'],
                               ordered=True)
        tm.assert_categorical_equal(result, expected) 
Example #21
Source File: test_constructors.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_constructor_from_categorical_with_dtype(self):
        dtype = CategoricalDtype(['a', 'b', 'c'], ordered=True)
        values = Categorical(['a', 'b', 'd'])
        result = Categorical(values, dtype=dtype)
        # We use dtype.categories, not values.categories
        expected = Categorical(['a', 'b', 'd'], categories=['a', 'b', 'c'],
                               ordered=True)
        tm.assert_categorical_equal(result, expected) 
Example #22
Source File: test_constructors.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_constructor_dtype_and_others_raises(self):
    """Passing ``dtype`` together with ``categories``/``ordered`` raises."""
    dtype = CategoricalDtype(['a', 'b'], ordered=True)

    # each extra keyword is expected to conflict with ``dtype``
    conflicting = (
        {'categories': ['a', 'b']},
        {'ordered': True},
        {'ordered': False},
    )
    for extra in conflicting:
        with tm.assert_raises_regex(ValueError, "Cannot"):
            Categorical(['a', 'b'], dtype=dtype, **extra)
Example #23
Source File: test_merge.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_other_columns(self, left, right):
    """Merge on ``X`` after casting ``right.Z`` to category and check dtypes.

    The merged frame's dtypes are compared with categorical/object/
    categorical for ``X``/``Y``/``Z``, and the categorical dtypes of ``X``
    and ``Z`` are compared with those of the inputs.
    """
    # non-merge columns should preserve if possible
    z_as_category = right.Z.astype('category')
    right = right.assign(Z=z_as_category)

    merged = pd.merge(left, right, on='X')
    result = merged.dtypes.sort_index()
    expected_dtypes = [CategoricalDtype(), np.dtype('O'), CategoricalDtype()]
    expected = Series(expected_dtypes, index=['X', 'Y', 'Z'])
    assert_series_equal(result, expected)

    # categories are preserved
    left_x, merged_x = left.X.values, merged.X.values
    assert left_x.is_dtype_equal(merged_x)
    right_z, merged_z = right.Z.values, merged.Z.values
    assert right_z.is_dtype_equal(merged_z)
Example #24
Source File: test_dtypes.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_set_dtype_no_overlap(self):
        c = Categorical(['a', 'b', 'c'], ['d', 'e'])
        result = c._set_dtype(CategoricalDtype(['a', 'b']))
        expected = Categorical([None, None, None], categories=['a', 'b'])
        tm.assert_categorical_equal(result, expected) 
Example #25
Source File: test_dtypes.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_set_dtype_same(self):
        c = Categorical(['a', 'b', 'c'])
        result = c._set_dtype(CategoricalDtype(['a', 'b', 'c']))
        tm.assert_categorical_equal(result, c) 
Example #26
Source File: test_missing.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_set_dtype_nans(self):
        c = Categorical(['a', 'b', np.nan])
        result = c._set_dtype(CategoricalDtype(['a', 'c']))
        tm.assert_numpy_array_equal(result.codes, np.array([0, -1, -1],
                                                           dtype='int8')) 
Example #27
Source File: test_apply.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_apply(self):
    """Exercise ``DataFrame.apply`` with a ufunc, an aggregator and a cast.

    Also checks that an out-of-range ``axis`` raises ``ValueError`` and
    that casting every column to ``'category'`` through ``apply`` gives
    ``CategoricalDtype`` columns.
    """
    with np.errstate(all='ignore'):
        # ufunc
        rooted = self.frame.apply(np.sqrt)
        tm.assert_series_equal(np.sqrt(self.frame['A']), rooted['A'])

        # aggregator, column-wise
        column_means = self.frame.apply(np.mean)
        assert column_means['A'] == np.mean(self.frame['A'])

        # aggregator, row-wise
        first_label = self.frame.index[0]
        row_means = self.frame.apply(np.mean, axis=1)
        assert row_means[first_label] == np.mean(self.frame.xs(first_label))
        assert row_means.index is self.frame.index  # want this

    # invalid axis
    rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    df = DataFrame(rows, index=['a', 'a', 'c'])
    with pytest.raises(ValueError):
        df.apply(lambda x: x, 2)

    # see gh-9573
    df = DataFrame({'c0': ['A', 'A', 'B', 'B'],
                    'c1': ['C', 'C', 'D', 'D']})
    df = df.apply(lambda ts: ts.astype('category'))

    assert df.shape == (4, 2)
    for column in ('c0', 'c1'):
        assert isinstance(df[column].dtype, CategoricalDtype)
Example #28
Source File: test_indexing.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_assignment(self):
        """Assign binned categorical data to new DataFrame columns.

        The ``value`` column is binned with ``pd.cut``; the result is assigned
        once as raw values (``D``) and once as a Series (``E``), and the frame
        dtypes and stored values are checked after each step.
        """
        # assignment
        # 100 random integers drawn from [0, 10000), stored as int32
        df = DataFrame({'value': np.array(
            np.random.randint(0, 10000, 100), dtype='int32')})
        # one label per 500-wide bin: "0 - 499", "500 - 999", ...
        labels = Categorical(["{0} - {1}".format(i, i + 499)
                              for i in range(0, 10000, 500)])

        df = df.sort_values(by=['value'], ascending=True)
        # right=False: each bin includes its left edge and excludes its right
        s = pd.cut(df.value, range(0, 10500, 500), right=False, labels=labels)
        d = s.values
        df['D'] = d
        # NOTE(review): presumably a smoke check that the repr does not raise
        str(df)

        result = df.dtypes
        expected = Series(
            [np.dtype('int32'), CategoricalDtype(categories=labels,
                                                 ordered=False)],
            index=['value', 'D'])
        tm.assert_series_equal(result, expected)

        # same data again, this time assigned as a Series
        df['E'] = s
        str(df)

        result = df.dtypes
        expected = Series([np.dtype('int32'),
                           CategoricalDtype(categories=labels, ordered=False),
                           CategoricalDtype(categories=labels, ordered=False)],
                          index=['value', 'D', 'E'])
        tm.assert_series_equal(result, expected)

        result1 = df['D']
        result2 = df['E']
        # compares the values held by the Series' private block with ``d``
        tm.assert_categorical_equal(result1._data._block.values, d)

        # sorting
        # rename so the Series name matches column 'E' in the comparison
        s.name = 'E'
        tm.assert_series_equal(result2.sort_index(), s.sort_index())

        # builds a frame from a categorical Series; no assertion follows in
        # the visible code -- NOTE(review): likely a construction smoke check
        cat = Categorical([1, 2, 3, 10], categories=[1, 2, 3, 4, 10])
        df = DataFrame(Series(cat))
Example #29
Source File: test_algos.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_value_counts(self):
    """Count values of a four-bin ``cut`` over four seeded random numbers.

    The counts are compared, after sorting by index, with one observation
    per interval of the hard-coded breaks.
    """
    np.random.seed(1234)
    from pandas.core.reshape.tile import cut

    binned = cut(np.random.randn(4), 4)
    result = algos.value_counts(binned)

    # interval edges expected for the seeded sample
    edges = [-1.194, -0.535, 0.121, 0.777, 1.433]
    intervals = IntervalIndex.from_breaks(edges)
    expected = Series([1, 1, 1, 1],
                      index=intervals.astype(CDT(ordered=True)))
    tm.assert_series_equal(result.sort_index(), expected.sort_index())
Example #30
Source File: test_category.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_construction_with_categorical_dtype(self):
    """Build categorical indexes from a ``CategoricalDtype``.

    Both ``CategoricalIndex`` and ``Index`` are expected to accept the
    dtype, and to raise ``ValueError`` when ``categories`` or ``ordered``
    is passed alongside it.
    """
    # construction with CategoricalDtype
    # GH18109
    data = 'a a b b'.split()
    cats = 'c b a'.split()
    ordered = True
    dtype = CategoricalDtype(categories=cats, ordered=ordered)

    expected = CategoricalIndex(data, categories=cats, ordered=ordered)
    tm.assert_index_equal(CategoricalIndex(data, dtype=dtype), expected,
                          exact=True)

    # GH 19032
    tm.assert_index_equal(Index(data, dtype=dtype), expected, exact=True)

    # error when combining categories/ordered and dtype kwargs
    msg = "Cannot specify `categories` or `ordered` together with `dtype`."
    for extra in ({'categories': cats}, {'ordered': ordered}):
        for constructor in (CategoricalIndex, Index):
            with pytest.raises(ValueError, match=msg):
                constructor(data, dtype=dtype, **extra)