Python pandas.core.dtypes.dtypes.CategoricalDtype() Examples
The following are 30 code examples of pandas.core.dtypes.dtypes.CategoricalDtype().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas.core.dtypes.dtypes, or try the search function.
Example #1
Source File: dtypes.py From vnpy_crypto with MIT License | 6 votes |
def test_categoricaldtype_coerces_datetime(self):
    """Check that ``read_csv`` coerces date strings to datetime categories.

    Two cases are covered: a ``CategoricalDtype`` whose categories come from
    ``pd.date_range`` and one whose single category is a ``pd.Timestamp``.
    In both, the parsed column must equal a ``Categorical`` built from the
    datetime values directly.
    """
    # 'YS' is the year-start alias; the older spelling 'AS' is deprecated
    # in recent pandas releases.
    dtype = {
        'b': CategoricalDtype(pd.date_range('2017', '2019', freq='YS'))
    }
    data = "b\n2017-01-01\n2018-01-01\n2019-01-01"
    expected = pd.DataFrame({'b': Categorical(dtype['b'].categories)})
    result = self.read_csv(StringIO(data), dtype=dtype)
    tm.assert_frame_equal(result, expected)

    # Two spellings of the same instant must map onto the one category.
    dtype = {
        'b': CategoricalDtype([pd.Timestamp("2014")])
    }
    data = "b\n2014-01-01\n2014-01-01T00:00:00"
    expected = pd.DataFrame({'b': Categorical([pd.Timestamp('2014')] * 2)})
    result = self.read_csv(StringIO(data), dtype=dtype)
    tm.assert_frame_equal(result, expected)
Example #2
Source File: test_concat.py From recruit with Apache License 2.0 | 6 votes |
def test_categorical_index_preserver(self):
    """Check ``pd.concat`` on frames indexed by a categorical column.

    Concatenating a frame with itself must equal a frame built from the
    concatenated columns and indexed by the same ``CategoricalDtype``.
    Concatenating frames whose index categories differ must raise
    ``TypeError``.
    """
    a = Series(np.arange(6, dtype='int64'))
    b = Series(list('aabbca'))

    df2 = DataFrame({'A': a,
                     'B': b.astype(CategoricalDtype(list('cab')))
                     }).set_index('B')
    result = pd.concat([df2, df2])
    expected = DataFrame(
        {'A': pd.concat([a, a]),
         'B': pd.concat([b, b]).astype(CategoricalDtype(list('cab')))
         }).set_index('B')
    tm.assert_frame_equal(result, expected)

    # wrong categories
    df3 = DataFrame({'A': a,
                     'B': Categorical(b, categories=list('abe'))
                     }).set_index('B')
    msg = "categories must match existing categories when appending"
    with pytest.raises(TypeError, match=msg):
        pd.concat([df2, df3])
Example #3
Source File: dtypes.py From vnpy_crypto with MIT License | 6 votes |
def test_categorical_categoricaldtype_chunksize(self): # GH 10153 data = """a,b 1,a 1,b 1,b 2,c""" cats = ['a', 'b', 'c'] expecteds = [pd.DataFrame({'a': [1, 1], 'b': Categorical(['a', 'b'], categories=cats)}), pd.DataFrame({'a': [1, 2], 'b': Categorical(['b', 'c'], categories=cats)}, index=[2, 3])] dtype = CategoricalDtype(cats) actuals = self.read_csv(StringIO(data), dtype={'b': dtype}, chunksize=2) for actual, expected in zip(actuals, expecteds): tm.assert_frame_equal(actual, expected)
Example #4
Source File: test_categorical.py From recruit with Apache License 2.0 | 6 votes |
def test_getitem_category_type(self): # GH 14580 # test iloc() on Series with Categorical data s = Series([1, 2, 3]).astype('category') # get slice result = s.iloc[0:2] expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3])) tm.assert_series_equal(result, expected) # get list of indexes result = s.iloc[[0, 1]] expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3])) tm.assert_series_equal(result, expected) # get boolean array result = s.iloc[[True, False, False]] expected = Series([1]).astype(CategoricalDtype([1, 2, 3])) tm.assert_series_equal(result, expected)
Example #5
Source File: test_dtypes.py From vnpy_crypto with MIT License | 6 votes |
def test_astype_category(self, dtype_ordered, cat_ordered): # GH 10696/18593 data = list('abcaacbab') cat = Categorical(data, categories=list('bac'), ordered=cat_ordered) # standard categories dtype = CategoricalDtype(ordered=dtype_ordered) result = cat.astype(dtype) expected = Categorical( data, categories=cat.categories, ordered=dtype_ordered) tm.assert_categorical_equal(result, expected) # non-standard categories dtype = CategoricalDtype(list('adc'), dtype_ordered) result = cat.astype(dtype) expected = Categorical(data, dtype=dtype) tm.assert_categorical_equal(result, expected) if dtype_ordered is False: # dtype='category' can't specify ordered, so only test once result = cat.astype('category') expected = cat tm.assert_categorical_equal(result, expected)
Example #6
Source File: common.py From recruit with Apache License 2.0 | 6 votes |
def test_astype_category(self, copy, name, ordered): # GH 18630 index = self.create_index() if name: index = index.rename(name) # standard categories dtype = CategoricalDtype(ordered=ordered) result = index.astype(dtype, copy=copy) expected = CategoricalIndex(index.values, name=name, ordered=ordered) tm.assert_index_equal(result, expected) # non-standard categories dtype = CategoricalDtype(index.unique().tolist()[:-1], ordered) result = index.astype(dtype, copy=copy) expected = CategoricalIndex(index.values, name=name, dtype=dtype) tm.assert_index_equal(result, expected) if ordered is False: # dtype='category' defaults to ordered=False, so only test once result = index.astype('category', copy=copy) expected = CategoricalIndex(index.values, name=name) tm.assert_index_equal(result, expected)
Example #7
Source File: test_dtypes.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_astype_category(self, dtype_ordered, cat_ordered): # GH 10696/18593 data = list('abcaacbab') cat = Categorical(data, categories=list('bac'), ordered=cat_ordered) # standard categories dtype = CategoricalDtype(ordered=dtype_ordered) result = cat.astype(dtype) expected = Categorical( data, categories=cat.categories, ordered=dtype_ordered) tm.assert_categorical_equal(result, expected) # non-standard categories dtype = CategoricalDtype(list('adc'), dtype_ordered) result = cat.astype(dtype) expected = Categorical(data, dtype=dtype) tm.assert_categorical_equal(result, expected) if dtype_ordered is False: # dtype='category' can't specify ordered, so only test once result = cat.astype('category') expected = cat tm.assert_categorical_equal(result, expected)
Example #8
Source File: dtypes.py From vnpy_crypto with MIT License | 6 votes |
def test_categorical_categoricaldtype(self, categories, ordered): data = """a,b 1,a 1,b 1,b 2,c""" expected = pd.DataFrame({ "a": [1, 1, 1, 2], "b": Categorical(['a', 'b', 'b', 'c'], categories=categories, ordered=ordered) }) dtype = {"b": CategoricalDtype(categories=categories, ordered=ordered)} result = self.read_csv(StringIO(data), dtype=dtype) tm.assert_frame_equal(result, expected)
Example #9
Source File: common.py From vnpy_crypto with MIT License | 6 votes |
def test_astype_category(self, copy, name, ordered): # GH 18630 index = self.create_index() if name: index = index.rename(name) # standard categories dtype = CategoricalDtype(ordered=ordered) result = index.astype(dtype, copy=copy) expected = CategoricalIndex(index.values, name=name, ordered=ordered) tm.assert_index_equal(result, expected) # non-standard categories dtype = CategoricalDtype(index.unique().tolist()[:-1], ordered) result = index.astype(dtype, copy=copy) expected = CategoricalIndex(index.values, name=name, dtype=dtype) tm.assert_index_equal(result, expected) if ordered is False: # dtype='category' defaults to ordered=False, so only test once result = index.astype('category', copy=copy) expected = CategoricalIndex(index.values, name=name) tm.assert_index_equal(result, expected)
Example #10
Source File: test_dtypes.py From recruit with Apache License 2.0 | 6 votes |
def test_categorical_category_dtype(all_parsers, categories, ordered): parser = all_parsers data = """a,b 1,a 1,b 1,b 2,c""" expected = DataFrame({ "a": [1, 1, 1, 2], "b": Categorical(["a", "b", "b", "c"], categories=categories, ordered=ordered) }) dtype = {"b": CategoricalDtype(categories=categories, ordered=ordered)} result = parser.read_csv(StringIO(data), dtype=dtype) tm.assert_frame_equal(result, expected)
Example #11
Source File: test_dtypes.py From recruit with Apache License 2.0 | 6 votes |
def test_categorical_dtype_chunksize_explicit_categories(all_parsers): # see gh-10153 parser = all_parsers data = """a,b 1,a 1,b 1,b 2,c""" cats = ["a", "b", "c"] expecteds = [DataFrame({"a": [1, 1], "b": Categorical(["a", "b"], categories=cats)}), DataFrame({"a": [1, 2], "b": Categorical(["b", "c"], categories=cats)}, index=[2, 3])] dtype = CategoricalDtype(cats) actuals = parser.read_csv(StringIO(data), dtype={"b": dtype}, chunksize=2) for actual, expected in zip(actuals, expecteds): tm.assert_frame_equal(actual, expected)
Example #12
Source File: test_concat.py From vnpy_crypto with MIT License | 6 votes |
def test_categorical_index_preserver(self):
    """Check ``pd.concat`` on frames indexed by a categorical column.

    Concatenating a frame with itself must equal a frame built from the
    concatenated columns and indexed by the same ``CategoricalDtype``.
    Concatenating frames whose index categories differ must raise
    ``TypeError``.
    """
    a = Series(np.arange(6, dtype='int64'))
    b = Series(list('aabbca'))

    df2 = DataFrame({'A': a,
                     'B': b.astype(CategoricalDtype(list('cab')))
                     }).set_index('B')
    result = pd.concat([df2, df2])
    expected = DataFrame(
        {'A': pd.concat([a, a]),
         'B': pd.concat([b, b]).astype(CategoricalDtype(list('cab')))
         }).set_index('B')
    tm.assert_frame_equal(result, expected)

    # wrong categories
    df3 = DataFrame({'A': a,
                     'B': Categorical(b, categories=list('abe'))
                     }).set_index('B')
    # Context-manager form keeps the failing call visible as a statement.
    with pytest.raises(TypeError):
        pd.concat([df2, df3])
Example #13
Source File: test_dtypes.py From recruit with Apache License 2.0 | 6 votes |
def test_astype_category(self, dtype_ordered, cat_ordered): # GH 10696/18593 data = list('abcaacbab') cat = Categorical(data, categories=list('bac'), ordered=cat_ordered) # standard categories dtype = CategoricalDtype(ordered=dtype_ordered) result = cat.astype(dtype) expected = Categorical( data, categories=cat.categories, ordered=dtype_ordered) tm.assert_categorical_equal(result, expected) # non-standard categories dtype = CategoricalDtype(list('adc'), dtype_ordered) result = cat.astype(dtype) expected = Categorical(data, dtype=dtype) tm.assert_categorical_equal(result, expected) if dtype_ordered is False: # dtype='category' can't specify ordered, so only test once result = cat.astype('category') expected = cat tm.assert_categorical_equal(result, expected)
Example #14
Source File: common.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_astype_category(self, copy, name, ordered): # GH 18630 index = self.create_index() if name: index = index.rename(name) # standard categories dtype = CategoricalDtype(ordered=ordered) result = index.astype(dtype, copy=copy) expected = CategoricalIndex(index.values, name=name, ordered=ordered) tm.assert_index_equal(result, expected) # non-standard categories dtype = CategoricalDtype(index.unique().tolist()[:-1], ordered) result = index.astype(dtype, copy=copy) expected = CategoricalIndex(index.values, name=name, dtype=dtype) tm.assert_index_equal(result, expected) if ordered is False: # dtype='category' defaults to ordered=False, so only test once result = index.astype('category', copy=copy) expected = CategoricalIndex(index.values, name=name) tm.assert_index_equal(result, expected)
Example #15
Source File: test_constructors.py From vnpy_crypto with MIT License | 5 votes |
def test_constructor_with_dtype(self, ordered): categories = ['b', 'a', 'c'] dtype = CategoricalDtype(categories, ordered=ordered) result = Categorical(['a', 'b', 'a', 'c'], dtype=dtype) expected = Categorical(['a', 'b', 'a', 'c'], categories=categories, ordered=ordered) tm.assert_categorical_equal(result, expected) assert result.ordered is ordered
Example #16
Source File: test_constructors.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_constructor_from_categorical_with_unknown_dtype(self): dtype = CategoricalDtype(None, ordered=True) values = Categorical(['a', 'b', 'd']) result = Categorical(values, dtype=dtype) # We use values.categories, not dtype.categories expected = Categorical(['a', 'b', 'd'], categories=['a', 'b', 'd'], ordered=True) tm.assert_categorical_equal(result, expected)
Example #17
Source File: test_constructors.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_from_codes_with_nan_code(self):
    """Check that ``Categorical.from_codes`` rejects codes containing NaN.

    The same ``ValueError`` is expected whether the categories are given
    directly or through a ``CategoricalDtype``.
    """
    # GH21767
    codes = [1, 2, np.nan]
    dtype = CategoricalDtype(categories=['a', 'b', 'c'])
    msg = "codes need to be array-like integers"
    with pytest.raises(ValueError, match=msg):
        Categorical.from_codes(codes, categories=dtype.categories)
    with pytest.raises(ValueError, match=msg):
        Categorical.from_codes(codes, dtype=dtype)
Example #18
Source File: test_constructors.py From vnpy_crypto with MIT License | 5 votes |
def test_from_inferred_categories_coerces(self): cats = ['1', '2', 'bad'] codes = np.array([0, 0, 1, 2], dtype='i8') dtype = CategoricalDtype([1, 2]) result = Categorical._from_inferred_categories(cats, codes, dtype) expected = Categorical([1, 1, 2, np.nan]) tm.assert_categorical_equal(result, expected)
Example #19
Source File: test_constructors.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_from_codes(self):
    """Check validation and the happy path of ``Categorical.from_codes``.

    Each invalid input is tried with categories given directly and, where
    the original does so, through a ``CategoricalDtype``. The final section
    checks that valid codes produce the expected ``Categorical``.
    """
    # too few categories
    dtype = CategoricalDtype(categories=[1, 2])
    msg = "codes need to be between "
    with pytest.raises(ValueError, match=msg):
        Categorical.from_codes([1, 2], categories=dtype.categories)
    with pytest.raises(ValueError, match=msg):
        Categorical.from_codes([1, 2], dtype=dtype)

    # no int codes
    msg = "codes need to be array-like integers"
    with pytest.raises(ValueError, match=msg):
        Categorical.from_codes(["a"], categories=dtype.categories)
    with pytest.raises(ValueError, match=msg):
        Categorical.from_codes(["a"], dtype=dtype)

    # no unique categories
    with pytest.raises(ValueError,
                       match="Categorical categories must be unique"):
        Categorical.from_codes([0, 1, 2], categories=["a", "a", "b"])

    # NaN categories included
    with pytest.raises(ValueError,
                       match="Categorial categories cannot be null"):
        Categorical.from_codes([0, 1, 2], categories=["a", "b", np.nan])

    # too negative
    dtype = CategoricalDtype(categories=["a", "b", "c"])
    msg = r"codes need to be between -1 and len\(categories\)-1"
    with pytest.raises(ValueError, match=msg):
        Categorical.from_codes([-2, 1, 2], categories=dtype.categories)
    with pytest.raises(ValueError, match=msg):
        Categorical.from_codes([-2, 1, 2], dtype=dtype)

    # valid codes
    exp = Categorical(["a", "b", "c"], ordered=False)
    res = Categorical.from_codes([0, 1, 2], categories=dtype.categories)
    tm.assert_categorical_equal(exp, res)

    res = Categorical.from_codes([0, 1, 2], dtype=dtype)
    tm.assert_categorical_equal(exp, res)
Example #20
Source File: test_constructors.py From vnpy_crypto with MIT License | 5 votes |
def test_from_inferred_categories_dtype(self): cats = ['a', 'b', 'd'] codes = np.array([0, 1, 0, 2], dtype='i8') dtype = CategoricalDtype(['c', 'b', 'a'], ordered=True) result = Categorical._from_inferred_categories(cats, codes, dtype) expected = Categorical(['a', 'b', 'a', 'd'], categories=['c', 'b', 'a'], ordered=True) tm.assert_categorical_equal(result, expected)
Example #21
Source File: test_constructors.py From vnpy_crypto with MIT License | 5 votes |
def test_constructor_from_categorical_with_dtype(self): dtype = CategoricalDtype(['a', 'b', 'c'], ordered=True) values = Categorical(['a', 'b', 'd']) result = Categorical(values, dtype=dtype) # We use dtype.categories, not values.categories expected = Categorical(['a', 'b', 'd'], categories=['a', 'b', 'c'], ordered=True) tm.assert_categorical_equal(result, expected)
Example #22
Source File: test_constructors.py From vnpy_crypto with MIT License | 5 votes |
def test_constructor_dtype_and_others_raises(self):
    """Check that ``dtype`` cannot be combined with ``categories``/``ordered``.

    Each conflicting keyword, passed together with ``dtype``, must raise a
    ``ValueError`` whose message matches ``"Cannot"``.
    """
    dtype = CategoricalDtype(['a', 'b'], ordered=True)
    conflicting_kwargs = [
        {'categories': ['a', 'b']},
        {'ordered': True},
        {'ordered': False},
    ]
    # NOTE(review): tm.assert_raises_regex is deprecated in newer pandas in
    # favour of pytest.raises(..., match=...); kept here so the block relies
    # only on names it already used.
    for kwargs in conflicting_kwargs:
        with tm.assert_raises_regex(ValueError, "Cannot"):
            Categorical(['a', 'b'], dtype=dtype, **kwargs)
Example #23
Source File: test_merge.py From vnpy_crypto with MIT License | 5 votes |
def test_other_columns(self, left, right):
    """Check that ``pd.merge`` preserves categorical non-key columns.

    Parameters
    ----------
    left, right : DataFrame
        Frames sharing a column ``X``; ``right`` also has a column ``Z``,
        which is cast to ``'category'`` before merging.
        NOTE(review): the expected dtypes imply ``left`` has an object
        column ``Y`` and a categorical ``X`` -- confirm against the fixtures.
    """
    # non-merge columns should preserve if possible
    right = right.assign(Z=right.Z.astype('category'))

    merged = pd.merge(left, right, on='X')
    result = merged.dtypes.sort_index()
    expected = Series([CategoricalDtype(),
                       np.dtype('O'),
                       CategoricalDtype()],
                      index=['X', 'Y', 'Z'])
    assert_series_equal(result, expected)

    # categories are preserved
    assert left.X.values.is_dtype_equal(merged.X.values)
    assert right.Z.values.is_dtype_equal(merged.Z.values)
Example #24
Source File: test_dtypes.py From vnpy_crypto with MIT License | 5 votes |
def test_set_dtype_no_overlap(self): c = Categorical(['a', 'b', 'c'], ['d', 'e']) result = c._set_dtype(CategoricalDtype(['a', 'b'])) expected = Categorical([None, None, None], categories=['a', 'b']) tm.assert_categorical_equal(result, expected)
Example #25
Source File: test_dtypes.py From vnpy_crypto with MIT License | 5 votes |
def test_set_dtype_same(self): c = Categorical(['a', 'b', 'c']) result = c._set_dtype(CategoricalDtype(['a', 'b', 'c'])) tm.assert_categorical_equal(result, c)
Example #26
Source File: test_missing.py From vnpy_crypto with MIT License | 5 votes |
def test_set_dtype_nans(self): c = Categorical(['a', 'b', np.nan]) result = c._set_dtype(CategoricalDtype(['a', 'c'])) tm.assert_numpy_array_equal(result.codes, np.array([0, -1, -1], dtype='int8'))
Example #27
Source File: test_apply.py From vnpy_crypto with MIT License | 5 votes |
def test_apply(self):
    """Check ``DataFrame.apply`` with a ufunc, an aggregator and ``astype``.

    Reads ``self.frame``, which must have a column ``'A'``. Also checks
    that an invalid axis raises ``ValueError`` and that applying
    ``astype('category')`` column-wise yields categorical columns.
    """
    with np.errstate(all='ignore'):
        # ufunc
        applied = self.frame.apply(np.sqrt)
        tm.assert_series_equal(np.sqrt(self.frame['A']), applied['A'])

        # aggregator
        applied = self.frame.apply(np.mean)
        assert applied['A'] == np.mean(self.frame['A'])

        d = self.frame.index[0]
        applied = self.frame.apply(np.mean, axis=1)
        assert applied[d] == np.mean(self.frame.xs(d))
        assert applied.index is self.frame.index  # want this

    # invalid axis
    df = DataFrame(
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=['a', 'a', 'c'])
    with pytest.raises(ValueError):
        df.apply(lambda x: x, 2)

    # see gh-9573
    df = DataFrame({'c0': ['A', 'A', 'B', 'B'],
                    'c1': ['C', 'C', 'D', 'D']})
    df = df.apply(lambda ts: ts.astype('category'))

    assert df.shape == (4, 2)
    assert isinstance(df['c0'].dtype, CategoricalDtype)
    assert isinstance(df['c1'].dtype, CategoricalDtype)
Example #28
Source File: test_indexing.py From vnpy_crypto with MIT License | 5 votes |
def test_assignment(self): # assignment df = DataFrame({'value': np.array( np.random.randint(0, 10000, 100), dtype='int32')}) labels = Categorical(["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)]) df = df.sort_values(by=['value'], ascending=True) s = pd.cut(df.value, range(0, 10500, 500), right=False, labels=labels) d = s.values df['D'] = d str(df) result = df.dtypes expected = Series( [np.dtype('int32'), CategoricalDtype(categories=labels, ordered=False)], index=['value', 'D']) tm.assert_series_equal(result, expected) df['E'] = s str(df) result = df.dtypes expected = Series([np.dtype('int32'), CategoricalDtype(categories=labels, ordered=False), CategoricalDtype(categories=labels, ordered=False)], index=['value', 'D', 'E']) tm.assert_series_equal(result, expected) result1 = df['D'] result2 = df['E'] tm.assert_categorical_equal(result1._data._block.values, d) # sorting s.name = 'E' tm.assert_series_equal(result2.sort_index(), s.sort_index()) cat = Categorical([1, 2, 3, 10], categories=[1, 2, 3, 4, 10]) df = DataFrame(Series(cat))
Example #29
Source File: test_algos.py From vnpy_crypto with MIT License | 5 votes |
def test_value_counts(self):
    """Check ``algos.value_counts`` on the output of ``cut``.

    The NumPy RNG is seeded with 1234, four normal samples are cut into
    four bins, and the counts are compared against one entry per
    hard-coded interval.
    """
    from pandas.core.reshape.tile import cut

    np.random.seed(1234)
    arr = np.random.randn(4)
    factor = cut(arr, 4)

    result = algos.value_counts(factor)
    breaks = [-1.194, -0.535, 0.121, 0.777, 1.433]
    index = IntervalIndex.from_breaks(breaks).astype(CDT(ordered=True))
    expected = Series([1, 1, 1, 1], index=index)
    tm.assert_series_equal(result.sort_index(), expected.sort_index())
Example #30
Source File: test_category.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_construction_with_categorical_dtype(self):
    """Check index construction from a ``CategoricalDtype``.

    ``CategoricalIndex`` and ``Index`` built with ``dtype=`` must equal the
    index built from separate ``categories``/``ordered`` keywords, and
    combining either keyword with ``dtype`` must raise ``ValueError``.
    """
    # construction with CategoricalDtype
    # GH18109
    data, cats, ordered = 'a a b b'.split(), 'c b a'.split(), True
    dtype = CategoricalDtype(categories=cats, ordered=ordered)

    result = CategoricalIndex(data, dtype=dtype)
    expected = CategoricalIndex(data, categories=cats, ordered=ordered)
    tm.assert_index_equal(result, expected, exact=True)

    # GH 19032
    result = Index(data, dtype=dtype)
    tm.assert_index_equal(result, expected, exact=True)

    # error when combining categories/ordered and dtype kwargs
    msg = "Cannot specify `categories` or `ordered` together with `dtype`."
    with pytest.raises(ValueError, match=msg):
        CategoricalIndex(data, categories=cats, dtype=dtype)

    with pytest.raises(ValueError, match=msg):
        Index(data, categories=cats, dtype=dtype)

    with pytest.raises(ValueError, match=msg):
        CategoricalIndex(data, ordered=ordered, dtype=dtype)

    with pytest.raises(ValueError, match=msg):
        Index(data, ordered=ordered, dtype=dtype)