Python pandas.core.algorithms.unique1d() Examples

The following are 24 code examples of pandas.core.algorithms.unique1d(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas.core.algorithms , or try the search function .
Example #1
Source File: test_analytics.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_nunique(self, float_frame_with_na, float_frame,
                     float_string_frame):
        f = lambda s: len(algorithms.unique1d(s.dropna()))
        assert_stat_op_calc('nunique', f, float_frame_with_na,
                            has_skipna=False, check_dtype=False,
                            check_dates=True)
        assert_stat_op_api('nunique', float_frame, float_string_frame)

        df = DataFrame({'A': [1, 1, 1],
                        'B': [1, 2, 3],
                        'C': [1, np.nan, 3]})
        tm.assert_series_equal(df.nunique(), Series({'A': 1, 'B': 3, 'C': 2}))
        tm.assert_series_equal(df.nunique(dropna=False),
                               Series({'A': 1, 'B': 3, 'C': 3}))
        tm.assert_series_equal(df.nunique(axis=1), Series({0: 1, 1: 2, 2: 2}))
        tm.assert_series_equal(df.nunique(axis=1, dropna=False),
                               Series({0: 1, 1: 3, 2: 2})) 
Example #2
Source File: test_analytics.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_nunique(self, float_frame_with_na, float_frame,
                     float_string_frame):
        f = lambda s: len(algorithms.unique1d(s.dropna()))
        assert_stat_op_calc('nunique', f, float_frame_with_na,
                            has_skipna=False, check_dtype=False,
                            check_dates=True)
        assert_stat_op_api('nunique', float_frame, float_string_frame)

        df = DataFrame({'A': [1, 1, 1],
                        'B': [1, 2, 3],
                        'C': [1, np.nan, 3]})
        tm.assert_series_equal(df.nunique(), Series({'A': 1, 'B': 3, 'C': 2}))
        tm.assert_series_equal(df.nunique(dropna=False),
                               Series({'A': 1, 'B': 3, 'C': 3}))
        tm.assert_series_equal(df.nunique(axis=1), Series({0: 1, 1: 2, 2: 2}))
        tm.assert_series_equal(df.nunique(axis=1, dropna=False),
                               Series({0: 1, 1: 3, 2: 2})) 
Example #3
Source File: base.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def unique(self):
        values = self._values

        if hasattr(values, 'unique'):

            result = values.unique()
        else:
            from pandas.core.algorithms import unique1d
            result = unique1d(values)

        return result 
Example #4
Source File: test_analytics.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_nunique(self):
        f = lambda s: len(algorithms.unique1d(s.dropna()))
        self._check_stat_op('nunique', f, has_skipna=False,
                            check_dtype=False, check_dates=True)

        df = DataFrame({'A': [1, 1, 1],
                        'B': [1, 2, 3],
                        'C': [1, np.nan, 3]})
        tm.assert_series_equal(df.nunique(), Series({'A': 1, 'B': 3, 'C': 2}))
        tm.assert_series_equal(df.nunique(dropna=False),
                               Series({'A': 1, 'B': 3, 'C': 3}))
        tm.assert_series_equal(df.nunique(axis=1), Series({0: 1, 1: 2, 2: 2}))
        tm.assert_series_equal(df.nunique(axis=1, dropna=False),
                               Series({0: 1, 1: 3, 2: 2})) 
Example #5
Source File: base.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def unique(self):
        values = self._values

        if hasattr(values, 'unique'):
            result = values.unique()
        else:
            from pandas.core.algorithms import unique1d
            result = unique1d(values)

        return result 
Example #6
Source File: test_analytics.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_nunique(self):
        f = lambda s: len(algorithms.unique1d(s.dropna()))
        self._check_stat_op('nunique', f, has_skipna=False,
                            check_dtype=False, check_dates=True)

        df = DataFrame({'A': [1, 1, 1],
                        'B': [1, 2, 3],
                        'C': [1, np.nan, 3]})
        tm.assert_series_equal(df.nunique(), Series({'A': 1, 'B': 3, 'C': 2}))
        tm.assert_series_equal(df.nunique(dropna=False),
                               Series({'A': 1, 'B': 3, 'C': 3}))
        tm.assert_series_equal(df.nunique(axis=1), Series({0: 1, 1: 2, 2: 2}))
        tm.assert_series_equal(df.nunique(axis=1, dropna=False),
                               Series({0: 1, 1: 3, 2: 2})) 
Example #7
Source File: base.py    From Splunking-Crime with GNU Affero General Public License v3.0 5 votes vote down vote up
def unique(self):
        values = self._values

        if hasattr(values, 'unique'):
            result = values.unique()
        else:
            from pandas.core.algorithms import unique1d
            result = unique1d(values)

        return result 
Example #8
Source File: period.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def unique(self, level=None):
        # override the Index.unique method for performance GH#23083
        if level is not None:
            # this should never occur, but is retained to make the signature
            # match Index.unique
            self._validate_index_level(level)

        values = self._ndarray_values
        result = unique1d(values)
        return self._shallow_copy(result) 
Example #9
Source File: base.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def unique(self):
        values = self._values

        if hasattr(values, 'unique'):

            result = values.unique()
        else:
            from pandas.core.algorithms import unique1d
            result = unique1d(values)

        return result 
Example #10
Source File: test_analytics.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_nunique(self):
        f = lambda s: len(algorithms.unique1d(s.dropna()))
        self._check_stat_op('nunique', f, has_skipna=False,
                            check_dtype=False, check_dates=True)

        df = DataFrame({'A': [1, 1, 1],
                        'B': [1, 2, 3],
                        'C': [1, np.nan, 3]})
        tm.assert_series_equal(df.nunique(), Series({'A': 1, 'B': 3, 'C': 2}))
        tm.assert_series_equal(df.nunique(dropna=False),
                               Series({'A': 1, 'B': 3, 'C': 3}))
        tm.assert_series_equal(df.nunique(axis=1), Series({0: 1, 1: 2, 2: 2}))
        tm.assert_series_equal(df.nunique(axis=1, dropna=False),
                               Series({0: 1, 1: 3, 2: 2})) 
Example #11
Source File: base.py    From recruit with Apache License 2.0 5 votes vote down vote up
def unique(self):
        values = self._values

        if hasattr(values, 'unique'):

            result = values.unique()
        else:
            from pandas.core.algorithms import unique1d
            result = unique1d(values)

        return result 
Example #12
Source File: period.py    From recruit with Apache License 2.0 5 votes vote down vote up
def unique(self, level=None):
        # override the Index.unique method for performance GH#23083
        if level is not None:
            # this should never occur, but is retained to make the signature
            # match Index.unique
            self._validate_index_level(level)

        values = self._ndarray_values
        result = unique1d(values)
        return self._shallow_copy(result) 
Example #13
Source File: groupby.py    From vnpy_crypto with MIT License 4 votes vote down vote up
def _concat_objects(self, keys, values, not_indexed_same=False):
        from pandas.core.reshape.concat import concat

        def reset_identity(values):
            # reset the identities of the components
            # of the values to prevent aliasing
            for v in com._not_none(*values):
                ax = v._get_axis(self.axis)
                ax._reset_identity()
            return values

        if not not_indexed_same:
            result = concat(values, axis=self.axis)
            ax = self._selected_obj._get_axis(self.axis)

            if isinstance(result, Series):
                result = result.reindex(ax)
            else:

                # this is a very unfortunate situation
                # we have a multi-index that is NOT lexsorted
                # and we have a result which is duplicated
                # we can't reindex, so we resort to this
                # GH 14776
                if isinstance(ax, MultiIndex) and not ax.is_unique:
                    indexer = algorithms.unique1d(
                        result.index.get_indexer_for(ax.values))
                    result = result.take(indexer, axis=self.axis)
                else:
                    result = result.reindex(ax, axis=self.axis)

        elif self.group_keys:

            values = reset_identity(values)
            if self.as_index:

                # possible MI return case
                group_keys = keys
                group_levels = self.grouper.levels
                group_names = self.grouper.names

                result = concat(values, axis=self.axis, keys=group_keys,
                                levels=group_levels, names=group_names,
                                sort=False)
            else:

                # GH5610, returns a MI, with the first level being a
                # range index
                keys = list(range(len(values)))
                result = concat(values, axis=self.axis, keys=keys)
        else:
            values = reset_identity(values)
            result = concat(values, axis=self.axis)

        if (isinstance(result, Series) and
                getattr(self, '_selection_name', None) is not None):

            result.name = self._selection_name

        return result 
Example #14
Source File: categorical.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 4 votes vote down vote up
def unique(self):
        """
        Return the ``Categorical`` which ``categories`` and ``codes`` are
        unique. Unused categories are NOT returned.

        - unordered category: values and categories are sorted by appearance
          order.
        - ordered category: values are sorted by appearance order, categories
          keeps existing order.

        Returns
        -------
        unique values : ``Categorical``

        Examples
        --------
        An unordered Categorical will return categories in the
        order of appearance.

        >>> pd.Categorical(list('baabc'))
        [b, a, c]
        Categories (3, object): [b, a, c]

        >>> pd.Categorical(list('baabc'), categories=list('abc'))
        [b, a, c]
        Categories (3, object): [b, a, c]

        An ordered Categorical preserves the category ordering.

        >>> pd.Categorical(list('baabc'),
        ...                categories=list('abc'),
        ...                ordered=True)
        [b, a, c]
        Categories (3, object): [a < b < c]

        See Also
        --------
        unique
        CategoricalIndex.unique
        Series.unique

        """

        # unlike np.unique, unique1d does not sort
        unique_codes = unique1d(self.codes)
        cat = self.copy()

        # keep nan in codes
        cat._codes = unique_codes

        # exclude nan from indexer for categories
        take_codes = unique_codes[unique_codes != -1]
        if self.ordered:
            take_codes = np.sort(take_codes)
        return cat.set_categories(cat.categories.take(take_codes)) 
Example #15
Source File: categorical.py    From vnpy_crypto with MIT License 4 votes vote down vote up
def unique(self):
        """
        Return the ``Categorical`` which ``categories`` and ``codes`` are
        unique. Unused categories are NOT returned.

        - unordered category: values and categories are sorted by appearance
          order.
        - ordered category: values are sorted by appearance order, categories
          keeps existing order.

        Returns
        -------
        unique values : ``Categorical``

        Examples
        --------
        An unordered Categorical will return categories in the
        order of appearance.

        >>> pd.Categorical(list('baabc'))
        [b, a, c]
        Categories (3, object): [b, a, c]

        >>> pd.Categorical(list('baabc'), categories=list('abc'))
        [b, a, c]
        Categories (3, object): [b, a, c]

        An ordered Categorical preserves the category ordering.

        >>> pd.Categorical(list('baabc'),
        ...                categories=list('abc'),
        ...                ordered=True)
        [b, a, c]
        Categories (3, object): [a < b < c]

        See Also
        --------
        unique
        CategoricalIndex.unique
        Series.unique

        """

        # unlike np.unique, unique1d does not sort
        unique_codes = unique1d(self.codes)
        cat = self.copy()

        # keep nan in codes
        cat._codes = unique_codes

        # exclude nan from indexer for categories
        take_codes = unique_codes[unique_codes != -1]
        if self.ordered:
            take_codes = np.sort(take_codes)
        return cat.set_categories(cat.categories.take(take_codes)) 
Example #16
Source File: groupby.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 4 votes vote down vote up
def _concat_objects(self, keys, values, not_indexed_same=False):
        from pandas.core.reshape.concat import concat

        def reset_identity(values):
            # reset the identities of the components
            # of the values to prevent aliasing
            for v in com._not_none(*values):
                ax = v._get_axis(self.axis)
                ax._reset_identity()
            return values

        if not not_indexed_same:
            result = concat(values, axis=self.axis)
            ax = self._selected_obj._get_axis(self.axis)

            if isinstance(result, Series):
                result = result.reindex(ax)
            else:

                # this is a very unfortunate situation
                # we have a multi-index that is NOT lexsorted
                # and we have a result which is duplicated
                # we can't reindex, so we resort to this
                # GH 14776
                if isinstance(ax, MultiIndex) and not ax.is_unique:
                    indexer = algorithms.unique1d(
                        result.index.get_indexer_for(ax.values))
                    result = result.take(indexer, axis=self.axis)
                else:
                    result = result.reindex(ax, axis=self.axis)

        elif self.group_keys:

            values = reset_identity(values)
            if self.as_index:

                # possible MI return case
                group_keys = keys
                group_levels = self.grouper.levels
                group_names = self.grouper.names

                result = concat(values, axis=self.axis, keys=group_keys,
                                levels=group_levels, names=group_names,
                                sort=False)
            else:

                # GH5610, returns a MI, with the first level being a
                # range index
                keys = list(range(len(values)))
                result = concat(values, axis=self.axis, keys=keys)
        else:
            values = reset_identity(values)
            result = concat(values, axis=self.axis)

        if (isinstance(result, Series) and
                getattr(self, '_selection_name', None) is not None):

            result.name = self._selection_name

        return result 
Example #17
Source File: base.py    From Splunking-Crime with GNU Affero General Public License v3.0 4 votes vote down vote up
def intersection(self, other):
        """
        Form the intersection of two Index objects.

        This returns a new Index with elements common to the index and `other`,
        preserving the order of the calling index.

        Parameters
        ----------
        other : Index or array-like

        Returns
        -------
        intersection : Index

        Examples
        --------

        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.intersection(idx2)
        Int64Index([3, 4], dtype='int64')

        """
        self._assert_can_do_setop(other)
        other = _ensure_index(other)

        if self.equals(other):
            return self._get_consensus_name(other)

        if not is_dtype_equal(self.dtype, other.dtype):
            this = self.astype('O')
            other = other.astype('O')
            return this.intersection(other)

        if self.is_monotonic and other.is_monotonic:
            try:
                result = self._inner_indexer(self._values, other._values)[0]
                return self._wrap_union_result(other, result)
            except TypeError:
                pass

        try:
            indexer = Index(other._values).get_indexer(self._values)
            indexer = indexer.take((indexer != -1).nonzero()[0])
        except Exception:
            # duplicates
            indexer = algos.unique1d(
                Index(other._values).get_indexer_non_unique(self._values)[0])
            indexer = indexer[indexer != -1]

        taken = other.take(indexer)
        if self.name != other.name:
            taken.name = None
        return taken 
Example #18
Source File: groupby.py    From recruit with Apache License 2.0 4 votes vote down vote up
def _concat_objects(self, keys, values, not_indexed_same=False):
        from pandas.core.reshape.concat import concat

        def reset_identity(values):
            # reset the identities of the components
            # of the values to prevent aliasing
            for v in com._not_none(*values):
                ax = v._get_axis(self.axis)
                ax._reset_identity()
            return values

        if not not_indexed_same:
            result = concat(values, axis=self.axis)
            ax = self._selected_obj._get_axis(self.axis)

            if isinstance(result, Series):
                result = result.reindex(ax)
            else:

                # this is a very unfortunate situation
                # we have a multi-index that is NOT lexsorted
                # and we have a result which is duplicated
                # we can't reindex, so we resort to this
                # GH 14776
                if isinstance(ax, MultiIndex) and not ax.is_unique:
                    indexer = algorithms.unique1d(
                        result.index.get_indexer_for(ax.values))
                    result = result.take(indexer, axis=self.axis)
                else:
                    result = result.reindex(ax, axis=self.axis)

        elif self.group_keys:

            values = reset_identity(values)
            if self.as_index:

                # possible MI return case
                group_keys = keys
                group_levels = self.grouper.levels
                group_names = self.grouper.names

                result = concat(values, axis=self.axis, keys=group_keys,
                                levels=group_levels, names=group_names,
                                sort=False)
            else:

                # GH5610, returns a MI, with the first level being a
                # range index
                keys = list(range(len(values)))
                result = concat(values, axis=self.axis, keys=keys)
        else:
            values = reset_identity(values)
            result = concat(values, axis=self.axis)

        if (isinstance(result, Series) and
                getattr(self, '_selection_name', None) is not None):

            result.name = self._selection_name

        return result 
Example #19
Source File: groupby.py    From Splunking-Crime with GNU Affero General Public License v3.0 4 votes vote down vote up
def _concat_objects(self, keys, values, not_indexed_same=False):
        from pandas.core.reshape.concat import concat

        def reset_identity(values):
            # reset the identities of the components
            # of the values to prevent aliasing
            for v in _not_none(*values):
                ax = v._get_axis(self.axis)
                ax._reset_identity()
            return values

        if not not_indexed_same:
            result = concat(values, axis=self.axis)
            ax = self._selected_obj._get_axis(self.axis)

            if isinstance(result, Series):
                result = result.reindex(ax)
            else:

                # this is a very unfortunate situation
                # we have a multi-index that is NOT lexsorted
                # and we have a result which is duplicated
                # we can't reindex, so we resort to this
                # GH 14776
                if isinstance(ax, MultiIndex) and not ax.is_unique:
                    indexer = algorithms.unique1d(
                        result.index.get_indexer_for(ax.values))
                    result = result.take(indexer, axis=self.axis)
                else:
                    result = result.reindex(ax, axis=self.axis)

        elif self.group_keys:

            values = reset_identity(values)
            if self.as_index:

                # possible MI return case
                group_keys = keys
                group_levels = self.grouper.levels
                group_names = self.grouper.names

                result = concat(values, axis=self.axis, keys=group_keys,
                                levels=group_levels, names=group_names)
            else:

                # GH5610, returns a MI, with the first level being a
                # range index
                keys = list(range(len(values)))
                result = concat(values, axis=self.axis, keys=keys)
        else:
            values = reset_identity(values)
            result = concat(values, axis=self.axis)

        if (isinstance(result, Series) and
                getattr(self, '_selection_name', None) is not None):

            result.name = self._selection_name

        return result 
Example #20
Source File: categorical.py    From Splunking-Crime with GNU Affero General Public License v3.0 4 votes vote down vote up
def unique(self):
        """
        Return the ``Categorical`` which ``categories`` and ``codes`` are
        unique. Unused categories are NOT returned.

        - unordered category: values and categories are sorted by appearance
          order.
        - ordered category: values are sorted by appearance order, categories
          keeps existing order.

        Returns
        -------
        unique values : ``Categorical``

        Examples
        --------
        An unordered Categorical will return categories in the
        order of appearance.

        >>> pd.Categorical(list('baabc'))
        [b, a, c]
        Categories (3, object): [b, a, c]

        >>> pd.Categorical(list('baabc'), categories=list('abc'))
        [b, a, c]
        Categories (3, object): [b, a, c]

        An ordered Categorical preserves the category ordering.

        >>> pd.Categorical(list('baabc'),
        ...                categories=list('abc'),
        ...                ordered=True)
        [b, a, c]
        Categories (3, object): [a < b < c]

        See Also
        --------
        unique
        CategoricalIndex.unique
        Series.unique

        """

        # unlike np.unique, unique1d does not sort
        unique_codes = unique1d(self.codes)
        cat = self.copy()

        # keep nan in codes
        cat._codes = unique_codes

        # exclude nan from indexer for categories
        take_codes = unique_codes[unique_codes != -1]
        if self.ordered:
            take_codes = sorted(take_codes)
        return cat.set_categories(cat.categories.take(take_codes)) 
Example #21
Source File: base.py    From elasticintel with GNU General Public License v3.0 4 votes vote down vote up
def intersection(self, other):
        """
        Form the intersection of two Index objects.

        This returns a new Index with elements common to the index and `other`,
        preserving the order of the calling index.

        Parameters
        ----------
        other : Index or array-like

        Returns
        -------
        intersection : Index

        Examples
        --------

        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.intersection(idx2)
        Int64Index([3, 4], dtype='int64')

        """
        self._assert_can_do_setop(other)
        other = _ensure_index(other)

        if self.equals(other):
            return self._get_consensus_name(other)

        if not is_dtype_equal(self.dtype, other.dtype):
            this = self.astype('O')
            other = other.astype('O')
            return this.intersection(other)

        if self.is_monotonic and other.is_monotonic:
            try:
                result = self._inner_indexer(self._values, other._values)[0]
                return self._wrap_union_result(other, result)
            except TypeError:
                pass

        try:
            indexer = Index(other._values).get_indexer(self._values)
            indexer = indexer.take((indexer != -1).nonzero()[0])
        except:
            # duplicates
            indexer = algos.unique1d(
                Index(other._values).get_indexer_non_unique(self._values)[0])
            indexer = indexer[indexer != -1]

        taken = other.take(indexer)
        if self.name != other.name:
            taken.name = None
        return taken 
Example #22
Source File: groupby.py    From elasticintel with GNU General Public License v3.0 4 votes vote down vote up
def _concat_objects(self, keys, values, not_indexed_same=False):
        from pandas.core.reshape.concat import concat

        def reset_identity(values):
            # reset the identities of the components
            # of the values to prevent aliasing
            for v in _not_none(*values):
                ax = v._get_axis(self.axis)
                ax._reset_identity()
            return values

        if not not_indexed_same:
            result = concat(values, axis=self.axis)
            ax = self._selected_obj._get_axis(self.axis)

            if isinstance(result, Series):
                result = result.reindex(ax)
            else:

                # this is a very unfortunate situation
                # we have a multi-index that is NOT lexsorted
                # and we have a result which is duplicated
                # we can't reindex, so we resort to this
                # GH 14776
                if isinstance(ax, MultiIndex) and not ax.is_unique:
                    indexer = algorithms.unique1d(
                        result.index.get_indexer_for(ax.values))
                    result = result.take(indexer, axis=self.axis)
                else:
                    result = result.reindex(ax, axis=self.axis)

        elif self.group_keys:

            values = reset_identity(values)
            if self.as_index:

                # possible MI return case
                group_keys = keys
                group_levels = self.grouper.levels
                group_names = self.grouper.names

                result = concat(values, axis=self.axis, keys=group_keys,
                                levels=group_levels, names=group_names)
            else:

                # GH5610, returns a MI, with the first level being a
                # range index
                keys = list(range(len(values)))
                result = concat(values, axis=self.axis, keys=keys)
        else:
            values = reset_identity(values)
            result = concat(values, axis=self.axis)

        if (isinstance(result, Series) and
                getattr(self, '_selection_name', None) is not None):

            result.name = self._selection_name

        return result 
Example #23
Source File: categorical.py    From elasticintel with GNU General Public License v3.0 4 votes vote down vote up
def unique(self):
        """
        Return the ``Categorical`` which ``categories`` and ``codes`` are
        unique. Unused categories are NOT returned.

        - unordered category: values and categories are sorted by appearance
          order.
        - ordered category: values are sorted by appearance order, categories
          keeps existing order.

        Returns
        -------
        unique values : ``Categorical``

        Examples
        --------
        An unordered Categorical will return categories in the
        order of appearance.

        >>> pd.Categorical(list('baabc'))
        [b, a, c]
        Categories (3, object): [b, a, c]

        >>> pd.Categorical(list('baabc'), categories=list('abc'))
        [b, a, c]
        Categories (3, object): [b, a, c]

        An ordered Categorical preserves the category ordering.

        >>> pd.Categorical(list('baabc'),
        ...                categories=list('abc'),
        ...                ordered=True)
        [b, a, c]
        Categories (3, object): [a < b < c]

        See Also
        --------
        unique
        CategoricalIndex.unique
        Series.unique

        """

        # unlike np.unique, unique1d does not sort
        unique_codes = unique1d(self.codes)
        cat = self.copy()

        # keep nan in codes
        cat._codes = unique_codes

        # exclude nan from indexer for categories
        take_codes = unique_codes[unique_codes != -1]
        if self.ordered:
            take_codes = sorted(take_codes)
        return cat.set_categories(cat.categories.take(take_codes)) 
Example #24
Source File: categorical.py    From recruit with Apache License 2.0 4 votes vote down vote up
def unique(self):
        """
        Return the ``Categorical`` which ``categories`` and ``codes`` are
        unique. Unused categories are NOT returned.

        - unordered category: values and categories are sorted by appearance
          order.
        - ordered category: values are sorted by appearance order, categories
          keeps existing order.

        Returns
        -------
        unique values : ``Categorical``

        Examples
        --------
        An unordered Categorical will return categories in the
        order of appearance.

        >>> pd.Categorical(list('baabc'))
        [b, a, c]
        Categories (3, object): [b, a, c]

        >>> pd.Categorical(list('baabc'), categories=list('abc'))
        [b, a, c]
        Categories (3, object): [b, a, c]

        An ordered Categorical preserves the category ordering.

        >>> pd.Categorical(list('baabc'),
        ...                categories=list('abc'),
        ...                ordered=True)
        [b, a, c]
        Categories (3, object): [a < b < c]

        See Also
        --------
        unique
        CategoricalIndex.unique
        Series.unique

        """

        # unlike np.unique, unique1d does not sort
        unique_codes = unique1d(self.codes)
        cat = self.copy()

        # keep nan in codes
        cat._codes = unique_codes

        # exclude nan from indexer for categories
        take_codes = unique_codes[unique_codes != -1]
        if self.ordered:
            take_codes = np.sort(take_codes)
        return cat.set_categories(cat.categories.take(take_codes))