Python pandas.core.algorithms.take_1d() Examples

The following are 30 code examples of pandas.core.algorithms.take_1d(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas.core.algorithms, or try the search function.
Example #1
Source File: categorical.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def __array__(self, dtype=None):
        """
        The numpy array interface.

        Parameters
        ----------
        dtype : dtype-like, optional
            Requested dtype of the result. ``None`` (default) means no
            conversion is requested.

        Returns
        -------
        values : numpy array
            A numpy array of either the specified dtype or,
            if dtype is None (default), the same dtype as
            categorical.categories.dtype
        """
        ret = take_1d(self.categories.values, self._codes)
        # Compare against None explicitly instead of relying on truthiness:
        # a non-structured ``np.dtype`` instance can evaluate as falsy (its
        # length is its number of fields), which would silently skip the
        # requested conversion.
        if dtype is not None and not is_dtype_equal(dtype,
                                                    self.categories.dtype):
            return np.asarray(ret, dtype)
        if is_extension_array_dtype(ret):
            # When we're a Categorical[ExtensionArray], like Interval,
            # we need to ensure __array__ gets all the way to an
            # ndarray.
            ret = np.asarray(ret)
        return ret
Example #2
Source File: groupby.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def _transform_fast(self, result, obj):
        """
        Fast transform path for aggregations.

        Each column of ``result`` is expanded with ``take_1d`` using the
        group ids from ``self.grouper.group_info``, optionally passed
        through ``self._try_cast`` against the matching column of ``obj``,
        and the columns are assembled with ``DataFrame._from_arrays`` using
        ``result.columns`` and ``obj.index``.
        """
        # if there were groups with no observations (Categorical only?)
        # try casting data to original dtype
        should_cast = (self.size().fillna(0) > 0).any()

        group_ids, _, _ = self.grouper.group_info

        def _expand(position):
            # reshape one column to the size of the original frame
            taken = algorithms.take_1d(result.iloc[:, position].values,
                                       group_ids)
            if should_cast:
                return self._try_cast(taken, obj.iloc[:, position])
            return taken

        arrays = [_expand(position)
                  for position in range(len(result.columns))]

        return DataFrame._from_arrays(arrays, columns=result.columns,
                                      index=obj.index)
Example #3
Source File: categorical.py    From recruit with Apache License 2.0 6 votes vote down vote up
def __array__(self, dtype=None):
        """
        The numpy array interface.

        Parameters
        ----------
        dtype : dtype-like, optional
            Requested dtype of the result. ``None`` (default) means no
            conversion is requested.

        Returns
        -------
        values : numpy array
            A numpy array of either the specified dtype or,
            if dtype is None (default), the same dtype as
            categorical.categories.dtype
        """
        ret = take_1d(self.categories.values, self._codes)
        # ``dtype is not None`` rather than ``if dtype``: a non-structured
        # ``np.dtype`` object can be falsy (its length is the number of
        # fields), so a truthiness test could ignore an explicit request.
        conversion_requested = dtype is not None
        if conversion_requested and not is_dtype_equal(
                dtype, self.categories.dtype):
            return np.asarray(ret, dtype)
        if is_extension_array_dtype(ret):
            # When we're a Categorical[ExtensionArray], like Interval,
            # we need to ensure __array__ gets all the way to an
            # ndarray.
            ret = np.asarray(ret)
        return ret
Example #4
Source File: multi.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def values(self):
        """
        Return the zipped per-level values, caching them on
        ``self._tuples``.

        When ``self._tuples`` is already set it is returned as is;
        otherwise one array is taken per (level, labels) pair, the arrays
        are combined with ``lib.fast_zip`` and the result is cached.
        """
        if self._tuples is None:
            per_level = []
            for level, codes in zip(self.levels, self.labels):
                if not hasattr(level, '_box_values'):
                    # nothing to box: take from the plain level values
                    per_level.append(
                        algos.take_1d(np.asarray(level._values), codes))
                    continue

                # Need to box timestamps, etc.
                # Try to minimize boxing.
                if len(level) > len(codes):
                    raw = algos.take_1d(level._values, codes)
                    taken = level._box_values(raw)
                else:
                    boxed = level._box_values(level._values)
                    na_value = _get_na_value(level.dtype.type)
                    taken = algos.take_1d(boxed, codes,
                                          fill_value=na_value)
                per_level.append(taken)

            self._tuples = lib.fast_zip(per_level)
        return self._tuples

    # fml 
Example #5
Source File: multi.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def _get_level_values(self, level):
        """
        Return vector of label values for requested level,
        equal to the length of the index

        **this is an internal method**

        Parameters
        ----------
        level : int level

        Returns
        -------
        values
            The result of ``_shallow_copy`` on the requested level, built
            from the level's values taken at ``self.labels[level]`` with
            the level's ``_na_value`` as fill value.
        """
        level_index = self.levels[level]
        codes = self.labels[level]
        taken = algos.take_1d(level_index._values, codes,
                              fill_value=level_index._na_value)
        return level_index._shallow_copy(taken)
Example #6
Source File: managers.py    From recruit with Apache License 2.0 6 votes vote down vote up
def combine(self, blocks, copy=True):
        """
        Return a new manager with the blocks.

        An empty ``blocks`` yields ``self.make_empty()``. Otherwise each
        block is copied (``deep=copy``), its ``mgr_locs`` are remapped
        through the reverse indexer of the sorted, concatenated locations,
        and a new instance of ``self.__class__`` is built without the
        integrity check.
        """
        if len(blocks) == 0:
            return self.make_empty()

        # FIXME: optimization potential
        all_locs = [blk.mgr_locs.as_array for blk in blocks]
        indexer = np.sort(np.concatenate(all_locs))
        inv_indexer = lib.get_reverse_indexer(indexer, self.shape[0])

        def _relocated(blk):
            # copy first so the remapped locations never touch the input
            duplicate = blk.copy(deep=copy)
            duplicate.mgr_locs = algos.take_1d(inv_indexer,
                                               duplicate.mgr_locs.as_array,
                                               axis=0, allow_fill=False)
            return duplicate

        new_blocks = [_relocated(blk) for blk in blocks]

        axes = list(self.axes)
        axes[0] = self.items.take(indexer)

        return self.__class__(new_blocks, axes, do_integrity_check=False)
Example #7
Source File: internals.py    From Splunking-Crime with GNU Affero General Public License v3.0 6 votes vote down vote up
def combine(self, blocks, copy=True):
        """
        Return a new manager with the blocks.

        An empty ``blocks`` yields ``self.make_empty()``. Otherwise each
        block is copied (``deep=copy``), its ``mgr_locs`` are remapped
        through the reverse indexer of the sorted, concatenated locations,
        and a new instance of ``self.__class__`` is built without the
        integrity check.
        """
        if len(blocks) == 0:
            return self.make_empty()

        # FIXME: optimization potential
        all_locs = [blk.mgr_locs.as_array for blk in blocks]
        indexer = np.sort(np.concatenate(all_locs))
        inv_indexer = lib.get_reverse_indexer(indexer, self.shape[0])

        def _relocated(blk):
            # copy first so the remapped locations never touch the input
            duplicate = blk.copy(deep=copy)
            duplicate.mgr_locs = algos.take_1d(inv_indexer,
                                               duplicate.mgr_locs.as_array,
                                               axis=0, allow_fill=False)
            return duplicate

        new_blocks = [_relocated(blk) for blk in blocks]

        axes = list(self.axes)
        axes[0] = self.items.take(indexer)

        return self.__class__(new_blocks, axes, do_integrity_check=False)
Example #8
Source File: generic.py    From recruit with Apache License 2.0 6 votes vote down vote up
def _transform_fast(self, result, obj, func_nm):
        """
        Fast transform path for aggregations.

        Each column of ``result`` is expanded with ``take_1d`` using the
        group ids from ``self.grouper.group_info``; when
        ``self._transform_should_cast(func_nm)`` is truthy the expanded
        column is passed through ``self._try_cast`` against the matching
        column of ``obj``. The columns are assembled with
        ``DataFrame._from_arrays`` using ``result.columns`` and
        ``obj.index``.
        """
        # if there were groups with no observations (Categorical only?)
        # try casting data to original dtype
        should_cast = self._transform_should_cast(func_nm)

        group_ids, _, _ = self.grouper.group_info

        def _expand(position):
            # reshape one column to the size of the original frame
            taken = algorithms.take_1d(result.iloc[:, position].values,
                                       group_ids)
            if should_cast:
                return self._try_cast(taken, obj.iloc[:, position])
            return taken

        arrays = [_expand(position)
                  for position in range(len(result.columns))]

        return DataFrame._from_arrays(arrays, columns=result.columns,
                                      index=obj.index)
Example #9
Source File: groupby.py    From Splunking-Crime with GNU Affero General Public License v3.0 6 votes vote down vote up
def _transform_fast(self, result, obj):
        """
        Fast transform path for aggregations.

        Each column of ``result`` is expanded with ``take_1d`` using the
        group ids from ``self.grouper.group_info``, optionally passed
        through ``self._try_cast`` against the matching column of ``obj``,
        and the columns are assembled with ``DataFrame._from_arrays`` using
        ``result.columns`` and ``obj.index``.
        """
        # if there were groups with no observations (Categorical only?)
        # try casting data to original dtype
        should_cast = (self.size().fillna(0) > 0).any()

        group_ids, _, _ = self.grouper.group_info

        arrays = []
        for position in range(len(result.columns)):
            # reshape the column to the size of the original frame
            column = algorithms.take_1d(result.iloc[:, position].values,
                                        group_ids)
            arrays.append(
                self._try_cast(column, obj.iloc[:, position])
                if should_cast else column)

        return DataFrame._from_arrays(arrays, columns=result.columns,
                                      index=obj.index)
Example #10
Source File: multi.py    From Splunking-Crime with GNU Affero General Public License v3.0 6 votes vote down vote up
def _get_level_values(self, level):
        """
        Return vector of label values for requested level,
        equal to the length of the index

        **this is an internal method**

        Parameters
        ----------
        level : int level

        Returns
        -------
        values
            The result of ``_shallow_copy`` on the requested level, built
            from the level's values taken at ``self.labels[level]`` with
            the level's ``_na_value`` as fill value.
        """
        level_index, codes = self.levels[level], self.labels[level]
        return level_index._shallow_copy(
            algos.take_1d(level_index._values, codes,
                          fill_value=level_index._na_value))
Example #11
Source File: groupby.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def _transform_fast(self, result, obj, func_nm):
        """
        Fast transform path for aggregations.

        Expands every column of ``result`` to the rows of ``obj`` by
        taking with the group ids, casting through ``self._try_cast`` when
        ``self._transform_should_cast(func_nm)`` says so, and returns the
        frame built by ``DataFrame._from_arrays``.
        """
        # if there were groups with no observations (Categorical only?)
        # try casting data to original dtype
        should_cast = self._transform_should_cast(func_nm)

        group_ids, _, _ = self.grouper.group_info

        arrays = []
        for position in range(len(result.columns)):
            # reshape the column to the size of the original frame
            column = algorithms.take_1d(result.iloc[:, position].values,
                                        group_ids)
            arrays.append(
                self._try_cast(column, obj.iloc[:, position])
                if should_cast else column)

        return DataFrame._from_arrays(arrays, columns=result.columns,
                                      index=obj.index)
Example #12
Source File: multi.py    From Splunking-Crime with GNU Affero General Public License v3.0 6 votes vote down vote up
def values(self):
        """
        Return the zipped per-level values, caching them on
        ``self._tuples``.

        When ``self._tuples`` is already set it is returned as is;
        otherwise one array is taken per (level, labels) pair, the arrays
        are combined with ``lib.fast_zip`` and the result is cached.
        """
        if self._tuples is None:
            per_level = []
            for level, codes in zip(self.levels, self.labels):
                if not hasattr(level, '_box_values'):
                    # nothing to box: take from the plain level values
                    per_level.append(
                        algos.take_1d(np.asarray(level._values), codes))
                    continue

                # Need to box timestamps, etc.
                # Try to minimize boxing.
                if len(level) > len(codes):
                    raw = algos.take_1d(level._values, codes)
                    taken = level._box_values(raw)
                else:
                    boxed = level._box_values(level._values)
                    na_value = _get_na_value(level.dtype.type)
                    taken = algos.take_1d(boxed, codes,
                                          fill_value=na_value)
                per_level.append(taken)

            self._tuples = lib.fast_zip(per_level)
        return self._tuples

    # fml 
Example #13
Source File: internals.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def combine(self, blocks, copy=True):
        """
        Return a new manager with the blocks.

        ``self.make_empty()`` is returned for an empty ``blocks``. For a
        non-empty one, every block is copied with ``deep=copy`` and given
        ``mgr_locs`` remapped via the reverse indexer; the first axis is
        replaced by ``self.items.take(indexer)``.
        """
        if len(blocks) == 0:
            return self.make_empty()

        # FIXME: optimization potential
        concatenated = np.concatenate([blk.mgr_locs.as_array
                                       for blk in blocks])
        indexer = np.sort(concatenated)
        inv_indexer = lib.get_reverse_indexer(indexer, self.shape[0])

        new_blocks = []
        for original in blocks:
            duplicate = original.copy(deep=copy)
            remapped = algos.take_1d(inv_indexer,
                                     duplicate.mgr_locs.as_array,
                                     axis=0, allow_fill=False)
            duplicate.mgr_locs = remapped
            new_blocks.append(duplicate)

        axes = list(self.axes)
        axes[0] = self.items.take(indexer)

        return self.__class__(new_blocks, axes, do_integrity_check=False)
Example #14
Source File: generic.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def _transform_fast(self, result, obj, func_nm):
        """
        Fast transform path for aggregations.

        Each column of ``result`` is expanded with ``take_1d`` using the
        group ids from ``self.grouper.group_info``; when
        ``self._transform_should_cast(func_nm)`` is truthy the expanded
        column is passed through ``self._try_cast`` against the matching
        column of ``obj``. The columns are assembled with
        ``DataFrame._from_arrays`` using ``result.columns`` and
        ``obj.index``.
        """
        # if there were groups with no observations (Categorical only?)
        # try casting data to original dtype
        should_cast = self._transform_should_cast(func_nm)

        group_ids, _, _ = self.grouper.group_info

        def _expand(position):
            # reshape one column to the size of the original frame
            taken = algorithms.take_1d(result.iloc[:, position].values,
                                       group_ids)
            if should_cast:
                return self._try_cast(taken, obj.iloc[:, position])
            return taken

        arrays = [_expand(position)
                  for position in range(len(result.columns))]

        return DataFrame._from_arrays(arrays, columns=result.columns,
                                      index=obj.index)
Example #15
Source File: managers.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def combine(self, blocks, copy=True):
        """
        Return a new manager with the blocks.

        ``self.make_empty()`` is returned for an empty ``blocks``. For a
        non-empty one, every block is copied with ``deep=copy`` and given
        ``mgr_locs`` remapped via the reverse indexer; the first axis is
        replaced by ``self.items.take(indexer)``.
        """
        if len(blocks) == 0:
            return self.make_empty()

        # FIXME: optimization potential
        concatenated = np.concatenate([blk.mgr_locs.as_array
                                       for blk in blocks])
        indexer = np.sort(concatenated)
        inv_indexer = lib.get_reverse_indexer(indexer, self.shape[0])

        new_blocks = []
        for original in blocks:
            duplicate = original.copy(deep=copy)
            remapped = algos.take_1d(inv_indexer,
                                     duplicate.mgr_locs.as_array,
                                     axis=0, allow_fill=False)
            duplicate.mgr_locs = remapped
            new_blocks.append(duplicate)

        axes = list(self.axes)
        axes[0] = self.items.take(indexer)

        return self.__class__(new_blocks, axes, do_integrity_check=False)
Example #16
Source File: test_take.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_1d_fill_nonna(self):
        """
        Check the result dtype of ``take_1d`` with a non-NA fill value.

        For every (input dtype, fill value, expected dtype) case, a take
        containing ``-1`` must place the fill value there and yield the
        expected dtype, while a take without ``-1`` must keep the input
        dtype.
        """
        def check(dtype, fill_value, out_dtype):
            data = np.random.randint(0, 2, 4).astype(dtype)

            # the trailing -1 is the position that receives fill_value
            with_fill = [2, 1, 0, -1]
            result = algos.take_1d(data, with_fill, fill_value=fill_value)
            assert ((result[[0, 1, 2]] == data[[2, 1, 0]]).all())
            assert (result[3] == fill_value)
            assert (result.dtype == out_dtype)

            # no -1 present: the input dtype must be preserved
            without_fill = [2, 1, 0, 1]
            result = algos.take_1d(data, without_fill,
                                   fill_value=fill_value)
            assert ((result[[0, 1, 2, 3]] == data[without_fill]).all())
            assert (result.dtype == dtype)

        cases = [
            (np.int8, np.int16(127), np.int8),
            (np.int8, np.int16(128), np.int16),
            (np.int32, 1, np.int32),
            (np.int32, 2.0, np.float64),
            (np.int32, 3.0 + 4.0j, np.complex128),
            (np.int32, True, np.object_),
            (np.int32, '', np.object_),
            (np.float64, 1, np.float64),
            (np.float64, 2.0, np.float64),
            (np.float64, 3.0 + 4.0j, np.complex128),
            (np.float64, True, np.object_),
            (np.float64, '', np.object_),
            (np.complex128, 1, np.complex128),
            (np.complex128, 2.0, np.complex128),
            (np.complex128, 3.0 + 4.0j, np.complex128),
            (np.complex128, True, np.object_),
            (np.complex128, '', np.object_),
            (np.bool_, 1, np.object_),
            (np.bool_, 2.0, np.object_),
            (np.bool_, 3.0 + 4.0j, np.object_),
            (np.bool_, True, np.bool_),
            (np.bool_, '', np.object_),
        ]
        for dtype, fill_value, out_dtype in cases:
            check(dtype, fill_value, out_dtype)
Example #17
Source File: test_take.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_1d_with_out(self):
        """
        Check ``take_1d`` when the caller supplies the ``out`` buffer.

        For each dtype, a take without ``-1`` must match ``ndarray.take``.
        A take containing ``-1`` must write NaN at that position when the
        dtype can hold NA, and otherwise raise ``TypeError`` matching
        ``self.fill_error``. Both writeable and read-only inputs are
        exercised.
        """
        def _test_dtype(dtype, can_hold_na, writeable=True):
            data = np.random.randint(0, 2, 4).astype(dtype)
            data.flags.writeable = writeable

            # no missing positions: must agree with plain ndarray.take
            indexer = [2, 1, 0, 1]
            out = np.empty(4, dtype=dtype)
            algos.take_1d(data, indexer, out=out)
            expected = data.take(indexer)
            tm.assert_almost_equal(out, expected)

            # the trailing -1 requests a fill at the last position
            indexer = [2, 1, 0, -1]
            out = np.empty(4, dtype=dtype)
            if can_hold_na:
                algos.take_1d(data, indexer, out=out)
                expected = data.take(indexer)
                expected[3] = np.nan
                tm.assert_almost_equal(out, expected)
            else:
                with tm.assert_raises_regex(TypeError, self.fill_error):
                    algos.take_1d(data, indexer, out=out)
                # no exception o/w
                data.take(indexer, out=out)

        for writeable in [True, False]:
            # Check that take_nd works both with writeable arrays (in which
            # case fast typed memoryviews implementation) and read-only
            # arrays alike.
            _test_dtype(np.float64, True, writeable=writeable)
            _test_dtype(np.float32, True, writeable=writeable)
            _test_dtype(np.uint64, False, writeable=writeable)
            _test_dtype(np.uint32, False, writeable=writeable)
            _test_dtype(np.uint16, False, writeable=writeable)
            _test_dtype(np.uint8, False, writeable=writeable)
            _test_dtype(np.int64, False, writeable=writeable)
            _test_dtype(np.int32, False, writeable=writeable)
            _test_dtype(np.int16, False, writeable=writeable)
            _test_dtype(np.int8, False, writeable=writeable)
            _test_dtype(np.object_, True, writeable=writeable)
            # np.bool_ is the NumPy scalar type; the bare ``np.bool`` alias
            # was deprecated and is missing from some NumPy releases.
            _test_dtype(np.bool_, False, writeable=writeable)
Example #18
Source File: test_take.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_1d_other_dtypes(self):
        """
        Check ``take_1d`` on float32 data: the position indexed with
        ``-1`` must come back as NaN, the rest must match ``ndarray.take``.
        """
        indexer = [1, 2, 3, -1]
        arr = np.random.randn(10).astype(np.float32)

        # build the expectation from plain take, then blank the -1 slot
        expected = arr.take(indexer)
        expected[-1] = np.nan

        result = algos.take_1d(arr, indexer)
        tm.assert_almost_equal(result, expected)
Example #19
Source File: category.py    From Splunking-Crime with GNU Affero General Public License v3.0 5 votes vote down vote up
def get_indexer(self, target, method=None, limit=None, tolerance=None):
        """
        Compute an indexer for ``target`` against this index.

        ``method`` is normalised with ``clean_reindex_fill_method``; the
        'pad', 'backfill' and 'nearest' methods raise
        ``NotImplementedError``. ``target`` is translated to codes of
        ``self.categories`` and looked up with
        ``self._engine.get_indexer_non_unique``. ``limit`` and
        ``tolerance`` are not used by this implementation.
        """
        method = missing.clean_reindex_fill_method(method)
        target = ibase._ensure_index(target)

        # shortcut: a unique index equal to the target maps onto itself
        if self.is_unique and self.equals(target):
            return np.arange(len(self), dtype='intp')

        if method in ('pad', 'backfill'):
            raise NotImplementedError("method='pad' and method='backfill' not "
                                      "implemented yet for CategoricalIndex")
        if method == 'nearest':
            raise NotImplementedError("method='nearest' not implemented yet "
                                      'for CategoricalIndex')

        if not isinstance(target, CategoricalIndex):
            codes = self.categories.get_indexer(target)
        elif self.values.is_dtype_equal(target):
            # we have the same codes
            codes = target.codes
        else:
            # translate the target's codes into our category positions
            code_indexer = self.categories.get_indexer(target.categories)
            codes = take_1d(code_indexer, target.codes, fill_value=-1)

        indexer, _ = self._engine.get_indexer_non_unique(codes)
        return _ensure_platform_int(indexer)
Example #20
Source File: frame.py    From Splunking-Crime with GNU Affero General Public License v3.0 5 votes vote down vote up
def _reindex_with_indexers(self, reindexers, method=None, fill_value=None,
                               limit=None, copy=False, allow_dups=False):
        """
        Reindex using the (labels, indexer) pairs in ``reindexers``.

        Key 0 of ``reindexers`` supplies the new index and row indexer,
        key 1 the new columns (defaulting to ``self.columns``). Columns
        not present in ``self`` are skipped. A ``method`` or ``limit``
        raises ``NotImplementedError``; a ``fill_value`` of None becomes
        NaN. ``copy`` and ``allow_dups`` are not used by this
        implementation.
        """
        if not (method is None and limit is None):
            raise NotImplementedError("cannot reindex with a method or limit "
                                      "with sparse")

        fill_value = np.nan if fill_value is None else fill_value

        index, row_indexer = reindexers.get(0, (None, None))
        columns, _ = reindexers.get(1, (None, None))
        if columns is None:
            columns = self.columns

        new_arrays = {}
        for col in columns:
            if col in self:
                if row_indexer is None:
                    # no row reindexing requested: reuse the column
                    new_arrays[col] = self[col]
                else:
                    new_arrays[col] = algos.take_1d(self[col].get_values(),
                                                    row_indexer,
                                                    fill_value=fill_value)

        reindexed = self._constructor(new_arrays, index=index,
                                      columns=columns)
        return reindexed.__finalize__(self)
Example #21
Source File: series.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def _reindex_indexer(self, new_index, indexer, copy):
        """
        Return this object reindexed to ``new_index`` via ``indexer``.

        With ``indexer`` None nothing is taken: ``self.copy()`` is
        returned when ``copy`` is truthy, otherwise ``self``.
        """
        if indexer is not None:
            # be subclass-friendly
            taken = algorithms.take_1d(self.get_values(), indexer)
            return self._constructor(taken, index=new_index)

        return self.copy() if copy else self
Example #22
Source File: resample.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def _take_new_index(obj, indexer, new_index, axis=0):
    """
    Take from ``obj`` with ``indexer`` and label the result ``new_index``.

    A Series is rebuilt from its taken values (keeping its name); a
    DataFrame is rebuilt from ``obj._data.reindex_indexer``. ``axis=1``
    on a DataFrame raises ``NotImplementedError``; any other ``obj`` type
    raises ``ValueError``.
    """
    from pandas.core.api import Series, DataFrame

    if isinstance(obj, Series):
        taken = algos.take_1d(obj.values, indexer)
        return Series(taken, index=new_index, name=obj.name)

    if not isinstance(obj, DataFrame):
        raise ValueError("'obj' should be either a Series or a DataFrame")

    if axis == 1:
        raise NotImplementedError("axis 1 is not supported")
    reindexed = obj._data.reindex_indexer(
        new_axis=new_index, indexer=indexer, axis=1)
    return DataFrame(reindexed)
Example #23
Source File: series.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def _reindex_indexer(self, new_index, indexer, copy):
        """
        Return this object reindexed to ``new_index`` via ``indexer``.

        With ``indexer`` None nothing is taken: ``self.copy()`` is
        returned when ``copy`` is truthy, otherwise ``self``.
        """
        if indexer is not None:
            taken = algorithms.take_1d(self._values, indexer,
                                       allow_fill=True, fill_value=None)
            return self._constructor(taken, index=new_index)

        if copy:
            return self.copy()
        return self
Example #24
Source File: resample.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def _take_new_index(obj, indexer, new_index, axis=0):
    """
    Take from ``obj`` with ``indexer`` and label the result ``new_index``.

    Series: values are taken with ``take_1d`` and wrapped in a new Series
    carrying the original name. DataFrame: rebuilt from
    ``obj._data.reindex_indexer``; ``axis=1`` raises
    ``NotImplementedError``. Anything else raises ``ValueError``.
    """
    from pandas.core.api import Series, DataFrame

    is_series = isinstance(obj, Series)
    if not is_series and not isinstance(obj, DataFrame):
        raise ValueError("'obj' should be either a Series or a DataFrame")

    if is_series:
        return Series(algos.take_1d(obj.values, indexer),
                      index=new_index, name=obj.name)

    if axis == 1:
        raise NotImplementedError("axis 1 is not supported")
    return DataFrame(obj._data.reindex_indexer(
        new_axis=new_index, indexer=indexer, axis=1))
Example #25
Source File: accessors.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def _delegate_property_get(self, name):
        """
        Fetch attribute ``name`` from ``self._get_values()``.

        A result that is neither an ndarray nor list-like is returned
        unchanged. Otherwise the result is converted to an ndarray
        (integer ndarrays are first cast to int64), expanded through
        ``self.orig.cat.codes`` when ``self.orig`` is set, and wrapped in
        a Series flagged via ``_is_copy``.
        """
        from pandas import Series

        result = getattr(self._get_values(), name)

        if not isinstance(result, np.ndarray):
            if not is_list_like(result):
                return result
        elif is_integer_dtype(result):
            # maybe need to upcast (ints)
            result = result.astype('int64')

        result = np.asarray(result)

        # blow up if we operate on categories
        if self.orig is None:
            index = self._parent.index
        else:
            result = take_1d(result, self.orig.cat.codes)
            index = self.orig.index

        # return the result as a Series, which is by definition a copy
        result = Series(result, index=index, name=self.name)

        # setting this object will show a SettingWithCopyWarning/Error
        result._is_copy = ("modifications to a property of a datetimelike "
                           "object are not supported and are discarded. "
                           "Change values on the original.")

        return result
Example #26
Source File: multi.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def _get_level_values(self, level, unique=False):
        """
        Return vector of label values for requested level,
        equal to the length of the index

        **this is an internal method**

        Parameters
        ----------
        level : int level
        unique : bool, default False
            if True, drop duplicated values

        Returns
        -------
        values
            The result of ``_shallow_copy`` on the requested level, built
            from the level's values taken at the (optionally de-duplicated)
            codes with the level's ``_na_value`` as fill value.
        """
        level_index = self.levels[level]
        codes = self.codes[level]
        if unique:
            codes = algos.unique(codes)
        taken = algos.take_1d(level_index._values, codes,
                              fill_value=level_index._na_value)
        return level_index._shallow_copy(taken)
Example #27
Source File: categorical.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def _recode_for_categories(codes, old_categories, new_categories):
    """
    Convert a set of codes to a new set of categories

    Parameters
    ----------
    codes : array
    old_categories, new_categories : Index

    Returns
    -------
    new_codes : array

    Examples
    --------
    >>> old_cat = pd.Index(['b', 'a', 'c'])
    >>> new_cat = pd.Index(['a', 'b'])
    >>> codes = np.array([0, 1, 1, 2])
    >>> _recode_for_categories(codes, old_cat, new_cat)
    array([ 1,  0,  0, -1])
    """
    from pandas.core.algorithms import take_1d

    # No recoding needed when there are no old categories (all null
    # anyway) or when the categories are unchanged: hand back a copy.
    if len(old_categories) == 0 or new_categories.equals(old_categories):
        return codes.copy()

    positions = new_categories.get_indexer(old_categories)
    indexer = coerce_indexer_dtype(positions, new_categories)
    return take_1d(indexer, codes.copy(), fill_value=-1)
Example #28
Source File: internals.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def get_dtypes(self):
        """
        Return the block dtypes taken at ``self._blknos``
        (``allow_fill=False``).
        """
        per_block = [blk.dtype for blk in self.blocks]
        return algos.take_1d(np.array(per_block), self._blknos,
                             allow_fill=False)
Example #29
Source File: internals.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def get_ftypes(self):
        """
        Return the block ftypes taken at ``self._blknos``
        (``allow_fill=False``).
        """
        per_block = [blk.ftype for blk in self.blocks]
        return algos.take_1d(np.array(per_block), self._blknos,
                             allow_fill=False)
Example #30
Source File: groupby.py    From Splunking-Crime with GNU Affero General Public License v3.0 5 votes vote down vote up
def _transform_fast(self, func):
        """
        fast version of transform, only applicable to
        builtin/cythonizable functions

        ``func`` may be a callable or the name of a method on ``self``.
        Its result's values are expanded with the group ids and returned
        as a Series carrying the index and name of ``self.obj``.
        """
        # resolve a method name to the bound method
        transform = (getattr(self, func)
                     if isinstance(func, compat.string_types) else func)

        group_ids, _, _ = self.grouper.group_info
        should_cast = (self.size().fillna(0) > 0).any()

        expanded = algorithms.take_1d(transform().values, group_ids)
        if should_cast:
            expanded = self._try_cast(expanded, self.obj)
        return Series(expanded, index=self.obj.index, name=self.obj.name)