Python pandas.core.internals.BlockManager() Examples

The following are 30 code examples showing how to use pandas.core.internals.BlockManager(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module pandas.core.internals, or try the search function.

Example 1
Project: recruit   Author: Frank-qlu   File: test_internals.py    License: Apache License 2.0 6 votes vote down vote up
def test_duplicate_ref_loc_failure(self):
        """Overlapping block locations must be rejected; distinct ones accepted."""
        source_mgr = create_mgr('a:bool; a: f8')
        axes = source_mgr.axes
        blocks = source_mgr.blocks
        first, second = blocks[0], blocks[1]

        # Both blocks claim manager location 0, so building a manager from
        # them is expected to raise AssertionError.
        first.mgr_locs = np.array([0])
        second.mgr_locs = np.array([0])
        with pytest.raises(AssertionError):
            BlockManager(blocks, axes)

        # With distinct locations the manager is built and iget(1) is
        # exercised as a smoke check (its result is not inspected).
        first.mgr_locs = np.array([0])
        second.mgr_locs = np.array([1])
        BlockManager(blocks, axes).iget(1)
Example 2
Project: recruit   Author: Frank-qlu   File: test_internals.py    License: Apache License 2.0 6 votes vote down vote up
def test_equals_block_order_different_dtypes(self):
        """Check that ``equals`` ignores the order of blocks (GH 9330)."""
        specs = (
            "a:i8;b:f8",  # basic case
            "a:i8;b:f8;c:c8;d:b",  # many types
            "a:i8;e:dt;f:td;g:string",  # more types
            "a:i8;b:category;c:category2;d:category2",  # categories
            "c:sparse;d:sparse_na;b:f8",  # sparse
        )

        for spec in specs:
            reference = create_mgr(spec)
            # Every permutation of the blocks must compare equal to the
            # reference manager, in both directions.
            for ordering in itertools.permutations(reference.blocks):
                shuffled = BlockManager(ordering, reference.axes)
                assert reference.equals(shuffled)
                assert shuffled.equals(reference)
Example 3
Project: recruit   Author: Frank-qlu   File: blocks.py    License: Apache License 2.0 6 votes vote down vote up
def _extend_blocks(result, blocks=None):
    """ return a new extended blocks, givin the result """
    from pandas.core.internals import BlockManager
    if blocks is None:
        blocks = []
    if isinstance(result, list):
        for r in result:
            if isinstance(r, list):
                blocks.extend(r)
            else:
                blocks.append(r)
    elif isinstance(result, BlockManager):
        blocks.extend(result.blocks)
    else:
        blocks.append(result)
    return blocks 
Example 4
Project: Computable   Author: ktraunmueller   File: generic.py    License: MIT License 6 votes vote down vote up
def _init_mgr(self, mgr, axes=None, dtype=None, copy=False):
        """ passed a manager and a axes dict """
        for a, axe in axes.items():
            if axe is not None:
                mgr = mgr.reindex_axis(
                    axe, axis=self._get_block_manager_axis(a), copy=False)

        # do not copy BlockManager unless explicitly done
        if copy and dtype is None:
            mgr = mgr.copy()
        elif dtype is not None:
            # avoid copy if we can
            if len(mgr.blocks) > 1 or mgr.blocks[0].values.dtype != dtype:
                mgr = mgr.astype(dtype)
        return mgr

    #----------------------------------------------------------------------
    # Construction 
Example 5
Project: Computable   Author: ktraunmueller   File: groupby.py    License: MIT License 6 votes vote down vote up
def _wrap_agged_blocks(self, blocks):
        """Wrap aggregated blocks in an object of the same type as the source.

        The axes of the source object's manager are reused, with the grouper's
        result index substituted in, and the new object is passed through
        ``_post_process_cython_aggregate`` before being returned.
        """
        source = self._obj_with_exclusions
        axes = list(source._data.axes)

        if self.axis != 0:
            axes[self.axis] = self.grouper.result_index
        else:
            # more kludge: the old axis 1 moves to slot 0 and the result
            # index takes slot 1
            axes[0], axes[1] = axes[1], self.grouper.result_index

        wrapped = type(source)(BlockManager(blocks, axes))
        return self._post_process_cython_aggregate(wrapped)
Example 6
Project: Computable   Author: ktraunmueller   File: pytables.py    License: MIT License 6 votes vote down vote up
def read(self, **kwargs):
        """Read the stored axes and blocks and assemble them into ``obj_type``.

        ``kwargs`` are only handed to ``validate_read``.  Axes are read from
        the keys ``axis<i>``; each block from ``block<i>_items`` and
        ``block<i>_values``.
        """
        self.validate_read(kwargs)

        axes = [self.read_index('axis%d' % dim) for dim in range(self.ndim)]
        items = axes[0]

        def load_block(n):
            # Items are read before values, one block at a time.
            block_items = self.read_index('block%d_items' % n)
            block_values = self.read_array('block%d_values' % n)
            return make_block(block_values, block_items, items)

        blocks = [load_block(n) for n in range(self.nblocks)]
        return self.obj_type(BlockManager(blocks, axes))
Example 7
def test_duplicate_ref_loc_failure(self):
        """A manager cannot be built from blocks sharing a location."""
        template = create_mgr('a:bool; a: f8')
        axes = template.axes
        blocks = template.blocks

        # Point both blocks at location 0: construction must raise
        # AssertionError.
        for blk in (blocks[0], blocks[1]):
            blk.mgr_locs = np.array([0])
        with pytest.raises(AssertionError):
            BlockManager(blocks, axes)

        # Give each block its own location: construction succeeds and
        # iget(1) is called as a smoke check.
        for loc, blk in enumerate((blocks[0], blocks[1])):
            blk.mgr_locs = np.array([loc])
        rebuilt = BlockManager(blocks, axes)
        rebuilt.iget(1)
Example 8
def test_equals_block_order_different_dtypes(self):
        """``equals`` must hold for any ordering of the blocks (GH 9330)."""
        layouts = (
            "a:i8;b:f8",  # basic case
            "a:i8;b:f8;c:c8;d:b",  # many types
            "a:i8;e:dt;f:td;g:string",  # more types
            "a:i8;b:category;c:category2;d:category2",  # categories
            "c:sparse;d:sparse_na;b:f8",  # sparse
        )

        for layout in layouts:
            base = create_mgr(layout)
            for permuted_blocks in itertools.permutations(base.blocks):
                candidate = BlockManager(permuted_blocks, base.axes)
                # equality is checked in both directions
                assert base.equals(candidate)
                assert candidate.equals(base)
Example 9
Project: predictive-maintenance-using-machine-learning   Author: awslabs   File: blocks.py    License: Apache License 2.0 6 votes vote down vote up
def _extend_blocks(result, blocks=None):
    """ return a new extended blocks, givin the result """
    from pandas.core.internals import BlockManager
    if blocks is None:
        blocks = []
    if isinstance(result, list):
        for r in result:
            if isinstance(r, list):
                blocks.extend(r)
            else:
                blocks.append(r)
    elif isinstance(result, BlockManager):
        blocks.extend(result.blocks)
    else:
        blocks.append(result)
    return blocks 
Example 10
Project: coffeegrindsize   Author: jgagneastro   File: test_internals.py    License: MIT License 6 votes vote down vote up
def test_duplicate_ref_loc_failure(self):
        """Duplicate block locations raise; unique block locations do not."""
        seed_mgr = create_mgr('a:bool; a: f8')
        axes = seed_mgr.axes
        blocks = seed_mgr.blocks
        blk_a, blk_b = blocks[0], blocks[1]

        # test trying to create block manager with overlapping ref locs
        blk_a.mgr_locs = np.array([0])
        blk_b.mgr_locs = np.array([0])
        with pytest.raises(AssertionError):
            BlockManager(blocks, axes)

        # non-overlapping locations: construction and iget(1) must not raise
        blk_a.mgr_locs = np.array([0])
        blk_b.mgr_locs = np.array([1])
        valid_mgr = BlockManager(blocks, axes)
        valid_mgr.iget(1)
Example 11
Project: coffeegrindsize   Author: jgagneastro   File: test_internals.py    License: MIT License 6 votes vote down vote up
def test_equals_block_order_different_dtypes(self):
        """Reordering blocks must not change manager equality (GH 9330)."""
        descriptions = (
            "a:i8;b:f8",  # basic case
            "a:i8;b:f8;c:c8;d:b",  # many types
            "a:i8;e:dt;f:td;g:string",  # more types
            "a:i8;b:category;c:category2;d:category2",  # categories
            "c:sparse;d:sparse_na;b:f8",  # sparse
        )

        for description in descriptions:
            expected = create_mgr(description)
            orderings = itertools.permutations(expected.blocks)
            for blocks_in_order in orderings:
                actual = BlockManager(blocks_in_order, expected.axes)
                assert expected.equals(actual)
                assert actual.equals(expected)
Example 12
Project: recruit   Author: Frank-qlu   File: test_external_block.py    License: Apache License 2.0 5 votes vote down vote up
def df():
    """Build a frame with columns 'a' and 'b' that includes a CustomBlock.

    The blocks of a one-column frame are combined with a CustomBlock placed
    at ``slice(1, 2)``.
    """
    base = pd.DataFrame({'a': [1, 2, 3]})
    base_blocks = base._data.blocks
    extra = CustomBlock(np.arange(3, dtype='int64'), placement=slice(1, 2))
    all_blocks = base_blocks + (extra,)
    axes = [pd.Index(['a', 'b']), base.index]
    return pd.DataFrame(BlockManager(all_blocks, axes))
Example 13
Project: recruit   Author: Frank-qlu   File: test_external_block.py    License: Apache License 2.0 5 votes vote down vote up
def test_custom_repr():
    """repr() of a Series/DataFrame built on a CustomBlock matches exactly."""
    data = np.arange(3, dtype='int64')

    # Series backed by one custom block placed at slice(0, 3)
    series_block = CustomBlock(data, placement=slice(0, 3))
    ser = pd.Series(SingleBlockManager(series_block, pd.RangeIndex(3)))
    expected_series = '0    Val: 0\n1    Val: 1\n2    Val: 2\ndtype: int64'
    assert repr(ser) == expected_series

    # DataFrame with a single column 'col' backed by a custom block
    frame_block = CustomBlock(data, placement=slice(0, 1))
    frame = pd.DataFrame(BlockManager([frame_block], [['col'], range(3)]))
    expected_frame = '      col\n0  Val: 0\n1  Val: 1\n2  Val: 2'
    assert repr(frame) == expected_frame
Example 14
Project: recruit   Author: Frank-qlu   File: test_internals.py    License: Apache License 2.0 5 votes vote down vote up
def test_get(self):
        """``mgr.get`` must return the matching row of the block's values."""
        labels = Index(list('abc'))
        data = np.random.rand(3, 3)
        blk = make_block(values=data.copy(), placement=np.arange(3))
        mgr = BlockManager(blocks=[blk], axes=[labels, np.arange(3)])

        # first without the fast path ...
        for pos, label in enumerate('abc'):
            assert_almost_equal(mgr.get(label, fastpath=False), data[pos])
        # ... then through the default path, comparing internal values
        for pos, label in enumerate('abc'):
            assert_almost_equal(mgr.get(label).internal_values(), data[pos])
Example 15
Project: recruit   Author: Frank-qlu   File: test_internals.py    License: Apache License 2.0 5 votes vote down vote up
def test_equals(self):
        """A manager equals one built from its blocks in reverse order."""
        specs = (
            'a,b,c: i8-1; d,e,f: i8-2',  # unique items
            'a,a,a: i8-1; b,b,b: i8-2',  # repeated items
        )
        for spec in specs:
            forward = create_mgr(spec)
            backward = BlockManager(forward.blocks[::-1], forward.axes)
            assert forward.equals(backward)
Example 16
Project: recruit   Author: Frank-qlu   File: blocks.py    License: Apache License 2.0 5 votes vote down vote up
def _unstack(self, unstacker_func, new_columns, n_rows, fill_value):
        """Unstack this block's values into a list of new blocks.

        Parameters
        ----------
        unstacker_func : callable
            Partially applied unstacker; called with ``self.values.T``.
        new_columns : Index
            All columns of the unstacked BlockManager.
        n_rows : int
            Not used here (only used in ExtensionBlock.unstack).
        fill_value : int
            Not used here (only used in ExtensionBlock.unstack).

        Returns
        -------
        blocks : list of Block
            A single-element list holding the block of unstacked values.
        mask : array_like of bool
            The mask of columns of `blocks` we should keep.
        """
        unstacker = unstacker_func(self.values.T)
        placement = new_columns.get_indexer(unstacker.get_new_columns())
        values, keep = unstacker.get_new_values()

        # Collapse the mask along axis 0, then use it to filter both the
        # transposed values and their placement.
        keep = keep.any(0)
        kept_block = make_block(values.T[keep], placement=placement[keep])
        return [kept_block], keep
Example 17
Project: recruit   Author: Frank-qlu   File: blocks.py    License: Apache License 2.0 5 votes vote down vote up
def _get_unstack_items(self, unstacker, new_columns):
        """
        Get the placement, values, and mask for a Block unstack.

        This is shared between ObjectBlock and ExtensionBlock. They
        differ in that ObjectBlock passes the values, while ExtensionBlock
        passes the dummy ndarray of positions to be used by a take
        later.

        Parameters
        ----------
        unstacker : pandas.core.reshape.reshape._Unstacker
        new_columns : Index
            All columns of the unstacked BlockManager.

        Returns
        -------
        new_placement : ndarray[int]
            The placement of the new columns in `new_columns`.
        new_values : Union[ndarray, ExtensionArray]
            The first return value from _Unstacker.get_new_values.
        mask : ndarray[bool]
            The second return value from _Unstacker.get_new_values.
        """
        # shared with ExtensionBlock
        new_items = unstacker.get_new_columns()
        new_placement = new_columns.get_indexer(new_items)
        new_values, mask = unstacker.get_new_values()

        mask = mask.any(0)
        return new_placement, new_values, mask 
Example 18
Project: Computable   Author: ktraunmueller   File: frame.py    License: MIT License 5 votes vote down vote up
def dict_to_manager(sdict, columns, index):
    """Build a block manager from a dict of series plus its columns and index.

    The arrays are taken from ``sdict`` in the order given by ``columns``.
    """
    # from BlockManager perspective: columns come first, then the index
    column_axis = _ensure_index(columns)
    index_axis = _ensure_index(index)
    arrays = [sdict[name] for name in columns]

    return create_block_manager_from_arrays(
        arrays, columns, [column_axis, index_axis])
Example 19
Project: Computable   Author: ktraunmueller   File: generic.py    License: MIT License 5 votes vote down vote up
def _from_axes(cls, data, axes):
        """Construct ``cls`` from ``data``, supplying ``axes`` when needed.

        A BlockManager is handed to the constructor as is.  For anything else
        the axes (reversed first when ``cls._AXIS_REVERSED`` is set) are
        turned into keyword arguments through ``_construct_axes_dict_from``.
        """
        # for construction from BlockManager
        if isinstance(data, BlockManager):
            return cls(data)

        ordered_axes = axes[::-1] if cls._AXIS_REVERSED else axes
        axis_kwargs = cls._construct_axes_dict_from(cls, ordered_axes, copy=False)
        return cls(data, **axis_kwargs)
Example 20
Project: Computable   Author: ktraunmueller   File: generic.py    License: MIT License 5 votes vote down vote up
def as_blocks(self, columns=None):
        """
        Convert the frame to a dict of dtype -> Constructor Types that each has
        a homogeneous dtype.

        Each block keeps its own items unless a specific list of columns is
        provided, in which case every block is reindexed to those columns.

        NOTE: the dtypes of the blocks WILL BE PRESERVED HERE (unlike in
              as_matrix)

        Parameters
        ----------
        columns : array-like, optional
            Specific column order.  ``None`` or an empty sequence means
            "use each block's own items".

        Returns
        -------
        values : dict
            Maps ``str(block.dtype)`` to an object built with
            ``self._constructor`` from that single block.
        """
        self._consolidate_inplace()

        # Decide once whether the caller supplied columns.  The previous
        # ``columns or b.items`` raised ValueError for array-likes with more
        # than one element because their truth value is ambiguous.
        use_block_items = columns is None or len(columns) == 0

        bd = dict()
        for b in self._data.blocks:
            b = b.reindex_items_from(b.items if use_block_items else columns)
            bd[str(b.dtype)] = self._constructor(
                BlockManager([b], [b.items, self.index])).__finalize__(self)
        return bd
Example 21
Project: Computable   Author: ktraunmueller   File: groupby.py    License: MIT License 5 votes vote down vote up
def _wrap_agged_blocks(self, blocks):
        """Assemble aggregated blocks into a DataFrame result.

        The output keys are the labels of the aggregated axis, restricted to
        the items actually present in ``blocks`` when the counts differ.
        With ``as_index`` false the group levels are inserted as leading
        columns.  The result is transposed for ``axis == 1`` and returned
        after ``convert_objects()``.
        """
        source = self._obj_with_exclusions
        agg_labels = source.columns if self.axis == 0 else source.index

        if sum(len(blk.items) for blk in blocks) != len(agg_labels):
            # Some labels were dropped: keep only those present in a block
            # and re-point every block at the reduced key set.
            present = [item for blk in blocks for item in blk.items]
            output_keys = agg_labels[agg_labels.isin(present)]
            for blk in blocks:
                blk.set_ref_items(output_keys, maybe_rename=False)
        else:
            output_keys = agg_labels

        if self.as_index:
            index = self.grouper.result_index
        else:
            index = np.arange(blocks[0].values.shape[1])

        result = DataFrame(BlockManager(blocks, [output_keys, index]))

        if not self.as_index:
            # Put each group level in front, in grouper order.
            levels = self.grouper.get_group_levels()
            named_levels = zip(self.grouper.names, levels)
            for position, (name, labels) in enumerate(named_levels):
                result.insert(position, name, labels)
            result = result.consolidate()

        if self.axis == 1:
            result = result.T

        return result.convert_objects()
Example 22
Project: Computable   Author: ktraunmueller   File: groupby.py    License: MIT License 5 votes vote down vote up
def _get_sorted_data(self):
        # this is the BlockManager
        data = self.data._data

        # this is sort of wasteful but...
        sorted_axis = data.axes[self.axis].take(self.sort_idx)
        sorted_data = data.reindex_axis(sorted_axis, axis=self.axis)

        return sorted_data 
Example 23
def df():
    """Return a frame with columns 'a' and 'b' that includes a CustomBlock."""
    seed = pd.DataFrame({'a': [1, 2, 3]})
    seed_blocks = seed._data.blocks
    custom = CustomBlock(np.arange(3, dtype='int64'), placement=slice(1, 2))
    # blocks of the seed frame plus the custom block
    combined = seed_blocks + (custom,)
    manager = BlockManager(combined, [pd.Index(['a', 'b']), seed.index])
    return pd.DataFrame(manager)
Example 24
def test_custom_repr():
    """Pin the exact repr of a Series and a DataFrame built on a CustomBlock."""
    raw = np.arange(3, dtype='int64')

    # series
    ser_mgr = SingleBlockManager(
        CustomBlock(raw, placement=slice(0, 3)), pd.RangeIndex(3))
    ser_repr = repr(pd.Series(ser_mgr))
    assert ser_repr == '0    Val: 0\n1    Val: 1\n2    Val: 2\ndtype: int64'

    # dataframe
    frame_mgr = BlockManager(
        [CustomBlock(raw, placement=slice(0, 1))], [['col'], range(3)])
    frame_repr = repr(pd.DataFrame(frame_mgr))
    assert frame_repr == '      col\n0  Val: 0\n1  Val: 1\n2  Val: 2'
Example 25
def test_get(self):
        """Each label fetched from the manager yields its row of the values."""
        names = 'abc'
        columns = Index(list(names))
        expected = np.random.rand(3, 3)
        block = make_block(values=expected.copy(), placement=np.arange(3))
        mgr = BlockManager(blocks=[block], axes=[columns, np.arange(3)])

        # explicit fastpath=False lookups
        for row, name in zip(expected, names):
            assert_almost_equal(mgr.get(name, fastpath=False), row)

        # default lookups, compared through internal_values()
        for row, name in zip(expected, names):
            assert_almost_equal(mgr.get(name).internal_values(), row)
Example 26
def test_equals(self):
        """Reversing the block order must leave the manager equal."""
        # unique items
        unique_mgr = create_mgr('a,b,c: i8-1; d,e,f: i8-2')
        assert unique_mgr.equals(
            BlockManager(unique_mgr.blocks[::-1], unique_mgr.axes))

        # repeated items
        repeated_mgr = create_mgr('a,a,a: i8-1; b,b,b: i8-2')
        assert repeated_mgr.equals(
            BlockManager(repeated_mgr.blocks[::-1], repeated_mgr.axes))
Example 27
Project: predictive-maintenance-using-machine-learning   Author: awslabs   File: blocks.py    License: Apache License 2.0 5 votes vote down vote up
def _unstack(self, unstacker_func, new_columns, n_rows, fill_value):
        """Return the unstacked blocks of self together with a column mask.

        Parameters
        ----------
        unstacker_func : callable
            Partially applied unstacker, invoked on ``self.values.T``.
        new_columns : Index
            All columns of the unstacked BlockManager.
        n_rows : int
            Ignored by this method (only used in ExtensionBlock.unstack).
        fill_value : int
            Ignored by this method (only used in ExtensionBlock.unstack).

        Returns
        -------
        blocks : list of Block
            New blocks of unstacked values (a one-element list).
        mask : array_like of bool
            The mask of columns of `blocks` we should keep.
        """
        unstacker = unstacker_func(self.values.T)
        column_labels = unstacker.get_new_columns()
        locations = new_columns.get_indexer(column_labels)
        unstacked, column_mask = unstacker.get_new_values()

        # Reduce the mask along axis 0 and keep only the selected entries of
        # the transposed values and of their locations.
        column_mask = column_mask.any(0)
        selected_values = unstacked.T[column_mask]
        selected_locations = locations[column_mask]

        return [make_block(selected_values, placement=selected_locations)], column_mask
Example 28
Project: predictive-maintenance-using-machine-learning   Author: awslabs   File: blocks.py    License: Apache License 2.0 5 votes vote down vote up
def _get_unstack_items(self, unstacker, new_columns):
        """
        Get the placement, values, and mask for a Block unstack.

        This is shared between ObjectBlock and ExtensionBlock. They
        differ in that ObjectBlock passes the values, while ExtensionBlock
        passes the dummy ndarray of positions to be used by a take
        later.

        Parameters
        ----------
        unstacker : pandas.core.reshape.reshape._Unstacker
        new_columns : Index
            All columns of the unstacked BlockManager.

        Returns
        -------
        new_placement : ndarray[int]
            The placement of the new columns in `new_columns`.
        new_values : Union[ndarray, ExtensionArray]
            The first return value from _Unstacker.get_new_values.
        mask : ndarray[bool]
            The second return value from _Unstacker.get_new_values.
        """
        # shared with ExtensionBlock
        new_items = unstacker.get_new_columns()
        new_placement = new_columns.get_indexer(new_items)
        new_values, mask = unstacker.get_new_values()

        mask = mask.any(0)
        return new_placement, new_values, mask 
Example 29
Project: coffeegrindsize   Author: jgagneastro   File: test_external_block.py    License: MIT License 5 votes vote down vote up
def df():
    """Create a frame with columns 'a' and 'b' containing a CustomBlock."""
    plain = pd.DataFrame({'a': [1, 2, 3]})
    plain_blocks = plain._data.blocks
    custom_values = np.arange(3, dtype='int64')
    # the custom block is placed at slice(1, 2)
    with_custom = plain_blocks + (
        CustomBlock(custom_values, placement=slice(1, 2)),)
    return pd.DataFrame(
        BlockManager(with_custom, [pd.Index(['a', 'b']), plain.index]))
Example 30
Project: coffeegrindsize   Author: jgagneastro   File: test_external_block.py    License: MIT License 5 votes vote down vote up
def test_custom_repr():
    """Series and DataFrame reprs must reflect the CustomBlock formatting."""
    numbers = np.arange(3, dtype='int64')

    # series: one custom block placed at slice(0, 3) under a RangeIndex
    blk_for_series = CustomBlock(numbers, placement=slice(0, 3))
    series = pd.Series(SingleBlockManager(blk_for_series, pd.RangeIndex(3)))
    want_series = '0    Val: 0\n1    Val: 1\n2    Val: 2\ndtype: int64'
    assert repr(series) == want_series

    # dataframe: the same values as a single column called 'col'
    blk_for_frame = CustomBlock(numbers, placement=slice(0, 1))
    frame_mgr = BlockManager([blk_for_frame], [['col'], range(3)])
    frame = pd.DataFrame(frame_mgr)
    want_frame = '      col\n0  Val: 0\n1  Val: 1\n2  Val: 2'
    assert repr(frame) == want_frame