Python pandas.compat.filter() Examples

The following are 25 code examples of pandas.compat.filter(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas.compat, or try the search function.
Example #1
Source File: pytables.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def _ensure_term(where, scope_level):
    """
    ensure that the where is a Term or a list of Term
    this makes sure that we are capturing the scope of variables
    that are passed
    create the terms here with a frame_level=2 (we are 2 levels down)
    """

    # only consider list/tuple here as an ndarray is automaticaly a coordinate
    # list
    level = scope_level + 1
    if isinstance(where, (list, tuple)):
        wlist = []
        for w in filter(lambda x: x is not None, where):
            if not maybe_expression(w):
                wlist.append(w)
            else:
                wlist.append(Term(w, scope_level=level))
        where = wlist
    elif maybe_expression(where):
        where = Term(where, scope_level=level)
    return where 
Example #2
Source File: pytables.py    From Splunking-Crime with GNU Affero General Public License v3.0 6 votes vote down vote up
def read_coordinates(self, where=None, start=None, stop=None, **kwargs):
        """Return the row numbers matched by a selection, wrapped in an Index.

        ``False`` is returned instead when ``self.infer_axes()`` is falsy.
        The ``Selection`` that is built is stored on ``self.selection``.
        """
        # the version check runs before any axis inference
        self.validate_version(where)

        if not self.infer_axes():
            return False

        self.selection = Selection(
            self, where=where, start=start, stop=stop, **kwargs)
        coords = self.selection.select_coords()

        if self.selection.filter is None:
            return Index(coords)

        # narrow the coordinates one filter at a time
        for field, op, filt in self.selection.filter.format():
            lowest = coords.min()
            column = self.read_column(
                field, start=lowest, stop=coords.max() + 1)
            # the column was read starting at row `lowest`, so shift the
            # coordinates before using them as positions into it
            keep = op(column.iloc[coords - lowest], filt).values
            coords = coords[keep]

        return Index(coords)
Example #3
Source File: pytables.py    From Splunking-Crime with GNU Affero General Public License v3.0 6 votes vote down vote up
def _ensure_term(where, scope_level):
    """
    ensure that the where is a Term or a list of Term
    this makes sure that we are capturing the scope of variables
    that are passed
    create the terms here with a frame_level=2 (we are 2 levels down)
    """

    # only consider list/tuple here as an ndarray is automaticaly a coordinate
    # list
    level = scope_level + 1
    if isinstance(where, (list, tuple)):
        wlist = []
        for w in filter(lambda x: x is not None, where):
            if not maybe_expression(w):
                wlist.append(w)
            else:
                wlist.append(Term(w, scope_level=level))
        where = wlist
    elif maybe_expression(where):
        where = Term(where, scope_level=level)
    return where 
Example #4
Source File: pytables.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def read_coordinates(self, where=None, start=None, stop=None, **kwargs):
        """Return the row numbers matched by a selection, wrapped in an Index.

        ``False`` is returned instead when ``self.infer_axes()`` is falsy.
        The ``Selection`` that is built is stored on ``self.selection``.
        """
        # the version check runs before any axis inference
        self.validate_version(where)

        if not self.infer_axes():
            return False

        self.selection = Selection(
            self, where=where, start=start, stop=stop, **kwargs)
        coords = self.selection.select_coords()

        if self.selection.filter is None:
            return Index(coords)

        # narrow the coordinates one filter at a time
        for field, op, filt in self.selection.filter.format():
            lowest = coords.min()
            column = self.read_column(
                field, start=lowest, stop=coords.max() + 1)
            # the column was read starting at row `lowest`, so shift the
            # coordinates before using them as positions into it
            keep = op(column.iloc[coords - lowest], filt).values
            coords = coords[keep]

        return Index(coords)
Example #5
Source File: pytables.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def _ensure_term(where, scope_level):
    """
    ensure that the where is a Term or a list of Term
    this makes sure that we are capturing the scope of variables
    that are passed
    create the terms here with a frame_level=2 (we are 2 levels down)
    """

    # only consider list/tuple here as an ndarray is automatically a coordinate
    # list
    level = scope_level + 1
    if isinstance(where, (list, tuple)):
        wlist = []
        for w in filter(lambda x: x is not None, where):
            if not maybe_expression(w):
                wlist.append(w)
            else:
                wlist.append(Term(w, scope_level=level))
        where = wlist
    elif maybe_expression(where):
        where = Term(where, scope_level=level)
    return where 
Example #6
Source File: pytables.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def read_coordinates(self, where=None, start=None, stop=None, **kwargs):
        """Return the row numbers matched by a selection, wrapped in an Index.

        ``False`` is returned instead when ``self.infer_axes()`` is falsy.
        The ``Selection`` that is built is stored on ``self.selection``.
        """
        # the version check runs before any axis inference
        self.validate_version(where)

        if not self.infer_axes():
            return False

        self.selection = Selection(
            self, where=where, start=start, stop=stop, **kwargs)
        coords = self.selection.select_coords()

        if self.selection.filter is None:
            return Index(coords)

        # narrow the coordinates one filter at a time
        for field, op, filt in self.selection.filter.format():
            lowest = coords.min()
            column = self.read_column(
                field, start=lowest, stop=coords.max() + 1)
            # the column was read starting at row `lowest`, so shift the
            # coordinates before using them as positions into it
            keep = op(column.iloc[coords - lowest], filt).values
            coords = coords[keep]

        return Index(coords)
Example #7
Source File: pytables.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def _ensure_term(where, scope_level):
    """
    ensure that the where is a Term or a list of Term
    this makes sure that we are capturing the scope of variables
    that are passed
    create the terms here with a frame_level=2 (we are 2 levels down)
    """

    # only consider list/tuple here as an ndarray is automatically a coordinate
    # list
    level = scope_level + 1
    if isinstance(where, (list, tuple)):
        wlist = []
        for w in filter(lambda x: x is not None, where):
            if not maybe_expression(w):
                wlist.append(w)
            else:
                wlist.append(Term(w, scope_level=level))
        where = wlist
    elif maybe_expression(where):
        where = Term(where, scope_level=level)
    return where 
Example #8
Source File: pytables.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def read_coordinates(self, where=None, start=None, stop=None, **kwargs):
        """Return the row numbers matched by a selection, wrapped in an Index.

        ``False`` is returned instead when ``self.infer_axes()`` is falsy.
        The ``Selection`` that is built is stored on ``self.selection``.
        """
        # the version check runs before any axis inference
        self.validate_version(where)

        if not self.infer_axes():
            return False

        self.selection = Selection(
            self, where=where, start=start, stop=stop, **kwargs)
        coords = self.selection.select_coords()

        if self.selection.filter is None:
            return Index(coords)

        # narrow the coordinates one filter at a time
        for field, op, filt in self.selection.filter.format():
            lowest = coords.min()
            column = self.read_column(
                field, start=lowest, stop=coords.max() + 1)
            # the column was read starting at row `lowest`, so shift the
            # coordinates before using them as positions into it
            keep = op(column.iloc[coords - lowest], filt).values
            coords = coords[keep]

        return Index(coords)
Example #9
Source File: pytables.py    From recruit with Apache License 2.0 6 votes vote down vote up
def read_coordinates(self, where=None, start=None, stop=None, **kwargs):
        """Return the row numbers matched by a selection, wrapped in an Index.

        ``False`` is returned instead when ``self.infer_axes()`` is falsy.
        The ``Selection`` that is built is stored on ``self.selection``.
        """
        # the version check runs before any axis inference
        self.validate_version(where)

        if not self.infer_axes():
            return False

        self.selection = Selection(
            self, where=where, start=start, stop=stop, **kwargs)
        coords = self.selection.select_coords()

        if self.selection.filter is None:
            return Index(coords)

        # narrow the coordinates one filter at a time
        for field, op, filt in self.selection.filter.format():
            lowest = coords.min()
            column = self.read_column(
                field, start=lowest, stop=coords.max() + 1)
            # the column was read starting at row `lowest`, so shift the
            # coordinates before using them as positions into it
            keep = op(column.iloc[coords - lowest], filt).values
            coords = coords[keep]

        return Index(coords)
Example #10
Source File: pytables.py    From recruit with Apache License 2.0 6 votes vote down vote up
def _ensure_term(where, scope_level):
    """
    ensure that the where is a Term or a list of Term
    this makes sure that we are capturing the scope of variables
    that are passed
    create the terms here with a frame_level=2 (we are 2 levels down)
    """

    # only consider list/tuple here as an ndarray is automatically a coordinate
    # list
    level = scope_level + 1
    if isinstance(where, (list, tuple)):
        wlist = []
        for w in filter(lambda x: x is not None, where):
            if not maybe_expression(w):
                wlist.append(w)
            else:
                wlist.append(Term(w, scope_level=level))
        where = wlist
    elif maybe_expression(where):
        where = Term(where, scope_level=level)
    return where 
Example #11
Source File: merge.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def _get_join_keys(llab, rlab, shape, sort):

    # how many levels can be done without overflow
    pred = lambda i: not is_int64_overflow_possible(shape[:i])
    nlev = next(filter(pred, range(len(shape), 0, -1)))

    # get keys for the first `nlev` levels
    stride = np.prod(shape[1:nlev], dtype='i8')
    lkey = stride * llab[0].astype('i8', subok=False, copy=False)
    rkey = stride * rlab[0].astype('i8', subok=False, copy=False)

    for i in range(1, nlev):
        with np.errstate(divide='ignore'):
            stride //= shape[i]
        lkey += llab[i] * stride
        rkey += rlab[i] * stride

    if nlev == len(shape):  # all done!
        return lkey, rkey

    # densify current keys to avoid overflow
    lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort)

    llab = [lkey] + llab[nlev:]
    rlab = [rkey] + rlab[nlev:]
    shape = [count] + shape[nlev:]

    return _get_join_keys(llab, rlab, shape, sort) 
Example #12
Source File: merge.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def _get_join_keys(llab, rlab, shape, sort):

    # how many levels can be done without overflow
    pred = lambda i: not is_int64_overflow_possible(shape[:i])
    nlev = next(filter(pred, range(len(shape), 0, -1)))

    # get keys for the first `nlev` levels
    stride = np.prod(shape[1:nlev], dtype='i8')
    lkey = stride * llab[0].astype('i8', subok=False, copy=False)
    rkey = stride * rlab[0].astype('i8', subok=False, copy=False)

    for i in range(1, nlev):
        with np.errstate(divide='ignore'):
            stride //= shape[i]
        lkey += llab[i] * stride
        rkey += rlab[i] * stride

    if nlev == len(shape):  # all done!
        return lkey, rkey

    # densify current keys to avoid overflow
    lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort)

    llab = [lkey] + llab[nlev:]
    rlab = [rkey] + rlab[nlev:]
    shape = [count] + shape[nlev:]

    return _get_join_keys(llab, rlab, shape, sort) 
Example #13
Source File: merge.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def _get_join_keys(llab, rlab, shape, sort):

    # how many levels can be done without overflow
    pred = lambda i: not is_int64_overflow_possible(shape[:i])
    nlev = next(filter(pred, range(len(shape), 0, -1)))

    # get keys for the first `nlev` levels
    stride = np.prod(shape[1:nlev], dtype='i8')
    lkey = stride * llab[0].astype('i8', subok=False, copy=False)
    rkey = stride * rlab[0].astype('i8', subok=False, copy=False)

    for i in range(1, nlev):
        stride //= shape[i]
        lkey += llab[i] * stride
        rkey += rlab[i] * stride

    if nlev == len(shape):  # all done!
        return lkey, rkey

    # densify current keys to avoid overflow
    lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort)

    llab = [lkey] + llab[nlev:]
    rlab = [rkey] + rlab[nlev:]
    shape = [count] + shape[nlev:]

    return _get_join_keys(llab, rlab, shape, sort) 
Example #14
Source File: merge.py    From Splunking-Crime with GNU Affero General Public License v3.0 5 votes vote down vote up
def _get_join_keys(llab, rlab, shape, sort):

    # how many levels can be done without overflow
    pred = lambda i: not is_int64_overflow_possible(shape[:i])
    nlev = next(filter(pred, range(len(shape), 0, -1)))

    # get keys for the first `nlev` levels
    stride = np.prod(shape[1:nlev], dtype='i8')
    lkey = stride * llab[0].astype('i8', subok=False, copy=False)
    rkey = stride * rlab[0].astype('i8', subok=False, copy=False)

    for i in range(1, nlev):
        with np.errstate(divide='ignore'):
            stride //= shape[i]
        lkey += llab[i] * stride
        rkey += rlab[i] * stride

    if nlev == len(shape):  # all done!
        return lkey, rkey

    # densify current keys to avoid overflow
    lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort)

    llab = [lkey] + llab[nlev:]
    rlab = [rkey] + rlab[nlev:]
    shape = [count] + shape[nlev:]

    return _get_join_keys(llab, rlab, shape, sort) 
Example #15
Source File: merge.py    From recruit with Apache License 2.0 5 votes vote down vote up
def _get_join_keys(llab, rlab, shape, sort):

    # how many levels can be done without overflow
    pred = lambda i: not is_int64_overflow_possible(shape[:i])
    nlev = next(filter(pred, range(len(shape), 0, -1)))

    # get keys for the first `nlev` levels
    stride = np.prod(shape[1:nlev], dtype='i8')
    lkey = stride * llab[0].astype('i8', subok=False, copy=False)
    rkey = stride * rlab[0].astype('i8', subok=False, copy=False)

    for i in range(1, nlev):
        with np.errstate(divide='ignore'):
            stride //= shape[i]
        lkey += llab[i] * stride
        rkey += rlab[i] * stride

    if nlev == len(shape):  # all done!
        return lkey, rkey

    # densify current keys to avoid overflow
    lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort)

    llab = [lkey] + llab[nlev:]
    rlab = [rkey] + rlab[nlev:]
    shape = [count] + shape[nlev:]

    return _get_join_keys(llab, rlab, shape, sort) 
Example #16
Source File: pytables.py    From elasticintel with GNU General Public License v3.0 4 votes vote down vote up
def process_axes(self, obj, columns=None):
        """Reindex ``obj`` along the non-index axes and apply selection filters.

        ``columns`` is copied before use, so the caller's sequence is never
        modified.  Returns the processed object; raises ``ValueError`` when
        a filter names a field that matches neither an axis name nor a
        label contained in an axis.
        """
        if columns is not None:
            # private copy, then put any missing levels at the front
            columns = list(columns)
            if self.is_multi_index:
                for level in self.levels:
                    if level not in columns:
                        columns.insert(0, level)

        # reorder by the non-index axes, restricted to the chosen columns
        for axis, labels in self.non_index_axes:
            obj = _reindex_axis(obj, axis, labels, columns)

        if self.selection.filter is None:
            return obj

        # filters are applied one after another, each on the previous result
        for field, op, filt in self.selection.filter.format():
            for axis_name in obj._AXIS_NAMES.values():
                axis_number = obj._get_axis_number(axis_name)
                axis_values = obj._get_axis(axis_name)

                if field == axis_name:
                    # the filter targets the axis itself; with a multi-index
                    # the level names are added to the filter values
                    if self.is_multi_index:
                        filt = filt.union(Index(self.levels))
                    takers = op(axis_values, filt)
                    obj = obj.loc._getitem_axis(takers, axis=axis_number)
                    break

                if field in axis_values:
                    # the filter targets a label found in this axis, so
                    # filter on the values stored under that label
                    values = _ensure_index(getattr(obj, field).values)
                    filt = _ensure_index(filt)

                    # hack until reversed dim flags are supported
                    if isinstance(obj, DataFrame):
                        axis_number = 1 - axis_number
                    takers = op(values, filt)
                    obj = obj.loc._getitem_axis(takers, axis=axis_number)
                    break
            else:
                raise ValueError(
                    "cannot find the field [%s] for filtering!" % field)

        return obj
Example #17
Source File: pytables.py    From elasticintel with GNU General Public License v3.0 4 votes vote down vote up
def __init__(self, table, where=None, start=None, stop=None, **kwargs):
        self.table = table
        self.where = where
        self.start = start
        self.stop = stop
        self.condition = None
        self.filter = None
        self.terms = None
        self.coordinates = None

        if is_list_like(where):

            # see if we have a passed coordinate like
            try:
                inferred = lib.infer_dtype(where)
                if inferred == 'integer' or inferred == 'boolean':
                    where = np.asarray(where)
                    if where.dtype == np.bool_:
                        start, stop = self.start, self.stop
                        if start is None:
                            start = 0
                        if stop is None:
                            stop = self.table.nrows
                        self.coordinates = np.arange(start, stop)[where]
                    elif issubclass(where.dtype.type, np.integer):
                        if ((self.start is not None and
                                (where < self.start).any()) or
                            (self.stop is not None and
                                (where >= self.stop).any())):
                            raise ValueError(
                                "where must have index locations >= start and "
                                "< stop"
                            )
                        self.coordinates = where

            except:
                pass

        if self.coordinates is None:

            self.terms = self.generate(where)

            # create the numexpr & the filter
            if self.terms is not None:
                self.condition, self.filter = self.terms.evaluate() 
Example #18
Source File: pytables.py    From Splunking-Crime with GNU Affero General Public License v3.0 4 votes vote down vote up
def __init__(self, table, where=None, start=None, stop=None, **kwargs):
        self.table = table
        self.where = where
        self.start = start
        self.stop = stop
        self.condition = None
        self.filter = None
        self.terms = None
        self.coordinates = None

        if is_list_like(where):

            # see if we have a passed coordinate like
            try:
                inferred = lib.infer_dtype(where)
                if inferred == 'integer' or inferred == 'boolean':
                    where = np.asarray(where)
                    if where.dtype == np.bool_:
                        start, stop = self.start, self.stop
                        if start is None:
                            start = 0
                        if stop is None:
                            stop = self.table.nrows
                        self.coordinates = np.arange(start, stop)[where]
                    elif issubclass(where.dtype.type, np.integer):
                        if ((self.start is not None and
                                (where < self.start).any()) or
                            (self.stop is not None and
                                (where >= self.stop).any())):
                            raise ValueError(
                                "where must have index locations >= start and "
                                "< stop"
                            )
                        self.coordinates = where

            except:
                pass

        if self.coordinates is None:

            self.terms = self.generate(where)

            # create the numexpr & the filter
            if self.terms is not None:
                self.condition, self.filter = self.terms.evaluate() 
Example #19
Source File: pytables.py    From Splunking-Crime with GNU Affero General Public License v3.0 4 votes vote down vote up
def process_axes(self, obj, columns=None):
        """Reindex ``obj`` along the non-index axes and apply selection filters.

        ``columns`` is copied before use, so the caller's sequence is never
        modified.  Returns the processed object; raises ``ValueError`` when
        a filter names a field that matches neither an axis name nor a
        label contained in an axis.
        """
        if columns is not None:
            # private copy, then put any missing levels at the front
            columns = list(columns)
            if self.is_multi_index:
                for level in self.levels:
                    if level not in columns:
                        columns.insert(0, level)

        # reorder by the non-index axes, restricted to the chosen columns
        for axis, labels in self.non_index_axes:
            obj = _reindex_axis(obj, axis, labels, columns)

        if self.selection.filter is None:
            return obj

        # filters are applied one after another, each on the previous result
        for field, op, filt in self.selection.filter.format():
            for axis_name in obj._AXIS_NAMES.values():
                axis_number = obj._get_axis_number(axis_name)
                axis_values = obj._get_axis(axis_name)

                if field == axis_name:
                    # the filter targets the axis itself; with a multi-index
                    # the level names are added to the filter values
                    if self.is_multi_index:
                        filt = filt.union(Index(self.levels))
                    takers = op(axis_values, filt)
                    obj = obj.loc._getitem_axis(takers, axis=axis_number)
                    break

                if field in axis_values:
                    # the filter targets a label found in this axis, so
                    # filter on the values stored under that label
                    values = _ensure_index(getattr(obj, field).values)
                    filt = _ensure_index(filt)

                    # hack until reversed dim flags are supported
                    if isinstance(obj, DataFrame):
                        axis_number = 1 - axis_number
                    takers = op(values, filt)
                    obj = obj.loc._getitem_axis(takers, axis=axis_number)
                    break
            else:
                raise ValueError(
                    "cannot find the field [%s] for filtering!" % field)

        return obj
Example #20
Source File: pytables.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 4 votes vote down vote up
def __init__(self, table, where=None, start=None, stop=None):
        self.table = table
        self.where = where
        self.start = start
        self.stop = stop
        self.condition = None
        self.filter = None
        self.terms = None
        self.coordinates = None

        if is_list_like(where):

            # see if we have a passed coordinate like
            try:
                inferred = lib.infer_dtype(where, skipna=False)
                if inferred == 'integer' or inferred == 'boolean':
                    where = np.asarray(where)
                    if where.dtype == np.bool_:
                        start, stop = self.start, self.stop
                        if start is None:
                            start = 0
                        if stop is None:
                            stop = self.table.nrows
                        self.coordinates = np.arange(start, stop)[where]
                    elif issubclass(where.dtype.type, np.integer):
                        if ((self.start is not None and
                                (where < self.start).any()) or
                            (self.stop is not None and
                                (where >= self.stop).any())):
                            raise ValueError(
                                "where must have index locations >= start and "
                                "< stop"
                            )
                        self.coordinates = where

            except ValueError:
                pass

        if self.coordinates is None:

            self.terms = self.generate(where)

            # create the numexpr & the filter
            if self.terms is not None:
                self.condition, self.filter = self.terms.evaluate() 
Example #21
Source File: pytables.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 4 votes vote down vote up
def process_axes(self, obj, columns=None):
        """Reindex ``obj`` along the non-index axes and apply selection filters.

        ``columns`` is copied before use, so the caller's sequence is never
        modified.  Returns the processed object; raises ``ValueError`` when
        a filter names a field that matches neither an axis name nor a
        label contained in an axis.
        """
        if columns is not None:
            # private copy, then put any missing levels at the front
            columns = list(columns)
            if self.is_multi_index:
                for level in self.levels:
                    if level not in columns:
                        columns.insert(0, level)

        # reorder by the non-index axes, restricted to the chosen columns
        for axis, labels in self.non_index_axes:
            obj = _reindex_axis(obj, axis, labels, columns)

        if self.selection.filter is None:
            return obj

        # filters are applied one after another, each on the previous result
        for field, op, filt in self.selection.filter.format():
            for axis_name in obj._AXIS_NAMES.values():
                axis_number = obj._get_axis_number(axis_name)
                axis_values = obj._get_axis(axis_name)

                if field == axis_name:
                    # the filter targets the axis itself; with a multi-index
                    # the level names are added to the filter values
                    if self.is_multi_index:
                        filt = filt.union(Index(self.levels))
                    takers = op(axis_values, filt)
                    obj = obj.loc._getitem_axis(takers, axis=axis_number)
                    break

                if field in axis_values:
                    # the filter targets a label found in this axis, so
                    # filter on the values stored under that label
                    values = ensure_index(getattr(obj, field).values)
                    filt = ensure_index(filt)

                    # hack until reversed dim flags are supported
                    if isinstance(obj, DataFrame):
                        axis_number = 1 - axis_number
                    takers = op(values, filt)
                    obj = obj.loc._getitem_axis(takers, axis=axis_number)
                    break
            else:
                raise ValueError("cannot find the field [{field}] for "
                                 "filtering!".format(field=field))

        return obj
Example #22
Source File: pytables.py    From vnpy_crypto with MIT License 4 votes vote down vote up
def __init__(self, table, where=None, start=None, stop=None, **kwargs):
        self.table = table
        self.where = where
        self.start = start
        self.stop = stop
        self.condition = None
        self.filter = None
        self.terms = None
        self.coordinates = None

        if is_list_like(where):

            # see if we have a passed coordinate like
            try:
                inferred = lib.infer_dtype(where)
                if inferred == 'integer' or inferred == 'boolean':
                    where = np.asarray(where)
                    if where.dtype == np.bool_:
                        start, stop = self.start, self.stop
                        if start is None:
                            start = 0
                        if stop is None:
                            stop = self.table.nrows
                        self.coordinates = np.arange(start, stop)[where]
                    elif issubclass(where.dtype.type, np.integer):
                        if ((self.start is not None and
                                (where < self.start).any()) or
                            (self.stop is not None and
                                (where >= self.stop).any())):
                            raise ValueError(
                                "where must have index locations >= start and "
                                "< stop"
                            )
                        self.coordinates = where

            except:
                pass

        if self.coordinates is None:

            self.terms = self.generate(where)

            # create the numexpr & the filter
            if self.terms is not None:
                self.condition, self.filter = self.terms.evaluate() 
Example #23
Source File: pytables.py    From vnpy_crypto with MIT License 4 votes vote down vote up
def process_axes(self, obj, columns=None):
        """Reindex ``obj`` along the non-index axes and apply selection filters.

        ``columns`` is copied before use, so the caller's sequence is never
        modified.  Returns the processed object; raises ``ValueError`` when
        a filter names a field that matches neither an axis name nor a
        label contained in an axis.
        """
        if columns is not None:
            # private copy, then put any missing levels at the front
            columns = list(columns)
            if self.is_multi_index:
                for level in self.levels:
                    if level not in columns:
                        columns.insert(0, level)

        # reorder by the non-index axes, restricted to the chosen columns
        for axis, labels in self.non_index_axes:
            obj = _reindex_axis(obj, axis, labels, columns)

        if self.selection.filter is None:
            return obj

        # filters are applied one after another, each on the previous result
        for field, op, filt in self.selection.filter.format():
            for axis_name in obj._AXIS_NAMES.values():
                axis_number = obj._get_axis_number(axis_name)
                axis_values = obj._get_axis(axis_name)

                if field == axis_name:
                    # the filter targets the axis itself; with a multi-index
                    # the level names are added to the filter values
                    if self.is_multi_index:
                        filt = filt.union(Index(self.levels))
                    takers = op(axis_values, filt)
                    obj = obj.loc._getitem_axis(takers, axis=axis_number)
                    break

                if field in axis_values:
                    # the filter targets a label found in this axis, so
                    # filter on the values stored under that label
                    values = _ensure_index(getattr(obj, field).values)
                    filt = _ensure_index(filt)

                    # hack until reversed dim flags are supported
                    if isinstance(obj, DataFrame):
                        axis_number = 1 - axis_number
                    takers = op(values, filt)
                    obj = obj.loc._getitem_axis(takers, axis=axis_number)
                    break
            else:
                raise ValueError(
                    "cannot find the field [%s] for filtering!" % field)

        return obj
Example #24
Source File: pytables.py    From recruit with Apache License 2.0 4 votes vote down vote up
def __init__(self, table, where=None, start=None, stop=None):
        """
        Store the selection inputs and resolve ``where``.

        ``where`` ends up either as explicit coordinates
        (``self.coordinates``) or as terms built by ``self.generate`` which
        are evaluated into ``self.condition`` and ``self.filter``.

        Parameters
        ----------
        table : object
            The table being selected from; ``table.nrows`` is read only when
            a boolean mask is passed and ``stop`` is None.
        where : optional
            A list-like inferred as integers or booleans is treated as
            coordinates; anything else is handed to ``self.generate``.
        start, stop : optional
            Bounds; used as the range for a boolean mask and as limits when
            validating integer coordinates.
        """
        self.table, self.where = table, where
        self.start, self.stop = start, stop
        self.condition = self.filter = self.terms = self.coordinates = None

        if is_list_like(where):

            # Try to read ``where`` as coordinates.  NOTE: every ValueError
            # raised inside this try -- including the explicit bounds error
            # below -- is swallowed, after which ``where`` (possibly already
            # converted to an ndarray) falls through to ``self.generate``.
            try:
                kind = lib.infer_dtype(where, skipna=False)
                if kind in ('integer', 'boolean'):
                    where = np.asarray(where)
                    if where.dtype == np.bool_:
                        # boolean mask: pick positions out of [start, stop)
                        lower = 0 if self.start is None else self.start
                        upper = (self.table.nrows if self.stop is None
                                 else self.stop)
                        self.coordinates = np.arange(lower, upper)[where]
                    elif issubclass(where.dtype.type, np.integer):
                        # integer positions: each must lie in [start, stop)
                        below_start = (self.start is not None and
                                       (where < self.start).any())
                        if below_start or (self.stop is not None and
                                           (where >= self.stop).any()):
                            raise ValueError(
                                "where must have index locations >= start and "
                                "< stop"
                            )
                        self.coordinates = where

            except ValueError:
                pass

        # explicit coordinates were found, so no terms are needed
        if self.coordinates is not None:
            return

        self.terms = self.generate(where)

        # create the numexpr & the filter
        if self.terms is not None:
            self.condition, self.filter = self.terms.evaluate()
Example #25
Source File: pytables.py    From recruit with Apache License 2.0 4 votes vote down vote up
def process_axes(self, obj, columns=None):
        """
        Reindex ``obj`` along the non-index axes, then apply the selection
        filters one after another.

        Parameters
        ----------
        obj : object
            The object to process; it is rebound to the result of each step.
        columns : iterable, optional
            Columns to limit to.  A copy is taken, so the caller's object is
            never modified.

        Returns
        -------
        The reindexed and filtered object.

        Raises
        ------
        ValueError
            If a filter field is neither the name of an axis nor a member of
            any axis.
        """

        def apply_filter(target, field, op, filt):
            # apply a single (field, op, filt) filter to ``target``
            for axis_name in target._AXIS_NAMES.values():
                axis_number = target._get_axis_number(axis_name)
                axis_values = target._get_axis(axis_name)

                # the field names the axis itself
                if field == axis_name:

                    # with a multi-index the levels have to be included
                    if self.is_multi_index:
                        filt = filt.union(Index(self.levels))

                    takers = op(axis_values, filt)
                    return target.loc._getitem_axis(takers,
                                                    axis=axis_number)

                # the field may instead be a label inside this axis
                if field in axis_values:

                    # filter on the values stored under that label
                    values = ensure_index(getattr(target, field).values)
                    filt = ensure_index(filt)

                    # hack until we support reversed dim flags
                    if isinstance(target, DataFrame):
                        axis_number = 1 - axis_number
                    takers = op(values, filt)
                    return target.loc._getitem_axis(takers,
                                                    axis=axis_number)

            raise ValueError("cannot find the field [{field}] for "
                             "filtering!".format(field=field))

        if columns is not None:
            # work on a copy to avoid side effects on the caller's object
            columns = list(columns)

            # make sure to include levels if we have them
            if self.is_multi_index:
                for level in self.levels:
                    if level not in columns:
                        columns.insert(0, level)

        # reorder by any non_index_axes & limit to the selected columns
        for axis, labels in self.non_index_axes:
            obj = _reindex_axis(obj, axis, labels, columns)

        # nothing to filter on
        if self.selection.filter is None:
            return obj

        # apply the selection filters (but keep in the same order)
        for field, op, filt in self.selection.filter.format():
            obj = apply_filter(obj, field, op, filt)

        return obj