Python toolz.compose() Examples

The following are 17 code examples of toolz.compose(). You can go to the original project or source file by following the link above each example. You may also want to check out all available functions/classes of the module toolz, or try the search function.
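As a quick refresher before the examples: toolz.compose(*funcs) returns a single callable that applies the given functions from right to left, so compose(f, g)(x) is f(g(x)). A minimal sketch:

from toolz import compose

inc = lambda x: x + 1
double = lambda x: x * 2

# Right-to-left: double runs first, then inc.
assert compose(inc, double)(10) == 21  # inc(double(10))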
Example #1
Source File: labelarray.py    From catalyst with Apache License 2.0
def matches(self, pattern):
        """
        Elementwise regex match.

        Parameters
        ----------
        pattern : str or compiled regex

        Returns
        -------
        matches : np.ndarray[bool]
            An array with the same shape as self indicating whether each
            element of self was matched by ``pattern``.
        """
        return self.map_predicate(compose(bool, pattern.match))

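The compose(bool, pattern.match) idiom above collapses re.Match-or-None into a plain boolean, which is exactly what an elementwise predicate needs. A standalone sketch of the same idiom (the pattern here is hypothetical):

import re
from toolz import compose

pattern = re.compile(r'[A-Z]+\d')
is_match = compose(bool, pattern.match)  # bool(pattern.match(s))

assert is_match('AB1') is True
assert is_match('xy') is False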
Example #2
Source File: rules.py    From ibis with Apache License 2.0
def all_of(inners, arg):
    """All of the inner valudators must pass.

    The order of inner validators matters.

    Parameters
    ----------
    inners : List[validator]
      Functions are applied from right to left, so all_of([rule1, rule2], arg)
      is the same as rule1(rule2(arg)).
    arg : Any
      Value to be validated.

    Returns
    -------
    arg : Any
      Value, possibly coerced by inner validators to the appropriate types
    """
    return compose(*inners)(arg) 
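Because compose applies right to left, all_of([rule1, rule2], arg) really is rule1(rule2(arg)). A sketch with two toy validators (both hypothetical):

from toolz import compose

def must_be_int(x):       # toy validator: passes ints through unchanged
    assert isinstance(x, int)
    return x

def must_be_positive(x):  # toy validator: passes positive values through
    assert x > 0
    return x

# Equivalent to must_be_int(must_be_positive(3))
assert compose(*[must_be_int, must_be_positive])(3) == 3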
Example #3
Source File: selection.py    From ibis with Apache License 2.0
def physical_tables_join(join):
    # Physical roots of Join nodes are the unique physical roots of their
    # left and right TableNodes.
    func = compose(physical_tables, methodcaller('op'))
    return list(unique(concat(map(func, (join.left, join.right))))) 
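Composing with operator.methodcaller, as above, gives a pipeline that first invokes a method on its argument and then post-processes the result. A generic sketch of the same shape:

from operator import methodcaller
from toolz import compose

shout = compose(str.upper, methodcaller('strip'))
assert shout('  ibis  ') == 'IBIS'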
Example #4
Source File: test_factor.py    From catalyst with Apache License 2.0
def _test_quantiles_uneven_buckets(self):
        permute = partial(permute_rows, 5)
        shape = (5, 5)

        factor_data = permute(log1p(arange(25, dtype=float).reshape(shape)))
        mask_data = permute(self.eye_mask(shape=shape))

        f = F()
        m = Mask()

        permuted_array = compose(permute, partial(array, dtype=int64_dtype))
        self.check_terms(
            terms={
                '3_masked': f.quantiles(bins=3, mask=m),
                '7_masked': f.quantiles(bins=7, mask=m),
            },
            initial_workspace={
                f: factor_data,
                m: mask_data,
            },
            expected={
                '3_masked': permuted_array([[-1, 0,  0,  1,  2],
                                            [0, -1,  0,  1,  2],
                                            [0,  0, -1,  1,  2],
                                            [0,  0,  1, -1,  2],
                                            [0,  0,  1,  2, -1]]),
                '7_masked': permuted_array([[-1, 0,  2,  4,  6],
                                            [0, -1,  2,  4,  6],
                                            [0,  2, -1,  4,  6],
                                            [0,  2,  4, -1,  6],
                                            [0,  2,  4,  6, -1]]),
            },
            mask=self.build_mask(self.ones_mask(shape=shape)),
        ) 
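The permuted_array helper above is a recurring trick in these tests: compose an array constructor with the same row shuffle applied to the inputs, so expectations stay aligned with the shuffled data. A sketch with a simple row reversal standing in for the project-specific permute_rows:

from functools import partial
from numpy import array, int64
from toolz import compose

permute = lambda a: a[::-1]  # stand-in for permute_rows
permuted_array = compose(permute, partial(array, dtype=int64))

print(permuted_array([[1, 2], [3, 4]]))  # [[3 4]
                                         #  [1 2]]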
Example #5
Source File: test_factor.py    From zipline-chinese with Apache License 2.0
def test_quantiles_uneven_buckets(self):
        permute = partial(permute_rows, 5)
        shape = (5, 5)

        factor_data = permute(log1p(arange(25, dtype=float).reshape(shape)))
        mask_data = permute(self.eye_mask(shape=shape))

        f = F()
        m = Mask()

        permuted_array = compose(permute, partial(array, dtype=int64_dtype))
        self.check_terms(
            terms={
                '3_masked': f.quantiles(bins=3, mask=m),
                '7_masked': f.quantiles(bins=7, mask=m),
            },
            initial_workspace={
                f: factor_data,
                m: mask_data,
            },
            expected={
                '3_masked': permuted_array([[-1, 0,  0,  1,  2],
                                            [0, -1,  0,  1,  2],
                                            [0,  0, -1,  1,  2],
                                            [0,  0,  1, -1,  2],
                                            [0,  0,  1,  2, -1]]),
                '7_masked': permuted_array([[-1, 0,  2,  4,  6],
                                            [0, -1,  2,  4,  6],
                                            [0,  2, -1,  4,  6],
                                            [0,  2,  4, -1,  6],
                                            [0,  2,  4,  6, -1]]),
            },
            mask=self.build_mask(self.ones_mask(shape=shape)),
        ) 
Example #6
Source File: utils.py    From databrewer with MIT License
def format_results(terminal_width, key_list, separator, text_list,
                   left_align=True, min_factor=3, **kwargs):
    """Returns formatted results in two columns.
    """
    key_width = max(map(len, key_list))
    separator_length = len(separator)
    desc_wrap = toolz.identity
    if terminal_width:
        if key_width / terminal_width > .5:
            key_width = terminal_width // 2 - 3
        text_width = terminal_width - key_width - separator_length
        if text_width * min_factor > terminal_width:
            desc_wrap = toolz.compose(
                ('\n' + ' ' * (key_width + separator_length)).join,
                toolz.partial(textwrap.wrap, width=text_width, **kwargs),
            )

    if left_align:
        fmt = '%-*s%s%s'
    else:
        fmt = '%*s%s%s'

    for key, text in zip(key_list, text_list):
        text = desc_wrap(text)
        if len(key) > key_width:
            yield fmt % (key_width, key, separator, '')
            yield fmt % (key_width, '', ' ' * separator_length, text)
        else:
            yield fmt % (key_width, key, separator, text) 
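The desc_wrap composition above wraps text to the available width and then joins the pieces with a newline plus padding, so continuation lines stay aligned under the text column. The same two-step, in isolation:

import textwrap
from toolz import compose, partial

# Wrap to 30 columns, indent continuation lines by 10 spaces.
desc_wrap = compose(('\n' + ' ' * 10).join, partial(textwrap.wrap, width=30))
print(desc_wrap('a fairly long description that needs to be wrapped'))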
Example #7
Source File: test.py    From seismic-deeplearning with MIT License
def _compose_processing_pipeline(depth, aug=None):
    steps = []
    if aug is not None:
        steps.append(_apply_augmentation(aug))

    if depth == "patch":
        steps.append(_add_depth)

    steps.append(_to_torch)
    steps.append(_expand_dims_if_necessary)
    steps.reverse()
    return compose(*steps) 
Example #8
Source File: utilities.py    From seismic-deeplearning with MIT License
def compose_processing_pipeline(depth, aug=None):
    steps = []
    if aug is not None:
        steps.append(_apply_augmentation(aug))

    if depth == "patch":
        steps.append(_add_depth)

    steps.append(_to_torch)
    steps.append(_expand_dims_if_necessary)
    steps.reverse()
    return compose(*steps) 
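Since compose applies right to left, reversing the accumulated steps (as both pipelines above do) makes them execute in the order they were appended. A sketch with stand-in steps:

from toolz import compose

steps = [str.strip, str.lower, str.title]  # intended execution order
steps.reverse()
pipeline = compose(*steps)  # right-to-left, so strip runs first

assert pipeline('  HELLO WORLD  ') == 'Hello World'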
Example #9
Source File: lineage.py    From ibis with Apache License 2.0
def visitor(self):
        return compose(reversed, list) 
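compose(reversed, list) produces a callable that first materializes any iterable into a list, then hands back a reversed iterator over it:

from toolz import compose

rev = compose(reversed, list)
assert list(rev(iter(range(3)))) == [2, 1, 0]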
Example #10
Source File: test_factor.py    From zipline-chinese with Apache License 2.0
def test_quantiles_unmasked(self, seed):
        permute = partial(permute_rows, seed)

        shape = (6, 6)

        # Shuffle the input rows to verify that we don't depend on the order.
        # Take the log to ensure that we don't depend on linear scaling or
        # integrality of inputs
        factor_data = permute(log1p(arange(36, dtype=float).reshape(shape)))

        f = self.f

        # Apply the same shuffle we applied to the input rows to our
        # expectations. Doing it this way makes it obvious that our
        # expectation corresponds to our input, while still testing against
        # a range of input orderings.
        permuted_array = compose(permute, partial(array, dtype=int64_dtype))
        self.check_terms(
            terms={
                '2': f.quantiles(bins=2),
                '3': f.quantiles(bins=3),
                '6': f.quantiles(bins=6),
            },
            initial_workspace={
                f: factor_data,
            },
            expected={
                # The values in the input are all increasing, so the first half
                # of each row should be in the bottom bucket, and the second
                # half should be in the top bucket.
                '2': permuted_array([[0, 0, 0, 1, 1, 1],
                                     [0, 0, 0, 1, 1, 1],
                                     [0, 0, 0, 1, 1, 1],
                                     [0, 0, 0, 1, 1, 1],
                                     [0, 0, 0, 1, 1, 1],
                                     [0, 0, 0, 1, 1, 1]]),
                # Similar for three buckets.
                '3': permuted_array([[0, 0, 1, 1, 2, 2],
                                     [0, 0, 1, 1, 2, 2],
                                     [0, 0, 1, 1, 2, 2],
                                     [0, 0, 1, 1, 2, 2],
                                     [0, 0, 1, 1, 2, 2],
                                     [0, 0, 1, 1, 2, 2]]),
                # In the limiting case, we just have every column different.
                '6': permuted_array([[0, 1, 2, 3, 4, 5],
                                     [0, 1, 2, 3, 4, 5],
                                     [0, 1, 2, 3, 4, 5],
                                     [0, 1, 2, 3, 4, 5],
                                     [0, 1, 2, 3, 4, 5],
                                     [0, 1, 2, 3, 4, 5]]),
            },
            mask=self.build_mask(self.ones_mask(shape=shape)),
        ) 
Example #11
Source File: test_filter.py    From catalyst with Apache License 2.0
def _test_top_and_bottom_with_groupby_and_mask(self, dtype, seed):
        permute = partial(permute_rows, seed)
        permuted_array = compose(permute, partial(array, dtype=int64_dtype))

        shape = (8, 8)

        # Shuffle the input rows to verify that we correctly pick out the top
        # values independently of order.
        factor_data = permute(arange(0, 64, dtype=dtype).reshape(shape))
        classifier_data = permuted_array([[0, 0, 1, 1, 2, 2, 0, 0],
                                          [0, 0, 1, 1, 2, 2, 0, 0],
                                          [0, 1, 2, 3, 0, 1, 2, 3],
                                          [0, 1, 2, 3, 0, 1, 2, 3],
                                          [0, 0, 0, 0, 1, 1, 1, 1],
                                          [0, 0, 0, 0, 1, 1, 1, 1],
                                          [0, 0, 0, 0, 0, 0, 0, 0],
                                          [0, 0, 0, 0, 0, 0, 0, 0]])

        f = self.f
        c = self.c

        self.check_terms(
            terms={
                'top2': f.top(2, groupby=c),
                'bottom2': f.bottom(2, groupby=c),
            },
            initial_workspace={
                f: factor_data,
                c: classifier_data,
            },
            expected={
                # Should be the rightmost two entries in classifier_data,
                # ignoring the off-diagonal.
                'top2': permuted_array([[0, 1, 1, 1, 1, 1, 1, 0],
                                        [0, 1, 1, 1, 1, 1, 0, 1],
                                        [1, 1, 1, 1, 1, 0, 1, 1],
                                        [1, 1, 1, 1, 0, 1, 1, 1],
                                        [0, 1, 1, 0, 0, 0, 1, 1],
                                        [0, 1, 0, 1, 0, 0, 1, 1],
                                        [0, 0, 0, 0, 0, 0, 1, 1],
                                        [0, 0, 0, 0, 0, 0, 1, 1]], dtype=bool),
                # Should be the rightmost two entries in classifier_data,
                # ignoring the off-diagonal.
                'bottom2': permuted_array([[1, 1, 1, 1, 1, 1, 0, 0],
                                           [1, 1, 1, 1, 1, 1, 0, 0],
                                           [1, 1, 1, 1, 1, 0, 1, 1],
                                           [1, 1, 1, 1, 0, 1, 1, 1],
                                           [1, 1, 0, 0, 1, 1, 0, 0],
                                           [1, 1, 0, 0, 1, 1, 0, 0],
                                           [1, 0, 1, 0, 0, 0, 0, 0],
                                           [0, 1, 1, 0, 0, 0, 0, 0]],
                                          dtype=bool),
            },
            mask=self.build_mask(permute(rot90(self.eye_mask(shape=shape)))),
        ) 
Example #12
Source File: test_factor.py    From catalyst with Apache License 2.0
def _test_quantiles_unmasked(self, seed):
        permute = partial(permute_rows, seed)

        shape = (6, 6)

        # Shuffle the input rows to verify that we don't depend on the order.
        # Take the log to ensure that we don't depend on linear scaling or
        # integrality of inputs
        factor_data = permute(log1p(arange(36, dtype=float).reshape(shape)))

        f = self.f

        # Apply the same shuffle we applied to the input rows to our
        # expectations. Doing it this way makes it obvious that our
        # expectation corresponds to our input, while still testing against
        # a range of input orderings.
        permuted_array = compose(permute, partial(array, dtype=int64_dtype))
        self.check_terms(
            terms={
                '2': f.quantiles(bins=2),
                '3': f.quantiles(bins=3),
                '6': f.quantiles(bins=6),
            },
            initial_workspace={
                f: factor_data,
            },
            expected={
                # The values in the input are all increasing, so the first half
                # of each row should be in the bottom bucket, and the second
                # half should be in the top bucket.
                '2': permuted_array([[0, 0, 0, 1, 1, 1],
                                     [0, 0, 0, 1, 1, 1],
                                     [0, 0, 0, 1, 1, 1],
                                     [0, 0, 0, 1, 1, 1],
                                     [0, 0, 0, 1, 1, 1],
                                     [0, 0, 0, 1, 1, 1]]),
                # Similar for three buckets.
                '3': permuted_array([[0, 0, 1, 1, 2, 2],
                                     [0, 0, 1, 1, 2, 2],
                                     [0, 0, 1, 1, 2, 2],
                                     [0, 0, 1, 1, 2, 2],
                                     [0, 0, 1, 1, 2, 2],
                                     [0, 0, 1, 1, 2, 2]]),
                # In the limiting case, we just have every column different.
                '6': permuted_array([[0, 1, 2, 3, 4, 5],
                                     [0, 1, 2, 3, 4, 5],
                                     [0, 1, 2, 3, 4, 5],
                                     [0, 1, 2, 3, 4, 5],
                                     [0, 1, 2, 3, 4, 5],
                                     [0, 1, 2, 3, 4, 5]]),
            },
            mask=self.build_mask(self.ones_mask(shape=shape)),
        ) 
Example #13
Source File: input_validation.py    From catalyst with Apache License 2.0
def expect_types(__funcname=_qualified_name, **named):
    """
    Preprocessing decorator that verifies inputs have expected types.

    Examples
    --------
    >>> @expect_types(x=int, y=str)
    ... def foo(x, y):
    ...    return x, y
    ...
    >>> foo(2, '3')
    (2, '3')
    >>> foo(2.0, '3')  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
    Traceback (most recent call last):
       ...
    TypeError: ...foo() expected a value of type int for argument 'x',
    but got float instead.

    Notes
    -----
    A special argument, __funcname, can be provided as a string to override the
    function name shown in error messages.  This is most often used on __init__
    or __new__ methods to make errors refer to the class name instead of the
    function name.
    """
    for name, type_ in iteritems(named):
        if not isinstance(type_, (type, tuple)):
            raise TypeError(
                "expect_types() expected a type or tuple of types for "
                "argument '{name}', but got {type_} instead.".format(
                    name=name, type_=type_,
                )
            )

    def _expect_type(type_):
        # Slightly different messages for type and tuple of types.
        _template = (
            "%(funcname)s() expected a value of type {type_or_types} "
            "for argument '%(argname)s', but got %(actual)s instead."
        )
        if isinstance(type_, tuple):
            template = _template.format(
                type_or_types=' or '.join(map(_qualified_name, type_))
            )
        else:
            template = _template.format(type_or_types=_qualified_name(type_))

        return make_check(
            exc_type=TypeError,
            template=template,
            pred=lambda v: not isinstance(v, type_),
            actual=compose(_qualified_name, type),
            funcname=__funcname,
        )

    return preprocess(**valmap(_expect_type, named)) 
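The actual=compose(_qualified_name, type) argument renders a readable name for the offending value's type: type(v) runs first, then the name lookup. With attrgetter('__name__') standing in for the project's _qualified_name:

from operator import attrgetter
from toolz import compose

type_name = compose(attrgetter('__name__'), type)
assert type_name(3.0) == 'float'
assert type_name('x') == 'str'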
Example #14
Source File: input_validation.py    From pylivetrader with Apache License 2.0
def expect_types(__funcname=_qualified_name, **named):
    """
    Preprocessing decorator that verifies inputs have expected types.

    Usage
    -----
    >>> @expect_types(x=int, y=str)
    ... def foo(x, y):
    ...    return x, y
    ...
    >>> foo(2, '3')
    (2, '3')
    >>> foo(2.0, '3')  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
    Traceback (most recent call last):
       ...
    TypeError: ...foo() expected a value of type int for argument 'x',
    but got float instead.

    Notes
    -----
    A special argument, __funcname, can be provided as a string to override the
    function name shown in error messages.  This is most often used on __init__
    or __new__ methods to make errors refer to the class name instead of the
    function name.
    """
    for name, type_ in iteritems(named):
        if not isinstance(type_, (type, tuple)):
            raise TypeError(
                "expect_types() expected a type or tuple of types for "
                "argument '{name}', but got {type_} instead.".format(
                    name=name, type_=type_,
                )
            )

    def _expect_type(type_):
        # Slightly different messages for type and tuple of types.
        _template = (
            "%(funcname)s() expected a value of type {type_or_types} "
            "for argument '%(argname)s', but got %(actual)s instead."
        )
        if isinstance(type_, tuple):
            template = _template.format(
                type_or_types=' or '.join(map(_qualified_name, type_))
            )
        else:
            template = _template.format(type_or_types=_qualified_name(type_))

        return make_check(
            exc_type=TypeError,
            template=template,
            pred=lambda v: not isinstance(v, type_),
            actual=compose(_qualified_name, type),
            funcname=__funcname,
        )

    return preprocess(**valmap(_expect_type, named)) 
Example #15
Source File: gpu_logger.py    From gpu_monitor with MIT License
def start_logger(ip_or_url,
                 username,
                 password,
                 database,
                 port=8086,
                 series_name='gpu_measurements',
                 polling_interval=1,
                 retention_duration=MEASUREMENTS_RETENTION_DURATION,
                 **tags):
    """ Starts GPU logger

    Logs GPU measurements to an influxdb database

    Parameters
    ----------
    ip_or_url: ip or url of influxdb
    username: Username to log into influxdb database
    password: Password to log into influxdb database
    database: Name of database to log data to. It will create the database if one doesn't exist
    port: A number indicating the port on which influxdb is listening
    series_name: Name of series/table to log data to
    polling_interval: polling interval for measurements in seconds [default:1]
    retention_duration: The duration to retain the measurements for; valid values are 1h, 90m, 12h, 7d, and 4w. Default: 1d
    tags: One or more tags to apply to the data. These can then be used to group or select timeseries
          Example: --machine my_machine --cluster kerb01

    """

    logger = _logger()
    logger.info('Trying to connect to {} on port {} as {}'.format(ip_or_url, port, username))
    try:
        client = InfluxDBClient(ip_or_url, port, username, password)
        response = client.ping()
    except ConnectionError:
        logger.warning('Could not connect to InfluxDB. GPU metrics NOT being recorded')
        raise MetricsRecordingFailed()

    logger.info('Connected | version {}'.format(response))
    _switch_to_database(client, database)

    logger.info('Measurement retention duration {}'.format(retention_duration))
    _set_retention_policy(client, database, retention_duration)

    to_db = compose(_create_influxdb_writer(client, tags=tags),
                    _gpu_to_influxdb_format(series_name))
    logger.info('Starting logging...')
    return start_pushing_measurements_to(to_db, polling_interval=polling_interval) 
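to_db chains a formatter with a writer, so each polled GPU sample is converted to the database's record shape and pushed in a single call. A generic sketch of that shape (both helpers are hypothetical; printing stands in for InfluxDB):

from toolz import compose

def to_record(sample):   # hypothetical formatter
    return {'measurement': 'gpu', 'fields': sample}

def write(record):       # hypothetical writer
    print(record)

to_db = compose(write, to_record)
to_db({'utilization': 87})  # {'measurement': 'gpu', 'fields': {'utilization': 87}}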
Example #16
Source File: _343.py    From codetransformer with GNU General Public License v2.0
def make_while_loop_test_expr(loop_body_instrs):
    """
    Make an expression in the context of a while-loop test.

    Code of the form::

        while <expr>:
            <body>

    generates a POP_JUMP_IF_FALSE for the loop test, while code of the form::

        while not <expr>:
            <body>

    generates a POP_JUMP_IF_TRUE for the loop test.

    Code of the form::

        while True:
            <body>

    generates no jumps at all.
    """
    bottom_of_loop = loop_body_instrs[-1]
    is_jump_to_bottom = compose(op.is_(bottom_of_loop), op.attrgetter('arg'))

    # Consume instructions until we find a jump to the bottom of the loop.
    test_builders = deque(
        popwhile(complement(is_jump_to_bottom), loop_body_instrs, side='left')
    )
    # If we consumed the entire loop body without finding a jump, assume this
    # is a while True loop.  Return the rest of the instructions as the loop
    # body.
    if not loop_body_instrs:
        return ast.NameConstant(value=True), test_builders

    # Top of the body is either a POP_JUMP_IF_TRUE or POP_JUMP_IF_FALSE.
    jump = loop_body_instrs.popleft()
    expr = make_expr(test_builders)
    if isinstance(jump, instrs.POP_JUMP_IF_TRUE):
        return ast.UnaryOp(op=ast.Not(), operand=expr), loop_body_instrs
    else:
        return expr, loop_body_instrs 
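The one-argument call op.is_(bottom_of_loop) only works if op is the curried operator module that toolz ships (an assumption here, not shown in the snippet); composed with op.attrgetter('arg'), it builds the predicate instr.arg is bottom_of_loop. A sketch under that assumption:

from toolz import compose
from toolz.curried import operator as op

sentinel = object()
is_sentinel_arg = compose(op.is_(sentinel), op.attrgetter('arg'))

class Instr:
    def __init__(self, arg):
        self.arg = arg

assert is_sentinel_arg(Instr(sentinel)) is True
assert is_sentinel_arg(Instr(object())) is False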
Example #17
Source File: transformation.py    From fklearn with Apache License 2.0
def truncate_categorical(df: pd.DataFrame,
                         columns_to_truncate: List[str],
                         percentile: float,
                         replacement: Union[str, float] = -9999,
                         replace_unseen: Union[str, float] = -9999,
                         store_mapping: bool = False) -> LearnerReturnType:
    """
    Truncate infrequent categories and replace them with a single one.
    You can think of it as an "others" category.

    Parameters
    ----------
    df : pandas.DataFrame
        A Pandas' DataFrame that must contain the `columns_to_truncate` columns.

    columns_to_truncate : list of str
        The df columns names to perform the truncation.

    percentile : float
        Categories less frequent than the percentile will be replaced by the
        same one.

    replacement : int, str, float or nan
        The value to use when a category is less frequent than the percentile
        variable.

    replace_unseen : int, str, float, or nan
        The value to impute unseen categories.

    store_mapping : bool (default: False)
        Whether to store the feature value -> integer dictionary in the log.
    """
    get_categs = lambda col: (df[col].value_counts() / len(df)).to_dict()
    update = lambda d: map(lambda kv: (kv[0], replacement) if kv[1] <= percentile else (kv[0], kv[0]), d.items())
    categs_to_dict = lambda categ_dict: dict(categ_dict)

    vec = {column: compose(categs_to_dict, update, get_categs)(column) for column in columns_to_truncate}

    def p(new_df: pd.DataFrame) -> pd.DataFrame:
        return apply_replacements(new_df, columns_to_truncate, vec, replace_unseen)

    p.__doc__ = learner_pred_fn_docstring("truncate_categorical")

    log: LearnerLogType = {'truncate_categorical': {
        'transformed_column': columns_to_truncate,
        'replace_unseen': replace_unseen}
    }

    if store_mapping:
        log["truncate_categorical"]["mapping"] = vec

    return p, p(df), log
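The per-column mapping above is compose(categs_to_dict, update, get_categs): relative frequencies first, then replacement pairs, then back to a dict. The same pipeline on a toy Series:

import pandas as pd
from toolz import compose

s = pd.Series(['a', 'a', 'a', 'b', 'c'])
percentile, replacement = 0.25, 'others'

get_categs = lambda: (s.value_counts() / len(s)).to_dict()
update = lambda d: map(lambda kv: (kv[0], replacement) if kv[1] <= percentile else (kv[0], kv[0]), d.items())

mapping = compose(dict, update)(get_categs())
# 'a' (60%) keeps its label; 'b' and 'c' (20% each) collapse to 'others'
assert mapping == {'a': 'a', 'b': 'others', 'c': 'others'}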