Python itertools.tee() Examples

The following are 30 code examples showing how to use itertools.tee(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module itertools, or try the search function.

Example 1
def test_peek(self):
        """Check peek() on a live generator and on exhausted/empty ones."""

        def make_numbers():
            # Yields 1, 2, 3, 4.
            for value in range(1, 5):
                yield value

        def make_nothing():
            # range(1, 1) is empty, so this generator yields nothing.
            for value in range(1, 1):
                yield value

        gen = make_numbers()

        for expected in range(1, 5):
            head, _ = peek(gen)
            # Re-tee the generator so the next peek sees a fresh iterator.
            gen, _ = itertools.tee(gen)
            self.assertEqual(expected, head)

        exhausted_result = peek(gen)
        gen = make_nothing()
        empty_result = peek(gen)

        # peek() is expected to return None for exhausted/empty iterators.
        self.assertEqual(exhausted_result, None)
        self.assertEqual(empty_result, None)
Example 2
Project: Hierarchical-Sentiment   Author: cedias   File: prepare_data.py    License: MIT License 6 votes vote down vote up
def build_dataset(args):
    """Read reviews from ``args.input``, tokenize the review text with
    spaCy, and assign each record to one of ``args.nb_splits`` random folds.

    Returns a dict with the data tuples, the per-record split ids, and the
    column names of each tuple.
    """

    print("Building dataset from : {}".format(args.input))
    print("-> Building {} random splits".format(args.nb_splits))

    # NOTE(review): spacy.load('en', create_pipeline=...) is the spaCy 1.x
    # API -- confirm the pinned spaCy version.
    nlp = spacy.load('en', create_pipeline=custom_pipeline)
    # Duplicate the review stream: gen_a supplies the metadata columns while
    # gen_b feeds the tokenizer; zip keeps the two copies in lockstep so each
    # token list lines up with its source record.
    gen_a,gen_b = itertools.tee(data_generator(args.input),2)
    data = [(z["reviewerID"],z["asin"],tok,z["overall"]) for z,tok in zip(tqdm((z for z in gen_a),desc="reading file"),nlp.pipe((x["reviewText"] for x in gen_b), batch_size=1000000, n_threads=8))]

    print(data[0])
    shuffle(data)

    # Assign every record a random fold id in [0, nb_splits).
    splits = [randint(0,args.nb_splits-1) for _ in range(0,len(data))]
    count = Counter(splits)

    print("Split distribution is the following:")
    print(count)

    return {"data":data,"splits":splits,"rows":("user_id","item_id","review","rating")}
Example 3
Project: traces   Author: datascopeanalytics   File: timeseries.py    License: MIT License 6 votes vote down vote up
def iterintervals(self, n=2):
        """Iterate over groups of `n` consecutive measurement points in the
        time series.

        """
        # tee the original iterator into n identical iterators
        streams = itertools.tee(iter(self), n)

        # advance the "cursor" on each iterator by an increasing
        # offset, e.g. if n=3:
        #
        #                   [a, b, c, d, e, f, ..., w, x, y, z]
        #  first cursor -->  *
        # second cursor -->     *
        #  third cursor -->        *
        for stream_index, stream in enumerate(streams):
            for _ in range(stream_index):
                next(stream)

        # now, zip the offset streams back together to yield tuples,
        # in the n=3 example it would yield:
        # (a, b, c), (b, c, d), ..., (w, x, y), (x, y, z)
        for intervals in zip(*streams):
            yield intervals 
Example 4
Project: hackerrank   Author: rootulp   File: the-birthday-bar.py    License: MIT License 6 votes vote down vote up
def sliding_window(n, seq):
    """
    Copied from toolz
    https://toolz.readthedocs.io/en/latest/_modules/toolz/itertoolz.html#sliding_window

    A sequence of overlapping subsequences

    >>> list(sliding_window(2, [1, 2, 3, 4]))
    [(1, 2), (2, 3), (3, 4)]

    This function creates a sliding window suitable for transformations like
    sliding means / smoothing

    >>> mean = lambda seq: float(sum(seq)) / len(seq)
    >>> list(map(mean, sliding_window(2, [1, 2, 3, 4])))
    [1.5, 2.5, 3.5]
    """
    # Tee the input n ways and advance the k-th copy by k elements; zipping
    # the staggered copies then produces each length-n window.
    copies = itertools.tee(seq, n)
    for shift, cursor in enumerate(copies):
        for _ in range(shift):
            next(cursor, None)
    return zip(*copies)
Example 5
Project: Computable   Author: ktraunmueller   File: __init__.py    License: MIT License 6 votes vote down vote up
def follow_path(self, path, types, call_scope):
        """
        Follows a path like::

            self.follow_path(iter(['Foo', 'bar']), [a_type], from_somewhere)

        to follow a call like ``module.a_type.Foo.bar`` (in ``from_somewhere``).
        """
        # One independent copy of the path iterator per candidate type,
        # since each _follow_path call consumes its iterator.
        path_copies = itertools.tee(path, len(types))

        collected = []
        for path_copy, typ in zip(path_copies, types):
            followed = self._follow_path(path_copy, typ, call_scope)
            if followed is None:
                # This means stop iteration.
                return types
            collected += followed
        return collected
Example 6
Project: pythonfinder   Author: sarugaku   File: utils.py    License: MIT License 6 votes vote down vote up
def unnest(item):
    # type: (Any) -> Iterable[Any]
    """Recursively flatten nested iterables, yielding the leaf elements.

    Strings are treated as atomic values, never iterated into.
    """
    if isinstance(item, Iterable) and not isinstance(item, six.string_types):
        # Tee so iterating `target` below does not exhaust the caller's copy.
        item, target = itertools.tee(item, 2)
    else:
        target = item
    if getattr(target, "__iter__", None):
        for element in target:
            if isinstance(element, Iterable) and not isinstance(element, six.string_types):
                # Nested iterable: recurse into a tee'd copy of it.
                element, element_copy = itertools.tee(element, 2)
                for leaf in unnest(element_copy):
                    yield leaf
            else:
                yield element
    else:
        # Non-iterable scalar: yield it as-is.
        yield target
Example 7
Project: signac   Author: glotzerlab   File: schema.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def __call__(self, jobs_or_statepoints):
        "Evaluate the schema for the given state points."
        result = dict()
        # One independent iterator over the input per schema key, since
        # each key performs its own full pass.
        copies = itertools.tee(jobs_or_statepoints, len(self))
        for key, copy in zip(self, copies):
            dotted_parts = key.split('.')
            collected = []
            for entry in copy:
                if not isinstance(entry, Mapping):
                    entry = entry.statepoint
                # Walk the dotted key down into the nested mapping.
                value = entry[dotted_parts[0]]
                for part in dotted_parts[1:]:
                    value = value[part]
                collected.append(value)
            result[key] = _collect_by_type(collected)
        return ProjectSchema(result)
Example 8
Project: EDeN   Author: fabriziocosta   File: util.py    License: MIT License 5 votes vote down vote up
def random_bipartition_iter(iterable, relative_size=.5, random_state=1):
    """random_bipartition_iter."""
    # Tee three ways: one copy only to count the items, the other two to
    # build each side of the partition.
    counting_copy, first_copy, second_copy = tee(iterable, 3)
    n_items = iterator_size(counting_copy)
    first_ids, second_ids = random_bipartition(
        n_items, relative_size=relative_size, random_state=random_state)
    return (selection_iterator(first_copy, first_ids),
            selection_iterator(second_copy, second_ids))
Example 9
Project: EDeN   Author: fabriziocosta   File: ml.py    License: MIT License 5 votes vote down vote up
def random_bipartition_iter(iterable, relative_size=.5, random_state=1):
    """random_bipartition_iter."""
    # Three independent passes over the input: one for counting, two for
    # materializing the two partitions.
    for_count, for_part1, for_part2 = tee(iterable, 3)
    total = iterator_size(for_count)
    ids_a, ids_b = random_bipartition(
        total, relative_size=relative_size, random_state=random_state)
    part_a = selection_iterator(for_part1, ids_a)
    part_b = selection_iterator(for_part2, ids_b)
    return part_a, part_b
Example 10
Project: EDeN   Author: fabriziocosta   File: ml.py    License: MIT License 5 votes vote down vote up
def join_pre_processes(iterable, pre_processes=None, weights=None):
    """join_pre_processes."""
    assert(len(weights) == len(pre_processes)), 'Different lengths'
    # NOTE: we have to duplicate the sequences iterator if we want to use
    # different modifiers in parallel
    copies = tee(iterable, len(pre_processes))
    graphs_list = [pre_process(copy)
                   for pre_process, copy in zip(pre_processes, copies)]
    return (graphs_list, weights)
Example 11
Project: EDeN   Author: fabriziocosta   File: graph.py    License: MIT License 5 votes vote down vote up
def auto_relabel(graphs, n_clusters=16, **opts):
    """Label nodes with cluster id."""
    # Duplicate the graph stream: one copy is consumed by auto_label, the
    # other is re-walked below to build the relabeled graphs.
    graphs, labeling_copy = tee(graphs)
    label_list, vecs_list = auto_label(labeling_copy, n_clusters=n_clusters, **opts)
    relabeled = []
    for labels, vecs, source_graph in zip(label_list, vecs_list, graphs):
        new_graph = nx.Graph(source_graph)
        # Attach each node's cluster label and feature vector.
        for node_label, node_vec, node in zip(labels, vecs, new_graph.nodes()):
            new_graph.nodes[node]['label'] = node_label
            new_graph.nodes[node]['vec'] = list(node_vec)
        relabeled.append(new_graph)
    return relabeled
Example 12
Project: pinax-documents   Author: pinax   File: models.py    License: MIT License 5 votes vote down vote up
def shared_parent(self):
        """
        Returns the folder object that is the shared parent (the root of
        a shared folder hierarchy) or None if there is no shared parent.
        """
        # NOTE(review): if nothing in the chain is shared this returns
        # `self`, not None as the docstring says -- confirm intended.
        root = self
        # Walk the breadcrumbs from this folder upward, pairing each folder
        # with its parent; zip_longest pads the topmost folder's parent
        # with None.
        a, b = itertools.tee(reversed(self.breadcrumbs()))
        next(b, None)  # advance b so (a, b) pairs are (folder, parent)
        for folder, parent in itertools.zip_longest(a, b):
            if folder.shared:
                root = folder
            # Stop once the chain of shared ancestors is broken (or we
            # reached the top of the hierarchy).
            if parent is None or not parent.shared:
                break
        return root
Example 13
Project: jawfish   Author: war-and-code   File: heapq.py    License: MIT License 5 votes vote down vote up
def nsmallest(n, iterable, key=None):
    """Find the n smallest elements in a dataset.

    Equivalent to:  sorted(iterable, key=key)[:n]
    """
    # Short-cut for n==1 is to use min() when len(iterable)>0
    if n == 1:
        it = iter(iterable)
        head = list(islice(it, 1))
        if not head:
            return []
        if key is None:
            return [min(chain(head, it))]
        return [min(chain(head, it), key=key)]

    # When n>=size, it's faster to use sorted()
    try:
        size = len(iterable)
    except (TypeError, AttributeError):
        # Unsized input (e.g. a generator): fall through to the heap path.
        pass
    else:
        if n >= size:
            return sorted(iterable, key=key)[:n]

    # When key is none, use simpler decoration
    if key is None:
        # count() breaks ties by input position, so the heap never has to
        # compare two payload values directly.
        it = zip(iterable, count())                         # decorate
        result = _nsmallest(n, it)
        return [r[0] for r in result]                       # undecorate

    # General case, slowest method
    # tee so key() is computed from one copy while the original values
    # travel alongside in the decorated (key, index, value) tuples.
    in1, in2 = tee(iterable)
    it = zip(map(key, in1), count(), in2)                   # decorate
    result = _nsmallest(n, it)
    return [r[2] for r in result]                           # undecorate
Example 14
Project: jawfish   Author: war-and-code   File: heapq.py    License: MIT License 5 votes vote down vote up
def nlargest(n, iterable, key=None):
    """Find the n largest elements in a dataset.

    Equivalent to:  sorted(iterable, key=key, reverse=True)[:n]
    """

    # Short-cut for n==1 is to use max() when len(iterable)>0
    if n == 1:
        it = iter(iterable)
        head = list(islice(it, 1))
        if not head:
            return []
        if key is None:
            return [max(chain(head, it))]
        return [max(chain(head, it), key=key)]

    # When n>=size, it's faster to use sorted()
    try:
        size = len(iterable)
    except (TypeError, AttributeError):
        # Unsized input (e.g. a generator): fall through to the heap path.
        pass
    else:
        if n >= size:
            return sorted(iterable, key=key, reverse=True)[:n]

    # When key is none, use simpler decoration
    if key is None:
        # count(0, -1) gives a strictly decreasing tiebreaker, so earlier
        # elements win ties when selecting the largest.
        it = zip(iterable, count(0,-1))                     # decorate
        result = _nlargest(n, it)
        return [r[0] for r in result]                       # undecorate

    # General case, slowest method
    # tee so key() is computed from one copy while the original values
    # travel alongside in the decorated (key, index, value) tuples.
    in1, in2 = tee(iterable)
    it = zip(map(key, in1), count(0,-1), in2)               # decorate
    result = _nlargest(n, it)
    return [r[2] for r in result]                           # undecorate
Example 15
Project: lineflow   Author: tofunlp   File: core.py    License: MIT License 5 votes vote down vote up
def __iter__(self) -> Iterator[Any]:
        if not self._computed:
            # Not materialized yet: tee the backing iterable so it can be
            # iterated again later, and consume one of the copies now.
            current, self._iterable = tee(self._iterable)
            yield from current
        else:
            yield from self._dataset
Example 16
Project: python-podman   Author: containers   File: test_images.py    License: Apache License 2.0 5 votes vote down vote up
def test_search(self):
        actual = self.pclient.images.search("alpine", 25)
        # tee the result stream: one copy to inspect each hit, one to count.
        hits, counter = itertools.tee(actual)

        for image in hits:
            self.assertIn("alpine", image.name)
        # The search was capped at 25 results and must return at least one.
        self.assertTrue(0 < len(list(counter)) <= 25)
Example 17
Project: pyspider   Author: binux   File: taskdb.py    License: Apache License 2.0 5 votes vote down vote up
def drop(self, project):
        """Remove `project` from the project set and delete all of its
        ``<prefix><project>_*`` task keys, in batches of 100.
        """
        self.redis.srem(self.__prefix__ + 'projects', project)

        if self.scan_available:
            scan_method = self.redis.scan_iter
        else:
            scan_method = self.redis.keys

        # The previous code used itertools.tee(keys, 100), which creates 100
        # copies that each replay the *entire* key stream: every key was
        # deleted 100 times and the whole stream was buffered in memory.
        # Iterate the stream once and delete in fixed-size batches instead.
        keys = iter(scan_method("%s%s_*" % (self.__prefix__, project)))
        while True:
            batch = list(itertools.islice(keys, 100))
            if not batch:
                break
            self.redis.delete(*batch)
Example 18
Project: transformpy   Author: airbnb   File: base.py    License: MIT License 5 votes vote down vote up
def tee(self, tee, *args, **kwargs):
        # Append a tee stage to the pipeline, wrapping the argument in a
        # TeePipe unless one was passed in directly.
        if not isinstance(tee, TeePipe):
            tee = TeePipe(tee, *args, **kwargs)
        # NOTE(review): args/kwargs are forwarded to __ins_add even when they
        # were already consumed by the TeePipe constructor above -- confirm
        # this double use is intended.
        return self.__ins_add(self._pipeline, tee, TransformType.TEE, args, kwargs)
Example 19
Project: transformpy   Author: airbnb   File: base.py    License: MIT License 5 votes vote down vote up
def __check_pipeline(self):
        '''
        Ensure that pipeline runnable and behaves as expected. Currently checks
        that:
            - Clustering operations are followed by a nested transform or an
              aggregation. If not, an explicity generator -> list mapping operation
              is added to avoid subsequent operations being confused by the
              generator of generators that clustering operations yield.
            - If the last transformation is a clustering operation, that the
              same explicity mapping is done to ensure sensible output from the
              transform pipe.
        '''

        def pairwise(iterable):
            "s -> (s0,s1), (s1,s2), (s2, s3), ..."
            # Standard itertools recipe: tee two copies, advance the second
            # by one, and zip them back together.
            a, b = tee(iterable)
            next(b, None)
            return zip(a, b)

        # Seed with the first segment only; the pairwise loop re-appends
        # every following segment (b), inserting mapping pipes as needed.
        pipeline = self._pipeline[:1]

        for a, b in pairwise(self._pipeline):
            if a.type == TransformType.CLUSTER and b.type not in (TransformType.NESTED, TransformType.AGGREGATE):
                logging.warning("A non-nested non-aggregation transform has been used after a clustering pipe. Adding a potentially memory inefficient pipe segment to convert cluster generator to list.")
                pipeline.append(self.__ins(lambda x: list(x), TransformType.MAP, [], {}))
            pipeline.append(b)

        # A trailing clustering pipe would emit a generator of generators;
        # materialize it so the pipeline output is usable.
        if pipeline[-1].type == TransformType.CLUSTER:
            pipeline.append(self.__ins(lambda x: list(x), TransformType.MAP, [], {}))

        return pipeline
Example 20
Project: link-prediction_with_deep-learning   Author: cambridgeltl   File: wvlib.py    License: MIT License 5 votes vote down vote up
def pairwise(iterable):
    "s -> (s0,s1), (s1,s2), (s2, s3), ..."
    # Python 2 recipe: izip is itertools.izip (renamed zip in Python 3).
    a, b = tee(iterable)
    next(b, None)  # shift the second copy forward by one element
    return izip(a, b)

# translated from http://www.hackersdelight.org/hdcodetxt/snoob.c.txt
Example 21
Project: tomlkit   Author: sdispater   File: source.py    License: MIT License 5 votes vote down vote up
def __enter__(self):  # type: () -> None
        # Entering this context manager - save the state
        if PY2:
            # Python 2.7 does not allow to directly copy
            # an iterator, so we have to make tees of the original
            # chars iterator.
            self._source._chars, self._chars = itertools.tee(self._source._chars)
        else:
            # Python 3: the chars iterator can be duplicated with copy().
            self._chars = copy(self._source._chars)
        # Snapshot the parser position/state -- presumably restored on
        # rollback in __exit__; confirm against the exit handler.
        self._idx = self._source._idx
        self._current = self._source._current
        self._marker = self._source._marker

        return self
Example 22
Project: ratcave   Author: ratcave   File: vertex.py    License: MIT License 5 votes vote down vote up
def pairwise(iterable):
    """Yield overlapping pairs: s -> (s0, s1), (s1, s2), (s2, s3), ..."""
    current, following = itertools.tee(iterable)
    next(following, None)  # shift the second copy forward by one element
    return zip(current, following)
Example 23
Project: open-synthesis   Author: twschiller   File: util.py    License: GNU General Public License v3.0 5 votes vote down vote up
def partition(pred, iterable):
    """Use a predicate to partition entries into false entries and true entries."""
    # https://stackoverflow.com/questions/8793772/how-to-split-a-sequence-according-to-a-predicate
    # NOTE: this might iterate over the collection twice
    # NOTE: need to use filter(s) here because we're lazily dealing with iterators
    false_copy, true_copy = itertools.tee(iterable)
    false_entries = itertools.filterfalse(pred, false_copy)
    true_entries = filter(pred, true_copy)
    return false_entries, true_entries  # pylint: disable=bad-builtin
Example 24
Project: linter-pylama   Author: AtomLinter   File: utils.py    License: MIT License 5 votes vote down vote up
def pairwise(iterable, default_value):
    """Return pairs of items from `iterable`.

    pairwise([1, 2, 3], default_value=None) -> (1, 2) (2, 3), (3, None)
    """
    first, second = tee(iterable)
    # Shift the second copy forward by one; the final pair is padded with
    # default_value by zip_longest.
    next(second, default_value)
    return zip_longest(first, second, fillvalue=default_value)
Example 25
Project: recruit   Author: Frank-qlu   File: formparser.py    License: Apache License 2.0 5 votes vote down vote up
def parse(self, file, boundary, content_length):
        # Split the (kind, value) part stream into two lazy copies, one
        # filtered for form fields and one for file uploads.
        parts_a, parts_b = tee(
            self.parse_parts(file, boundary, content_length), 2
        )
        form_items = (part[1] for part in parts_a if part[0] == "form")
        file_items = (part[1] for part in parts_b if part[0] == "file")
        return self.cls(form_items), self.cls(file_items)
Example 26
Project: E-Safenet   Author: c3c   File: partial_binary.py    License: GNU General Public License v2.0 5 votes vote down vote up
def pairwise(iterable):
    # Yield overlapping pairs (s0, s1), (s1, s2), ...
    # Python 2 code: itertools.izip is zip in Python 3.
    a, b = itertools.tee(iterable)
    next(b, None)  # shift the second copy forward by one element
    return itertools.izip(a, b)
Example 27
Project: pdvega   Author: altair-viz   File: utils.py    License: MIT License 5 votes vote down vote up
def prev_this_next(it, sentinel=None):
    """Utility to return (prev, this, next) tuples from an iterator"""
    prevs, currents, nexts = tee(it, 3)
    next(nexts, None)  # shift the "next" view forward by one
    # Pad the "prev" view with a leading sentinel and the "next" view with
    # a trailing one, then zip the three views together.
    padded_prevs = chain([sentinel], prevs)
    padded_nexts = chain(nexts, [sentinel])
    return zip(padded_prevs, currents, padded_nexts)
Example 28
Project: django-echarts   Author: kinegratii   File: fetch.py    License: MIT License 5 votes vote down vote up
def ifetch_multiple(iterable, *keys, defaults=None, getter=None):
    defaults = defaults or {}
    # Only tee the input when several keys each need an independent pass.
    if len(keys) > 1:
        sources = tee(iterable, len(keys))
    else:
        sources = (iterable,)
    return [ifetch_single(source, key, default=defaults.get(key, EMPTY), getter=getter)
            for source, key in zip(sources, keys)]
Example 29
Project: jbox   Author: jpush   File: formparser.py    License: MIT License 5 votes vote down vote up
def parse(self, file, boundary, content_length):
        # Duplicate the (kind, value) part stream: one lazy copy yields the
        # form fields, the other yields the uploaded files.
        part_stream_a, part_stream_b = tee(
            self.parse_parts(file, boundary, content_length), 2)
        form_entries = (part[1] for part in part_stream_a if part[0] == 'form')
        file_entries = (part[1] for part in part_stream_b if part[0] == 'file')
        return self.cls(form_entries), self.cls(file_entries)
Example 30
Project: pycomm3   Author: ottowayi   File: responses.py    License: MIT License 5 votes vote down vote up
def _parse_reply(self):
        """Parse a multi-service reply packet into per-tag values."""
        super()._parse_reply()
        num_replies = unpack_uint(self.data)
        # The reply count is followed by num_replies 2-byte start offsets.
        offset_data = self.data[2:2 + 2 * num_replies]
        offsets = (unpack_uint(offset_data[i:i+2]) for i in range(0, len(offset_data), 2))
        start, end = tee(offsets)  # split offsets into start/end indexes
        # NOTE(review): next(end) raises StopIteration when num_replies == 0
        # -- confirm callers guarantee at least one reply.
        next(end)   # advance end by 1 so 2nd item is the end index for the first item
        # zip_longest pads the final end index with None, i.e. the last
        # reply slice runs to the end of the buffer.
        reply_data = [self.data[i:j] for i, j in zip_longest(start, end)]
        values = []

        for data, tag in zip(reply_data, self.tags):
            service = unpack_uint(data)
            service_status = data[2]
            tag['service_status'] = service_status
            if service_status != SUCCESS:
                tag['error'] = f'{get_service_status(service_status)} - {get_extended_status(data, 2)}'

            if service == TAG_SERVICES_REPLY['Read Tag']:
                if service_status == SUCCESS:
                    value, dt = parse_read_reply(data[4:], tag['tag_info'], tag['elements'])
                else:
                    # Failed reads still occupy a slot, with no value/type.
                    value, dt = None, None

                values.append(value)
                tag['value'] = value
                tag['data_type'] = dt

        self.values = values