Python itertools.tee() Examples

The following are code examples showing how to use itertools.tee(). They are extracted from open source Python projects. You can vote up the examples you like or vote down the examples you don't like. You can also save this page to your account.

Example 1
Project: dontwi   Author: vocalodon   File: dontwi.py    (GNU General Public License v3.0) View Source Project 6 votes vote down vote up
def dump_status_strings(conf):
    """Print one text record per summary returned for the trigger hashtag.

    The inbound timeline is read once and duplicated with ``tee``: one copy
    is handed to ``summaries_to_be_listed_in_waiting_list``, the other is
    used to map each status id to its raw content.  Every record holds the
    inbound status id, the status string, the inbound status URL and the raw
    content in square brackets, each on its own line.
    """
    dontwi = Dontwi(conf)
    in_cn = dontwi.get_connector("inbound")
    in_cn.connect()
    # Looked up exactly as before even though the value is not used below.
    operation_cf = dontwi.config.items["operation"]
    trigger_str = dontwi.get_trigger()
    since, until, limit = (
        dontwi.config.inbound.get(option, "")
        for option in ("since", "until", "limit"))
    timeline = in_cn.get_timeline_statuses_by_hashtag(
        hashtag=trigger_str, since=since, until=until, limit=limit)
    # Two independent passes over the same timeline are needed.
    statuses, statuses2 = tee(timeline)
    status_pr = StatusText(dontwi.config.outbound)
    result_log = ResultLog(dontwi.config.items)
    summaries = dontwi.summaries_to_be_listed_in_waiting_list(
        result_log=result_log,
        status_pr=status_pr,
        statuses=statuses,
        trigger_str=trigger_str)
    status_dc = {}
    for a_status in statuses2:
        status_dc[a_status.status["id"]] = a_status.status["content"]
    # Build every record before printing so a failure prints nothing at all.
    dump_strs = []
    for a_summary in summaries:
        dump_strs.append("{0}\n{1}\n{2}\n[{3}]".format(
            a_summary["inbound_status_id"],
            a_summary["status_string"],
            a_summary["inbound_status_url"],
            status_dc[a_summary["inbound_status_id"]]))
    for record in dump_strs:
        print(record)
Example 2
Project: ProtScan   Author: gianlucacorrado   File: model.py    (MIT License) View Source Project 6 votes vote down vote up
def get_supervised_data(self, preprocessed, bin_sites,
                            active_learning=False, random_state=1234,
                            n_jobs=-1):
        """Build the feature matrix and the matching regression values.

        ``preprocessed`` is duplicated with ``tee``: one copy is scanned for
        the ``'dist'`` annotation of every item, the other is vectorized.
        ``bin_sites``, ``active_learning`` and ``random_state`` are accepted
        but not used in this method.  As a side effect ``self.vectorizer``
        is replaced by a new vectorizer matching ``self.mode``.

        Returns a ``(matrix, vals)`` pair.
        """
        to_vectorize, to_scan = tee(preprocessed)
        sequence_mode = self.mode == 'sequence'
        if sequence_mode:
            # items are (attributes, sequence) pairs
            dists = [attr['dist'] for attr, _ in to_scan]
        else:
            # items are graphs carrying their attributes in graph['id']
            dists = [g.graph['id']['dist'] for g in to_scan]
        vals = np.array([common.dist_to_val(d, self.max_dist) for d in dists])
        vectorizer_cls = SeqVectorizer if sequence_mode else GraphVectorizer
        self.vectorizer = vectorizer_cls(auto_weights=True,
                                         **self.vectorizer_args)
        matrix = vectorize(to_vectorize, vectorizer=self.vectorizer,
                           block_size=400, n_jobs=n_jobs)
        return matrix, vals
Example 3
Project: ProtScan   Author: gianlucacorrado   File: model.py    (MIT License) View Source Project 6 votes vote down vote up
def get_predict_data(self, preprocessed, n_jobs=-1):
        """Build the feature matrix and collect the subsequence info.

        ``preprocessed`` is duplicated with ``tee``: one copy supplies, for
        every item, the subset of its attributes named 'tr_name', 'center'
        and 'tr_len' (missing keys are skipped); the other copy is
        vectorized.  As a side effect ``self.vectorizer`` is replaced by a
        new vectorizer matching ``self.mode``.

        Returns a ``(matrix, info)`` pair.
        """
        wanted_keys = ('tr_name', 'center', 'tr_len')

        def _subdict(dic):
            return {k: dic[k] for k in wanted_keys if k in dic}

        to_vectorize, to_scan = tee(preprocessed)
        sequence_mode = self.mode == 'sequence'
        if sequence_mode:
            # items are (attributes, sequence) pairs
            info = [_subdict(attr) for attr, _ in to_scan]
        else:
            # items are graphs carrying their attributes in graph['id']
            info = [_subdict(g.graph['id']) for g in to_scan]

        vectorizer_cls = SeqVectorizer if sequence_mode else GraphVectorizer
        self.vectorizer = vectorizer_cls(auto_weights=True,
                                         **self.vectorizer_args)
        matrix = vectorize(to_vectorize, vectorizer=self.vectorizer,
                           block_size=400, n_jobs=n_jobs)
        return matrix, info
Example 4
Project: ProtScan   Author: gianlucacorrado   File: graph.py    (MIT License) View Source Project 6 votes vote down vote up
def graph_preprocessor(graphs, which_set, bin_sites=None, max_dist=None,
                       random_state=1234, **params):
    """Preprocess graphs for training or for testing.

    For ``which_set == 'train'`` the graphs go through ``add_distance``,
    ``split_iterator`` and ``add_type`` and the resulting iterable is
    returned.  For ``which_set == 'test'`` the input is duplicated with
    ``tee`` and a ``(full_graphs, graphs)`` pair is returned, built by
    ``transform_dictionary`` and ``split_iterator`` respectively.

    ``params`` is forwarded to ``split_iterator``; ``random_state`` is
    accepted but not used here.
    """
    assert which_set in ('train', 'test'), \
        "which_set must be either 'train' or 'test'."

    if which_set == 'test':
        to_split, to_transform = tee(graphs)
        full_graphs = transform_dictionary(to_transform)
        return full_graphs, split_iterator(to_split, **params)

    if which_set == 'train':
        with_distance = add_distance(graphs, bin_sites)
        pieces = split_iterator(with_distance, **params)
        return add_type(pieces, max_dist)

    # Only reachable when assertions are disabled.
    raise Exception("ERROR: unrecognized which_set type: %s" %
                    which_set)
Example 5
Project: code   Author: ActiveState   File: recipe-576961.py    (MIT License) View Source Project 6 votes vote down vote up
def hamming_numbers():
    """Return an endless iterator over the Hamming numbers in rising order.

    Hamming numbers (also "5-smooth" or "regular" numbers, see
    http://en.wikipedia.org/wiki/Regular_number) are the values
    2**i * 3**j * 5**k for non-negative integers i, j and k.

    The stream is defined in terms of itself: it is 1 followed by the
    sorted, de-duplicated merge of the stream multiplied by 2, 3 and 5.
    """

    def feedback():
        # Forward reference to ``stream``, which is only bound further down
        # in the enclosing function; it is looked up on first iteration.
        for value in stream:
            yield value

    # One copy is handed to the caller, three feed the multiplications.
    result, by2, by3, by5 = tee(feedback(), 4)
    doubled = (2 * x for x in by2)
    tripled = (3 * x for x in by3)
    quintupled = (5 * x for x in by5)
    # 1 seeds the recursion; merge keeps the three sorted inputs sorted.
    candidates = chain([1], merge(doubled, tripled, quintupled))
    # groupby collapses runs of equal values (e.g. 6 == 2*3 == 3*2).
    stream = (key for key, _ in groupby(candidates))

    return result
Example 6
Project: PYKE   Author: muddyfish   File: deep_map.py    (MIT License) View Source Project 6 votes vote down vote up
def apply_inf_list(self, a:Node.infinite, b:Node.infinite):
        """Lazily apply ``self.run`` to pairs drawn from two iterators.

        The result of ``self.run(x, [y])`` is produced for x taken from
        ``a`` and y taken from ``b``.  Pairs are visited in growing "shells":
        first (a0, b0); then, for size = 1, 2, ..., the new elements
        a[size] and b[size] are combined with the first ``size`` elements of
        the other input and finally with each other.

        Returns the generator wrapped in ``DummyList``.
        """
        def apply_iterator(a, b):
            # ``a``/``b`` are never advanced directly; they stay at the start
            # so that fresh copies can replay the earlier elements.
            a, a_copy = tee(a, 2)
            b, b_copy = tee(b, 2)
            yield self.run(next(a_copy), [next(b_copy)])
            size = 1
            while 1:
                # Newest element of each input (index ``size``).
                next_a = next(a_copy)
                next_b = next(b_copy)
                # Fresh copies positioned at the first element again.
                a, new_a = tee(a, 2)
                b, new_b = tee(b, 2)
                # Earlier a-elements paired with the newest b-element ...
                yield from (self.run(next(new_a), [next_b]) for i in range(size))
                # ... the newest a-element paired with earlier b-elements ...
                yield from (self.run(next_a, [next(new_b)]) for i in range(size))
                # ... and the two newest elements together.
                yield self.run(next_a, [next_b])
                size += 1
        return DummyList(apply_iterator(a, b))
Example 7
Project: nstock   Author: ybenitezf   File: filters.py    (license) View Source Project 6 votes vote down vote up
def __call__(self, tokens):
        """Run every filter on its own copy of ``tokens`` and interleave them.

        The token stream is duplicated once per entry in ``self.filters``;
        each filter receives copies of the tokens (``t.copy()``) so that the
        branches cannot affect one another.  Results are yielded round-robin,
        one token per still-active branch, until all branches are exhausted.
        """
        from itertools import tee

        count = len(self.filters)
        # Pair each filter with its private copy of the token stream.
        gens = []
        for token_filter, branch in zip(self.filters, tee(tokens, count)):
            gens.append(token_filter(t.copy() for t in branch))
        # Number of branches that have not run dry yet
        pending = count
        while pending:
            for slot, stream in enumerate(gens):
                if stream is None:
                    continue
                try:
                    yield next(stream)
                except StopIteration:
                    # Retire the exhausted branch
                    gens[slot] = None
                    pending -= 1
Example 8
Project: eea.corpus   Author: eea   File: phrases.py    (license) View Source Project 6 votes vote down vote up
def build_phrase_models(content, base_path, settings):
    """ Build and save one phrase model per n-gram level.

    Models are trained for levels 2 up to ``int(settings['level'])`` and each
    is saved to ``"<base_path>.<level>"``.  The input of every level is the
    token stream transformed by the model of the previous level.
    """

    ngram_level = int(settings['level'])

    # Each level needs two passes over the stream (train, then transform),
    # hence tee(); according to its docs this may be costly in memory.
    token_stream = chain.from_iterable(doc.tokenized_text for doc in content)
    train_pass, transform_pass = tee(token_stream, 2)

    for level in range(2, ngram_level + 1):
        phrases = Phrases(train_pass)
        path = "%s.%s" % (base_path, level)     # n-gram level is the suffix
        logger.info("Phrase processor: Saving %s", path)
        phrases.save(path)
        # TODO: gensim complains about not using Phraser(phrases)
        # Feed the phrase-tokenized stream to the next level.
        train_pass, transform_pass = tee(phrases[transform_pass], 2)
Example 9
Project: python-tutorial   Author: Akuli   File: common.py    (license) View Source Project 6 votes vote down vote up
def find_links(file):
    """Find all markdown links in a file object.

    Lines are examined in overlapping pairs so that links spanning two
    lines are found as well.  Because of that, a file with fewer than two
    lines produces no pairs and therefore no results.

    Yield (regexmatch, lineno) tuples, where lineno is the 1-based number of
    the second line of the pair in which the link was found.  The same link
    text is never yielded twice.
    """
    # don't yield same link twice
    seen = set()

    # we need to loop over the file two lines at a time to support
    # multi-line (actually two-line) links, so this is kind of a mess
    firsts, seconds = itertools.tee(file)
    # first line is never second line; the default keeps an empty file from
    # leaking StopIteration out of this generator (a RuntimeError, PEP 479)
    next(seconds, None)

    # we want 1-based indexing instead of 0-based and one-line links get
    # caught from the second line of the pair, so we need to start at two
    for lineno, (first, second) in enumerate(zip(firsts, seconds), start=2):
        for match in re.finditer(_LINK_REGEX, first + second,
                                 flags=re.DOTALL):
            link_text = match.group(0)
            if link_text not in seen:
                seen.add(link_text)
                yield match, lineno
Example 10
Project: rl_trading   Author: ucaiado   File: matching_engine.py    (license) View Source Project 6 votes vote down vote up
def __init__(self, l_hours, i_milis=2, i_sec=None):
        '''
        Initiate a NextStopTime object and set up its stoptime generators.

        :param l_hours: list. Hours to be used in stoptime calculation
        :param i_milis*: integer. Number of milliseconds between each stoptime
        :param i_sec*: integer. Number of seconds between each stoptime. If
            defined, the i_milis is not used
        '''
        # A noise value is only passed on for intervals above 4 ms.
        i_noise = min(1, i_milis/5) if i_milis > 4 else None
        gen_source = get_next_stoptime(l_hours, i_milis, i_sec, i_noise)
        # Keep a second, independent copy of the generator as a backup.
        self.gen_stoptime, self.gen_backup = itertools.tee(gen_source)
        self.s_last_stoptime = ''
        self.s_stoptime_was_set = ''
        self.s_time = "{:0>2}:{:0>2}:{:0>2}.{:0>3}"
        self.b_use_last = False
Example 11
Project: sbds   Author: steemit   File: query_helpers.py    (license) View Source Project 6 votes vote down vote up
def trailing_windows(window_size=24, window_units='hours', window_count=3):
    """Yield consecutive pairs of trailing periods as windows.

    The values produced by ``trailing_periods`` are paired with their
    successors: each value becomes the ``'to'`` bound of a window whose
    ``'_from'`` bound is the value that follows it.

    Args:
        window_size (int): forwarded to ``trailing_periods``
        window_units (str): forwarded to ``trailing_periods``
        window_count (int): forwarded to ``trailing_periods``

    Yields:
        dict with the keys ``'_from'`` and ``'to'``

    """
    periods = trailing_periods(window_size, window_units, window_count)
    upper_bounds, lower_bounds = tee(periods)
    # Shift the second copy by one so zip() pairs neighbours.
    next(lower_bounds, None)
    for upper, lower in zip(upper_bounds, lower_bounds):
        yield {'_from': lower, 'to': upper}
Example 12
Project: clopure   Author: vbkaisetsu   File: core.py    (license) View Source Project 6 votes vote down vote up
def iter_split_evaluate_wrapper(self, fn, local_vars, in_size, q_in, q_out):
        """Evaluate ``fn`` over items read from ``q_in``, writing to ``q_out``.

        ``q_in`` delivers ``(index, data)`` pairs until ``data`` is
        ``EOFMessage``.  Each data item is expected to be indexable with
        ``in_size`` positions; position k of every item is streamed to the
        k-th argument of ``fn``.  Every value produced by ``self.evaluate``
        is put on ``q_out`` as ``(index, value)`` using the indices recorded
        while reading, followed by a final ``(0, EOFMessage)``.
        """
        # NOTE(review): presumably a non-reentrant lock, so that the next
        # input is only read after the previous output was forwarded - confirm.
        l = Lock()
        # Indices of the inputs read so far, in reading order.
        idx_q = Queue()
        def split_iter():
            try:
                while True:
                    l.acquire()
                    i, data_in = q_in.get()
                    idx_q.put(i)
                    if data_in is EOFMessage:
                        return
                    yield data_in
            except BaseException:
                # Report the failure on stdout instead of propagating it.
                traceback.print_exc(file=sys.stdout)
        # One independent copy of the input stream per argument position.
        gs = itertools.tee(split_iter(), in_size)
        # The immediately-called lambda binds ``i`` for each generator, so
        # generator k yields element k of every item from its own copy.
        for data_out in self.evaluate((fn,) + tuple((lambda i: (x[i] for x in gs[i]))(i) for i in range(in_size)), local_vars=local_vars):
            q_out.put((idx_q.get(), data_out))
            l.release()
        q_out.put((0, EOFMessage))
Example 13
Project: concepts   Author: sminez   File: prelude.py    (license) View Source Project 6 votes vote down vote up
def iwindowed(iterable, n):
    '''
    Take successive n-tuples from an iterable using a sliding window

    Yields tuples of n consecutive elements, moving forward one element at
    a time.  Nothing is yielded when the iterable holds fewer than n
    elements.
    '''
    # Take n copies of the iterable
    iterables = tee(iterable, n)

    # Advance each to the correct starting position.  The default passed to
    # next() matters: a bare next() on a short input would raise
    # StopIteration inside this generator, which Python 3.7+ turns into a
    # RuntimeError instead of simply ending the iteration.
    for step, it in enumerate(iterables):
        for _ in range(step):
            next(it, None)

    # Zip the modified iterables and yield the elements as a generator
    # NOTE: not using zip longest as we want to stop when we reach the end
    for t in zip(*iterables):
        yield t
Example 14
Project: trainer   Author: nutszebra   File: nutszebra_sampling.py    (license) View Source Project 6 votes vote down vote up
def dummy_type_tee():
        """Return a sample object of the type produced by itertools.tee.

        A one-element iterator (yielding None) is passed through
        itertools.tee and the second of the two copies is returned.

        Edited date:
            160704

        Test:
            160704

        Returns:
            itertools.tee: this is used self.type_generator_or_tee
        """
        _, second_copy = itertools.tee(iter([None]))
        return second_copy
Example 15
Project: zippy   Author: securesystemslab   File: filters.py    (license) View Source Project 6 votes vote down vote up
def __call__(self, tokens):
        """Run every filter on its own copy of ``tokens`` and interleave them.

        The token stream is duplicated once per entry in ``self.filters``;
        each filter receives copies of the tokens (``t.copy()``) so that the
        branches cannot affect one another.  Results are yielded round-robin,
        one token per still-active branch, until all branches are exhausted.
        """
        from itertools import tee

        count = len(self.filters)
        # Pair each filter with its private copy of the token stream.
        gens = []
        for token_filter, branch in zip(self.filters, tee(tokens, count)):
            gens.append(token_filter(t.copy() for t in branch))
        # Number of branches that have not run dry yet
        pending = count
        while pending:
            for slot, stream in enumerate(gens):
                if stream is None:
                    continue
                try:
                    yield next(stream)
                except StopIteration:
                    # Retire the exhausted branch
                    gens[slot] = None
                    pending -= 1
Example 16
Project: WhooshSearch   Author: rokartnaz   File: filters.py    (license) View Source Project 6 votes vote down vote up
def __call__(self, tokens):
        """Run every filter on its own copy of ``tokens`` and interleave them.

        The token stream is duplicated once per entry in ``self.filters``;
        each filter receives copies of the tokens (``t.copy()``) so that the
        branches cannot affect one another.  Results are yielded round-robin,
        one token per still-active branch, until all branches are exhausted.
        """
        from itertools import tee

        count = len(self.filters)
        # Pair each filter with its private copy of the token stream.
        gens = []
        for token_filter, branch in zip(self.filters, tee(tokens, count)):
            gens.append(token_filter(t.copy() for t in branch))
        # Number of branches that have not run dry yet
        pending = count
        while pending:
            for slot, stream in enumerate(gens):
                if stream is None:
                    continue
                try:
                    yield next(stream)
                except StopIteration:
                    # Retire the exhausted branch
                    gens[slot] = None
                    pending -= 1
Example 17
Project: pypuf   Author: nils-wisiol   File: tools.py    (license) View Source Project 6 votes vote down vote up
def approx_stabilities(instance, num, reps, random_instance=RandomState()):
    """
    Approximate the stability of `instance` on `num` sampled challenges,
    evaluating every challenge `reps` times. The stability is the
    probability that the instance gives the correct response when
    evaluated.
    :param instance: pypuf.simulation.base.Simulation
                     The instance for the stability approximation
    :param num: int
                Amount of challenges to be evaluated
    :param reps: int
                 Amount of repetitions per challenge
    :param random_instance: passed to `sample_inputs` when drawing the
                            challenges
    :return: array of float
             Array of the stabilities for each challenge
    """

    challenges = sample_inputs(instance.n, num, random_instance)
    responses = zeros((reps, num))
    for rep in range(reps):
        # Keep one copy for the next repetition, unpack the other one now.
        challenges, replay = itertools.tee(challenges)
        responses[rep, :] = instance.eval(array(list(replay)))
    # Column sums: one value per challenge across all repetitions.
    agreement = np_abs(np_sum(responses, axis=0))
    return 0.5 + 0.5 * agreement / reps
Example 18
Project: tichu-tournament   Author: aragos   File: utils.py    (license) View Source Project 6 votes vote down vote up
def prev_this_next(items):
    """
    Loop over a collection with look-ahead and look-back.

    From Thomas Guest,
    http://wordaligned.org/articles/zippy-triples-served-with-python

    Seriously useful looping tool (Google "zippy triples")
    lets you loop a collection and see the previous and next items,
    which get set to None at the ends.

    To be used in layout algorithms where one wants a peek at the
    next item coming down the pipe.

    Returns the result of zip() over (previous, current, next) triples; an
    empty ``items`` produces no triples.
    """

    extend = itertools.chain([None], items, [None])
    # The third copy must not be called ``next``: that would shadow the
    # builtin and make the calls below fail with a TypeError.
    prev, this, nxt = itertools.tee(extend, 3)
    # Offset the copies by one and two positions.  The padding guarantees at
    # least two elements; the default just avoids a stray StopIteration.
    next(this, None)
    next(nxt, None)
    next(nxt, None)
    return zip(prev, this, nxt)
Example 19
Project: tabkit   Author: yandex-tabkit   File: pyparser.py    (license) View Source Project 6 votes vote down vote up
def parse_file_keeplines(lines, require_order=None):
    r"""
    Yield the header line, then a (raw line, parsed record) pair per line.

    The input is duplicated with tee(): one copy supplies the raw lines,
    the other is handed to parse_file().  Raises Exception("No header")
    when the input is empty.  ``require_order`` is not used here.

    >>> def gen_lines(x):
    ...     yield "# field:int\n"
    ...     for i in range(x):
    ...         yield "%s\n" % (test_field,)
    >>> parsed = parse_file_keeplines(gen_lines(2))
    >>> next(parsed)
    '# field:int\n'
    >>> test_field = 1; next(parsed)
    ('1\n', Rec(field=1))
    >>> test_field = 2; next(parsed)
    ('2\n', Rec(field=2))
    """
    raw_lines, parser_input = tee(iter(lines), 2)
    try:
        yield next(raw_lines)
    except StopIteration:
        raise Exception("No header")
    # The parser works on its own copy, so the raw lines stay in step.
    for pair in izip(raw_lines, parse_file(parser_input)):
        raw_line, record = pair
        yield raw_line, record
Example 20
Project: ww   Author: Tygs   File: iterables.py    (license) View Source Project 6 votes vote down vote up
def __iter__(self):
        """ Give access to the wrapped iterator.

            Example:

                >>> from ww import g
                >>> gen = g(range(10))
                >>> iter(gen) == gen.iterator
                True

            Returns:
                The wrapped iterator itself (not a copy).

            Raises:
                RuntimeError: if .tee() was already called on this object.
        """
        if not self._tee_called:
            return self.iterator
        raise RuntimeError("You can't iterate on a g object after g.tee "
                           "has been called on it.")

    # TODO: type self, and stuff that returns things depending on self 
Example 21
Project: ww   Author: Tygs   File: iterables.py    (license) View Source Project 6 votes vote down vote up
def __mul__(self, num):
        # type: (int) -> IterableWrapper
        """ Repeat the content ``num`` times, one repetition after another.

            It's basically a shortcut for `g().chain(*g().tee())`.

            Args:
                num: The number of times to duplicate.

            Example:

                >>> from ww import g
                >>> (g(range(3)) * 3).list()
                [0, 1, 2, 0, 1, 2, 0, 1, 2]
                >>> (2 * g(range(3))).list()
                [0, 1, 2, 0, 1, 2]
        """
        wrapper_cls = self.__class__
        clones = itertools.tee(self.iterator, num)
        chained = itertools.chain.from_iterable(clones)
        return wrapper_cls(chained)
Example 22
Project: ww   Author: Tygs   File: iterables.py    (license) View Source Project 6 votes vote down vote up
def tee(self, num=2):
        # type: (int) -> IterableWrapper
        """ Split this generator into independent copies.

            Proxy to itertools.tee(). Every copy is wrapped in the same
            class as this object, and so is the collection of copies. After
            the call this object is flagged so that it refuses to be
            iterated directly.

            If you want to concatenate the results afterwards, use
            g() * x instead of g().tee(x) which does that for you.

            Args:
                num: The number of returned generators.

            Example:

                >>> from ww import g
                >>> a, b, c = g(range(3)).tee(3)
                >>> [tuple(a), tuple(b), tuple(c)]
                [(0, 1, 2), (0, 1, 2), (0, 1, 2)]
        """
        wrapper_cls = self.__class__
        clones = itertools.tee(self.iterator, num)
        wrapped = wrapper_cls(wrapper_cls(clone) for clone in clones)
        self._tee_called = True
        return wrapped

    # TODO: allow negative end boundary 
Example 23
Project: ww   Author: Tygs   File: iterables.py    (license) View Source Project 6 votes vote down vote up
def copy(self):
        # type: () -> IterableWrapper
        """ Return an independent copy of the iterable.

            The copy starts at the position this iterable had when `copy()`
            was called; advancing one of them afterwards does not advance
            the other.

            Example:

                >>> from ww import g
                >>> my_g_1 = g(range(3))
                >>> my_g_2 = my_g_1.copy()
                >>> next(my_g_1)
                0
                >>> next(my_g_1)
                1
                >>> next(my_g_2)
                0
        """

        # Both halves come from tee(); this object keeps the first one.
        kept, duplicate = itertools.tee(self.iterator)
        self.iterator = kept
        return self.__class__(duplicate)
Example 24
Project: gougo   Author: amaozhao   File: utils.py    (license) View Source Project 6 votes vote down vote up
def previous_current_next(items):
    """
    From http://www.wordaligned.org/articles/zippy-triples-served-with-python

    Build an iterator of (previous, current, next) triples over ``items``;
    ``None`` stands in where no previous or next item exists.
    """
    padded = itertools.chain([None], items, [None])
    prev, cur, nex = itertools.tee(padded, 3)
    # The padding guarantees at least two elements, so skipping one element
    # on ``cur`` and two on ``nex`` cannot fail unless ``items`` itself
    # misbehaves - in which case the error is deliberately not swallowed.
    for lagging, skip in ((cur, 1), (nex, 2)):
        for _ in range(skip):
            next(lagging)
    return zip(prev, cur, nex)
Example 25
Project: CVProject   Author: hieuxinhe94   File: tokenize.py    (license) View Source Project 6 votes vote down vote up
def __call__(self, seq):
    """Yield n-grams of ``seq`` with lengths from min_order to max_order.

    Written for Python 2 (``xrange``, ``.next()``).  ``max_order`` staggered
    copies of ``seq`` are read in lock-step, so each ``token`` is a window of
    up to ``max_order`` consecutive items joined into one string.
    NOTE(review): the length check assumes each item is a single character -
    confirm against callers.
    """
    min_order = self.min_order
    max_order = self.max_order
    t = tee(seq, max_order)
    # Copy i starts i items into the sequence.
    for i in xrange(max_order):
      for j in xrange(i):
        # advance iterators, ignoring result
        t[i].next()
    while True:
      # NOTE(review): leaving this loop seems to rely on StopIteration
      # ending the generator expression early, leaving a short token
      # (pre-PEP 479 behaviour) - confirm.
      token = ''.join(tn.next() for tn in t)
      if len(token) < max_order: break
      # Prefixes of the full window, lengths min_order..max_order.
      for n in xrange(min_order-1, max_order):
        yield token[:n+1]
    # Slices of the final, shorter token.
    for a in xrange(max_order-1):
      for b in xrange(min_order, max_order-a):
        yield token[a:a+b]
Example 26
Project: CVProject   Author: hieuxinhe94   File: tokenize.py    (license) View Source Project 6 votes vote down vote up
def __call__(self, seq):
    """Yield word n-grams of ``seq`` with min_order to max_order words.

    Written for Python 2 (``xrange``, ``.next()``).  ``seq`` is split on
    whitespace and ``max_order`` staggered copies of the word list are read
    in lock-step; each ``token`` is a list of consecutive words and the
    yielded n-grams are those words joined by single spaces.
    """
    _seq = str.split(seq)
    min_order = self.min_order
    max_order = self.max_order
    t = tee(_seq, max_order)
    # Copy i starts i words into the sequence.
    for i in xrange(max_order):
      for j in xrange(i):
        # advance iterators, ignoring result
        t[i].next()
    while True:
      # NOTE(review): StopIteration raised inside this list comprehension
      # appears to propagate and end the generator, so the ``break`` and
      # the loop after ``while`` may never run - confirm.
      token = [tn.next() for tn in t]
      if len(token) < max_order: break
      # Prefixes of the full window, min_order..max_order words long.
      for n in xrange(min_order-1, max_order):
        yield ' '.join(token[:n+1])
    # Slices of the final token.
    for a in xrange(max_order-1):
      for b in xrange(min_order, max_order-a):
        yield ' '.join(token[a:a+b])
Example 27
Project: QualquerMerdaAPI   Author: tiagovizoto   File: filters.py    (license) View Source Project 6 votes vote down vote up
def __call__(self, tokens):
        """Run every filter on its own copy of ``tokens`` and interleave them.

        The token stream is duplicated once per entry in ``self.filters``;
        each filter receives copies of the tokens (``t.copy()``) so that the
        branches cannot affect one another.  Results are yielded round-robin,
        one token per still-active branch, until all branches are exhausted.
        """
        from itertools import tee

        count = len(self.filters)
        # Pair each filter with its private copy of the token stream.
        gens = []
        for token_filter, branch in zip(self.filters, tee(tokens, count)):
            gens.append(token_filter(t.copy() for t in branch))
        # Number of branches that have not run dry yet
        pending = count
        while pending:
            for slot, stream in enumerate(gens):
                if stream is None:
                    continue
                try:
                    yield next(stream)
                except StopIteration:
                    # Retire the exhausted branch
                    gens[slot] = None
                    pending -= 1
Example 28
Project: npstreams   Author: LaurentRDC   File: iter_utils.py    (license) View Source Project 5 votes vote down vote up
def itercopy(iterable, copies = 2):
    """
    Duplicate an iterable into several independent iterators. After the
    call, the original iterable *should not* be used again.

    Parameters
    ----------
    iterable : iterable
        Iterable to be duplicated. It should not be used again afterwards.
    copies : int, optional
        How many independent iterators to return.

    Returns
    -------
    iter1, iter2, ... : iterable
        Tuple of ``copies`` iterators over the items of ``iterable``.

    Examples
    --------
    Rebinding the original name guarantees the original iterable cannot be
    used by accident.

    >>> from npstreams import itercopy
    >>> evens = (2*n for n in range(1000))
    >>> evens, evens_copy = itercopy(evens, copies = 2)

    See Also
    --------
    itertools.tee : equivalent function
    """
    # Thin, more discoverable alias for itertools.tee.
    clones = tee(iterable, copies)
    return clones
Example 29
Project: kinect-2-libras   Author: inessadl   File: heapq.py    (Apache License 2.0) View Source Project 5 votes vote down vote up
def nsmallest(n, iterable, key=None):
    """Return a list with the n smallest elements of the dataset.

    Equivalent to:  sorted(iterable, key=key)[:n]
    """
    # n == 1: a single min() pass is enough, but an empty input must give []
    if n == 1:
        stream = iter(iterable)
        first = list(islice(stream, 1))
        if first:
            candidates = chain(first, stream)
            if key is None:
                return [min(candidates)]
            return [min(candidates, key=key)]
        return []

    # Sized inputs no longer than n: plain sorted() is faster
    try:
        size = len(iterable)
    except (TypeError, AttributeError):
        size = None
    if size is not None and n >= size:
        return sorted(iterable, key=key)[:n]

    # Without a key, pair each item with a running counter only
    if key is None:
        decorated = izip(iterable, count())                 # decorate
        return map(itemgetter(0), _nsmallest(n, decorated))  # undecorate

    # General case, slowest method: (key, counter, item) triples
    key_source, item_source = tee(iterable)
    decorated = izip(imap(key, key_source), count(), item_source)
    return map(itemgetter(2), _nsmallest(n, decorated))     # undecorate
Example 30
Project: kinect-2-libras   Author: inessadl   File: heapq.py    (Apache License 2.0) View Source Project 5 votes vote down vote up
def nlargest(n, iterable, key=None):
    """Return a list with the n largest elements of the dataset.

    Equivalent to:  sorted(iterable, key=key, reverse=True)[:n]
    """

    # n == 1: a single max() pass is enough, but an empty input must give []
    if n == 1:
        stream = iter(iterable)
        first = list(islice(stream, 1))
        if first:
            candidates = chain(first, stream)
            if key is None:
                return [max(candidates)]
            return [max(candidates, key=key)]
        return []

    # Sized inputs no longer than n: plain sorted() is faster
    try:
        size = len(iterable)
    except (TypeError, AttributeError):
        size = None
    if size is not None and n >= size:
        return sorted(iterable, key=key, reverse=True)[:n]

    # Without a key, pair each item with a descending counter only
    if key is None:
        decorated = izip(iterable, count(0,-1))             # decorate
        return map(itemgetter(0), _nlargest(n, decorated))  # undecorate

    # General case, slowest method: (key, counter, item) triples
    key_source, item_source = tee(iterable)
    decorated = izip(imap(key, key_source), count(0,-1), item_source)
    return map(itemgetter(2), _nlargest(n, decorated))      # undecorate
Example 31
Project: ProtScan   Author: gianlucacorrado   File: __init__.py    (MIT License) View Source Project 5 votes vote down vote up
def random_partition_iter(iterable, n_splits, random_state=1234):
    """Randomly partition a generator (should maintain the unbalance).

    One tee() copy of the input is used to count its elements; the ids
    returned by ``random_partition`` for that size are then turned into one
    ``selection_iterator`` per part, each over its own copy of the input.

    Returns a list with one iterator per part.
    """
    iterable, counting_copy = tee(iterable)
    total = iterator_size(counting_copy)
    part_ids = random_partition(total, n_splits=n_splits,
                                random_state=random_state)
    parts = []
    for ids in part_ids:
        # Every part needs an untouched copy of the whole input.
        iterable, clone = tee(iterable)
        parts.append(selection_iterator(clone, ids))
    return parts
Example 32
Project: ProtScan   Author: gianlucacorrado   File: __init__.py    (MIT License) View Source Project 5 votes vote down vote up
def balanced_split(sequences, bin_sites, n_splits,
                   random_state=1234):
    """Balanced split over binding/non-binding sequences.

    Positions of the sequences are sorted into binding and non-binding
    (according to ``bin_sites`` looked up by transcript name), shuffled with
    the given seed and cut into ``n_splits`` chunks each.  The k-th part is
    a ``selection_iterator`` over the k-th binding and k-th non-binding
    chunk taken together.

    Returns a list with one iterator per part.
    """
    # first pass: positions of positives and negatives
    sequences, scan = tee(sequences)
    pos_ids = []
    neg_ids = []
    for idx, (attr, _) in enumerate(scan):
        is_binding = bin_sites.get(attr['tr_name'], False)
        bucket = pos_ids if is_binding else neg_ids
        bucket.append(idx)

    random.seed(random_state)
    random.shuffle(pos_ids)
    random.shuffle(neg_ids)

    # cut points at i/n_splits of each list, i = 1 .. n_splits-1
    fractions = [float(i) / n_splits for i in range(1, n_splits)]
    pos_split_points = [int(len(pos_ids) * f) for f in fractions]
    neg_split_points = [int(len(neg_ids) * f) for f in fractions]

    parts = []
    for pos, neg in izip(np.split(pos_ids, pos_split_points),
                         np.split(neg_ids, neg_split_points)):
        # Every part needs an untouched copy of the whole input.
        sequences, clone = tee(sequences)
        parts.append(selection_iterator(clone, np.concatenate([pos, neg])))
    return parts
Example 33
Project: ProtScan   Author: gianlucacorrado   File: __init__.py    (MIT License) View Source Project 5 votes vote down vote up
def balanced_fraction(sequences, bin_sites, opt_fraction=1.0,
                      random_state=1234):
    """Balanced sample of sequences (over binding/non-binding).

    ``sequences`` yields ``(attr, seq)`` pairs where ``attr['tr_name']`` is
    the transcript name.  Names are divided into binding and non-binding
    according to ``bin_sites``; from each non-empty group
    ``max(1, int(opt_fraction * group_size))`` names are drawn at random
    (seeded with ``random_state``).  A group without members is skipped
    instead of failing.

    Yields the ``(attr, seq)`` pairs whose transcript name was drawn, in
    their original order.

    Raises ValueError (from ``random.sample``) when ``opt_fraction`` asks
    for more names than a group contains.
    """
    # first pass: transcript names of positives and negatives
    sequences, sequences_ = tee(sequences)
    pos_names = []
    neg_names = []
    for attr, _ in sequences_:
        tr_name = attr['tr_name']
        if bin_sites.get(tr_name, False):
            pos_names.append(tr_name)
        else:
            neg_names.append(tr_name)

    # sample from positives, then negatives (order fixes the random draws)
    random.seed(random_state)
    selected = set()  # set: constant-time membership tests in the loop below
    for names in (pos_names, neg_names):
        if not names:
            # nothing to draw from; sampling 1 of 0 would raise ValueError
            continue
        k = max(1, int(opt_fraction * len(names)))
        selected.update(random.sample(names, k))

    # second pass: yield only sequences in selected
    for attr, s in sequences:
        if attr['tr_name'] in selected:
            yield attr, s
Example 34
Project: ProtScan   Author: gianlucacorrado   File: model.py    (MIT License) View Source Project 5 votes vote down vote up
def cross_vote(self, sequences, bin_sites, fit_batch_size=500,
                   pre_batch_size=200, max_splits=100000,
                   active_learning=False, random_state=1234, n_jobs=-1):
        """2-fold cross fit and vote.

        The sequences are divided in two parts with ``balanced_split``.  The
        model is fitted on the first part and votes on the second, then
        fitted on the second and votes on the first.

        Returns a dict holding the votes of both folds.
        """
        votes = dict()
        part1, part2 = balanced_split(sequences, bin_sites, n_splits=2,
                                      random_state=random_state)

        # each part is consumed twice: once to fit, once to vote
        part1, part1_ = tee(part1)
        part2, part2_ = tee(part2)

        folds = (
            ("Fold 1", part1, part2),
            ("Fold 2", part2_, part1_),
        )
        for label, tr, te in folds:
            logger.debug(label)
            self._fit(tr, bin_sites, fit_batch_size, max_splits,
                      active_learning, random_state, n_jobs)
            part_votes = self.vote(
                te, pre_batch_size, max_splits, random_state, n_jobs)
            votes.update(part_votes)
        return votes
Example 35
Project: Flask_Blog   Author: sugarguo   File: formparser.py    (GNU General Public License v3.0) View Source Project 5 votes vote down vote up
def parse(self, file, boundary, content_length):
        """Parse the parts once and return ``(form, files)`` containers.

        The stream produced by ``self.parse_parts`` is duplicated with
        ``tee`` so that the 'form' entries and the 'file' entries can be
        filtered lazily and fed to ``self.cls`` independently.
        """
        def only(kind, stream):
            # Keep the payload (index 1) of the parts tagged with `kind`.
            return (part[1] for part in stream if part[0] == kind)

        parts = self.parse_parts(file, boundary, content_length)
        form_parts, file_parts = tee(parts, 2)
        form = only('form', form_parts)
        files = only('file', file_parts)
        return self.cls(form), self.cls(files) 
Example 36
Project: code   Author: ActiveState   File: recipe-577515.py    (MIT License) View Source Project 5 votes vote down vote up
def pairwise(iterable):
    """Return an iterator over overlapping pairs of consecutive items.

    s -> (s0,s1), (s1,s2), (s2, s3), ...
    (recipe from the itertools module documentation)
    """
    current, following = tee(iterable)
    # Shift the second copy one item ahead; the None default keeps an
    # empty input from raising StopIteration here.
    next(following, None)
    return izip(current, following) 
Example 37
Project: code   Author: ActiveState   File: recipe-577196.py    (MIT License) View Source Project 5 votes vote down vote up
def __init__(self, iterable):
        """Wrap *iterable* and pre-fetch its first item.

        The input is duplicated with ``tee``: ``_a`` stays at the start of
        the data while ``_b`` is advanced by one item, which is stored in
        ``_peeked``. ``_previous`` starts out as None.

        Raises StopIteration if *iterable* is empty, because the first
        item is fetched unconditionally.
        """
        self._a, self._b = tee(iter(iterable), 2)
        self._previous = None
        # Use the next() builtin instead of the Python 2-only .next()
        # method so this works on both Python 2.6+ and Python 3.
        self._peeked   = next(self._b) 
Example 38
Project: ravel   Author: ravel-net   File: orch.py    (Apache License 2.0) View Source Project 5 votes vote down vote up
def pairwise(iterable):
    """Yield consecutive overlapping pairs: (s0, s1), (s1, s2), ..."""
    streams = tee(iterable)
    # Drop the first item of the second copy so it runs one step ahead.
    next(streams[1], None)
    return izip(*streams) 
Example 39
Project: tableschema-elasticsearch-py   Author: frictionlessdata   File: storage.py    (MIT License) View Source Project 5 votes vote down vote up
def write(self, bucket, doc_type, rows, primary_key, update=False, as_generator=False):
        """Bulk-write *rows* and flush the index afterwards.

        Each row becomes one bulk action whose ``_index`` is *bucket*,
        whose ``_type`` is *doc_type* and whose ``_id`` comes from
        ``self.generate_doc_id(row, primary_key)``. With ``update=True``
        the action is an 'update' with ``doc_as_upsert``; otherwise it is
        a plain 'index' action.

        Previously this was a generator function, so with
        ``as_generator=False`` nothing (not even the primary-key check)
        ran unless the caller iterated the return value. Now:

        * ``as_generator=False``: the write is performed immediately and
          an empty iterator is returned, so callers that still iterate
          the result keep working.
        * ``as_generator=True``: a generator is returned that yields each
          row as its bulk result arrives; the index is flushed once the
          generator is exhausted.

        Raises:
            ValueError: if *primary_key* is None or empty (raised at call
                time in both modes).
        """
        if primary_key is None or len(primary_key) == 0:
            raise ValueError('primary_key cannot be an empty list')

        op_type = 'update' if update else 'index'

        def actions(rows_):
            # One bulk action per row; only '_op_type' and '_source'
            # differ between the update and the index flavour.
            for row_ in rows_:
                if update:
                    source = {
                        'doc': row_,
                        'doc_as_upsert': True
                    }
                else:
                    source = row_
                yield {
                    '_op_type': op_type,
                    '_index': bucket,
                    '_type': doc_type,
                    '_id': self.generate_doc_id(row_, primary_key),
                    '_source': source
                }

        def written_rows():
            # tee() lets one pass over `rows` feed the bulk helper and
            # also hand the original rows back to the caller.
            to_index, to_report = itertools.tee(rows)
            results = streaming_bulk(self.__es, actions=actions(to_index))
            # NOTE(review): assumes streaming_bulk yields one result per
            # action, in order -- confirm against the helper's docs.
            for _result, row in zip(results, to_report):
                yield row
            self.__es.indices.flush(bucket)

        if as_generator:
            return written_rows()

        # Drain eagerly without keeping any row in memory.
        collections.deque(written_rows(), maxlen=0)
        return iter(()) 
Example 40
Project: swjtu-pyscraper   Author: Desgard   File: formparser.py    (MIT License) View Source Project 5 votes vote down vote up
def parse(self, file, boundary, content_length):
        """Return the form fields and the uploaded files as two containers.

        ``self.parse_parts`` is run once; its output is duplicated with
        ``tee`` and each copy is filtered on the tag stored at index 0 of
        every part ('form' or 'file').
        """
        all_parts = self.parse_parts(file, boundary, content_length)
        for_form, for_files = tee(all_parts, 2)
        form_values = (item[1] for item in for_form if item[0] == 'form')
        file_values = (item[1] for item in for_files if item[0] == 'file')
        form_container = self.cls(form_values)
        files_container = self.cls(file_values)
        return form_container, files_container 
Example 41
Project: TrackToTrip   Author: ruipgil   File: utils.py    (MIT License) View Source Project 5 votes vote down vote up
def pairwise(iterable):
    """Pair every item with its successor.

    s -> (s0,s1), (s1,s2), (s2, s3), ...
    """
    first, second = tee(iterable)
    # Skip one item on the second copy (no error when the input is empty).
    next(second, None)
    pairs = izip(first, second)
    return pairs 
Example 42
Project: datapipelines-python   Author: meraki-analytics   File: pipelines.py    (MIT License) View Source Project 5 votes vote down vote up
def _pairwise(iterable: Iterable[T]) -> Iterable[Tuple[T, T]]:
    """Return an iterator of consecutive pairs (s0, s1), (s1, s2), ..."""
    current, following = tee(iterable)
    # Move the second copy one item ahead; None avoids StopIteration
    # when the input is empty.
    next(following, None)
    return zip(current, following) 
Example 43
Project: django-souvenirs   Author: appsembler   File: reports.py    (MIT License) View Source Project 5 votes vote down vote up
def _usage_for_periods(periods):
    """
    Generate one usage dictionary per period.

    Each period should be a tuple of (start, end) datetimes, where start is
    inclusive and end is exclusive.

    Every generated dictionary has this form:

        {
            period: {
                start: datetime,
                end: datetime,
            },
            usage: {
                registered_users: int,
                activated_users: int,
                active_users: int,
            },
        }

    """
    # Three independent copies: the periods themselves, one feeding the
    # registration counts and one feeding the active-user counts.
    for_registered, for_active, for_output = itertools.tee(periods, 3)
    registered_counts = (
        registered_users_as_of(end) for start, end in for_registered
    )
    active_counts = (count_active_users(*period) for period in for_active)

    rows = izip(for_output, registered_counts, active_counts)
    for (start, end), (registered, activated), active in rows:
        yield {
            'period': {
                'start': start,
                'end': end,
            },
            'usage': {
                'registered_users': registered,
                'activated_users': activated,
                'active_users': active,
            },
        } 
Example 44
Project: zanph   Author: zanph   File: formparser.py    (license) View Source Project 5 votes vote down vote up
def parse(self, file, boundary, content_length):
        """Build the form and files containers from a single parsing pass.

        The parts from ``self.parse_parts`` are duplicated with ``tee``;
        one copy supplies the 'form' payloads and the other the 'file'
        payloads, each wrapped in ``self.cls``.
        """
        def payloads(stream, tag):
            # Yield the payload (index 1) of each part whose tag matches.
            for part in stream:
                if part[0] == tag:
                    yield part[1]

        form_source, file_source = tee(
            self.parse_parts(file, boundary, content_length), 2)
        form = self.cls(payloads(form_source, 'form'))
        files = self.cls(payloads(file_source, 'file'))
        return form, files 
Example 45
Project: iosxr-ansible   Author: ios-xr   File: netcfg.py    (license) View Source Project 5 votes vote down vote up
def get_next(iterable):
    """Pair each item with the one after it; the last item gets None."""
    current, following = itertools.tee(iterable, 2)
    # Lazily skip the first element of the second copy.
    shifted = itertools.islice(following, 1, None)
    return zip_longest(current, shifted) 
Example 46
Project: hostapd-mana   Author: adde88   File: heapq.py    (license) View Source Project 5 votes vote down vote up
def nsmallest(n, iterable, key=None):
    """Find the n smallest elements in a dataset.

    Equivalent to:  sorted(iterable, key=key)[:n]
    """
    # n == 1: a single min() pass is enough. The first item is pulled out
    # beforehand so an empty input yields [] instead of making min() fail.
    if n == 1:
        source = iter(iterable)
        first = list(islice(source, 1))
        if not first:
            return []
        candidates = chain(first, source)
        if key is None:
            return [min(candidates)]
        return [min(candidates, key=key)]

    # When the input has a length and n covers all of it, sort directly.
    try:
        size = len(iterable)
    except (TypeError, AttributeError):
        size = None
    if size is not None and n >= size:
        return sorted(iterable, key=key)[:n]

    # No key: decorate each value with a running counter only.
    if key is None:
        decorated = izip(iterable, count())
        smallest = _nsmallest(n, decorated)
        return map(itemgetter(0), smallest)

    # With a key: decorate as (key(value), counter, value) and strip the
    # decoration from the result afterwards.
    for_keys, for_values = tee(iterable)
    decorated = izip(imap(key, for_keys), count(), for_values)
    smallest = _nsmallest(n, decorated)
    return map(itemgetter(2), smallest) 
Example 47
Project: hostapd-mana   Author: adde88   File: heapq.py    (license) View Source Project 5 votes vote down vote up
def nlargest(n, iterable, key=None):
    """Find the n largest elements in a dataset.

    Equivalent to:  sorted(iterable, key=key, reverse=True)[:n]
    """
    # n == 1: a single max() pass is enough. The first item is pulled out
    # beforehand so an empty input yields [] instead of making max() fail.
    if n == 1:
        source = iter(iterable)
        first = list(islice(source, 1))
        if not first:
            return []
        candidates = chain(first, source)
        if key is None:
            return [max(candidates)]
        return [max(candidates, key=key)]

    # When the input has a length and n covers all of it, sort directly.
    try:
        size = len(iterable)
    except (TypeError, AttributeError):
        size = None
    if size is not None and n >= size:
        return sorted(iterable, key=key, reverse=True)[:n]

    # No key: decorate each value with a descending counter only.
    if key is None:
        decorated = izip(iterable, count(0,-1))
        largest = _nlargest(n, decorated)
        return map(itemgetter(0), largest)

    # With a key: decorate as (key(value), counter, value) and strip the
    # decoration from the result afterwards.
    for_keys, for_values = tee(iterable)
    decorated = izip(imap(key, for_keys), count(0,-1), for_values)
    largest = _nlargest(n, decorated)
    return map(itemgetter(2), largest) 
Example 48
Project: wltrace   Author: jhshi   File: utils.py    (license) View Source Project 5 votes vote down vote up
def pairwise(it):
    """Return a lazy iterator of consecutive pairs (s0, s1), (s1, s2), ...

    An input with fewer than two items produces no pairs.
    """
    a, b = itertools.tee(it)
    # Advance the second copy by one; None avoids StopIteration on empty
    # input.
    next(b, None)
    # itertools.izip only exists on Python 2. On Python 3 the builtin zip
    # is already lazy, so fall back to it instead of raising
    # AttributeError.
    lazy_zip = getattr(itertools, 'izip', zip)
    return lazy_zip(a, b) 
Example 49
Project: pyconjp-website   Author: pyconjp   File: timetable.py    (license) View Source Project 5 votes vote down vote up
def pairwise(iterable):
    """Pair each item with its successor, padding the last pair with None.

    s -> (s0, s1), (s1, s2), ..., (s_last, None)

    An empty input produces an empty iterator.
    """
    a, b = itertools.tee(iterable)
    # next() with a default replaces the Python 2-only b.next() call,
    # which also raised StopIteration when the input was empty.
    next(b, None)
    # izip_longest was renamed to zip_longest in Python 3; use whichever
    # this interpreter provides.
    zip_longest = (getattr(itertools, 'zip_longest', None)
                   or itertools.izip_longest)
    return zip_longest(a, b) 
Example 50
Project: phredutils   Author: doctaphred   File: itercools.py    (license) View Source Project 5 votes vote down vote up
def filters(iterable, *predicates):
    """Return one filtered view of the iterable per predicate.

    The iterable is duplicated once per predicate with ``tee``, so every
    predicate sees all items. The results come back as a tuple, in the
    same order as the predicates.

    >>> div_by_two = lambda x: not x % 2
    >>> div_by_three = lambda x: not x % 3
    >>> twos, threes = filters(range(10), div_by_two, div_by_three)
    >>> list(twos)
    [0, 2, 4, 6, 8]
    >>> list(threes)
    [0, 3, 6, 9]
    """
    copies = tee(iterable, len(predicates))
    # Apply filter(predicate, copy) pairwise across both sequences.
    return tuple(map(filter, predicates, copies))