Python itertools.islice() Examples

The following are 28 code examples of itertools.islice(), drawn from real-world open-source projects. The header above each example names the original source file, project, and license. You may also want to check out all available functions and classes of the itertools module.
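Before the project examples, here is a minimal sketch of the call itself: islice(iterable, [start,] stop[, step]) slices any iterable the way range() slices indices, lazily and without materializing the underlying sequence, which is what makes it safe on generators and infinite streams.

from itertools import count, islice

print(list(islice(count(), 3)))         # stop only: [0, 1, 2]
print(list(islice(count(), 2, 7, 2)))   # start, stop, step: [2, 4, 6]
print(list(islice('abcdef', 3, None)))  # stop=None reads to the end: ['d', 'e', 'f']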
Example #1
Source File: nativetypes.py    From misp42splunk with GNU Lesser General Public License v3.0
def native_concat(nodes):
    """Return a native Python type from the list of compiled nodes. If the
    result is a single node, its value is returned. Otherwise, the nodes are
    concatenated as strings. If the result can be parsed with
    :func:`ast.literal_eval`, the parsed value is returned. Otherwise, the
    string is returned.
    """
    head = list(islice(nodes, 2))

    if not head:
        return None

    if len(head) == 1:
        out = head[0]
    else:
        out = u''.join([text_type(v) for v in chain(head, nodes)])

    try:
        return literal_eval(out)
    except (ValueError, SyntaxError, MemoryError):
        return out 
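The islice(nodes, 2) call above peeks at up to two items of a one-shot generator, and chain(head, nodes) later splices the consumed head back in front of whatever remains. A minimal sketch of that peek-and-reattach idiom (the nodes generator here is a stand-in):

from itertools import chain, islice

nodes = (c for c in 'abcd')
head = list(islice(nodes, 2))   # consumes at most two items: ['a', 'b']
rest = chain(head, nodes)       # reattach the head to the remainder
print(''.join(rest))            # abcd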
Example #2
Source File: word2vecReaderUtils.py    From word2vec-twitter with MIT License
def chunkize_serial(iterable, chunksize, as_numpy=False):
    """
    Return elements from the iterable in `chunksize`-ed lists. The last returned
    element may be smaller (if length of collection is not divisible by `chunksize`).

    >>> print(list(grouper(range(10), 3)))
    [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]

    """
    import numpy
    it = iter(iterable)
    while True:
        if as_numpy:
            # convert each document to a 2d numpy array (~6x faster when transmitting
            # chunk data over the wire, in Pyro)
            wrapped_chunk = [[numpy.array(doc) for doc in itertools.islice(it, int(chunksize))]]
        else:
            wrapped_chunk = [list(itertools.islice(it, int(chunksize)))]
        if not wrapped_chunk[0]:
            break
        # memory opt: wrap the chunk and then pop(), to avoid leaving behind a dangling reference
        yield wrapped_chunk.pop() 
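Calling islice(it, chunksize) repeatedly on one shared iterator is the standard chunking idiom: each call consumes the next chunksize items and comes back empty once the iterator is exhausted. The same loop, stripped of the numpy and wrapping details, matches the grouper doctest above:

import itertools

def grouper(iterable, chunksize):
    it = iter(iterable)
    while True:
        chunk = list(itertools.islice(it, chunksize))
        if not chunk:
            break
        yield chunk

print(list(grouper(range(10), 3)))  # [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]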
Example #3
Source File: pytorch_ext.py    From L3C-PyTorch with GNU General Public License v3.0
def assert_equal(t1, t2, show_num_wrong=3, names=None, msg=''):
    if t1.shape != t2.shape:
        raise AssertionError('Different shapes! {} != {}'.format(t1.shape, t2.shape))
    wrong = t1 != t2
    if not wrong.any():
        return
    if names is None:
        names = ('t1', 't2')
    wrong_idxs = wrong.nonzero()
    num_wrong = len(wrong_idxs)
    show_num_wrong = min(show_num_wrong, num_wrong)
    wrong_idxs = itertools.islice((tuple(i.tolist()) for i in wrong_idxs),
                                  show_num_wrong)
    err_msg = ' // '.join('{}: {}!={}'.format(idx, t1[idx], t2[idx])
                          for idx in wrong_idxs)
    raise AssertionError(('{} != {}: {}, and {}/{} other(s) '.format(
            names[0], names[1], err_msg, num_wrong - show_num_wrong, np.prod(t1.shape)) + msg).strip()) 
Example #4
Source File: conll2000.py    From razzy-spinner with GNU General Public License v3.0
def demo():
    from en.parser.nltk_lite.corpora import conll2000
    from itertools import islice

    print "CONLL Chunked data\n"
    
    print "Raw text:"
    for sent in islice(conll2000.raw(), 0, 5):
        print sent
    print

    print "Tagged text:"
    for sent in islice(conll2000.tagged(), 0, 5):
        print sent
    print

    print "Chunked text:"
    for tree in islice(conll2000.chunked(chunk_types=('NP', 'PP', 'VP')), 0, 5):
        print tree.pp()
    print 
Example #5
Source File: heapq.py    From jawfish with MIT License
def nsmallest(n, iterable):
    """Find the n smallest elements in a dataset.

    Equivalent to:  sorted(iterable)[:n]
    """
    if n < 0:
        return []
    it = iter(iterable)
    result = list(islice(it, n))
    if not result:
        return result
    _heapify_max(result)
    _heappushpop = _heappushpop_max
    for elem in it:
        _heappushpop(result, elem)
    result.sort()
    return result

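The islice(it, n) call seeds the heap with the first n elements and, because islice consumes from the shared iterator, leaves it positioned at element n; the follow-up loop therefore only sees the remainder. The two-phase split in isolation:

from itertools import islice

it = iter([5, 1, 4, 2, 3])
seed = list(islice(it, 3))  # first three items: [5, 1, 4]
rest = list(it)             # the iterator resumes where islice stopped
print(seed, rest)           # [5, 1, 4] [2, 3]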
Example #6
Source File: toolbox.py    From razzy-spinner with GNU General Public License v3.0
def demo():
    from en.parser.nltk_lite.corpora import toolbox
    from itertools import islice
    from pprint import pprint

    print('Raw:')
    pprint(list(islice(toolbox.raw(), 3)))

    print('Dictionary:')
    pprint(list(islice(toolbox.dictionary(), 3)))

    print('Dictionary-List:')
    pprint(list(islice(toolbox.dict_list(), 3)))

    print('Complex test cases, no header')
    pprint(list(toolbox.raw("test.dic")))

    print('Complex test cases, no header, dictionary')
    pprint(list(toolbox.dictionary("test.dic")))

    print('Complex test cases, no header, dictionary list')
    pprint(list(toolbox.dict_list("test.dic")))

    print('Complex test cases, with header')
    pprint(list(toolbox.raw("test.dic", include_header=True)))
Example #7
Source File: heapq.py    From jawfish with MIT License
def nlargest(n, iterable):
    """Find the n largest elements in a dataset.

    Equivalent to:  sorted(iterable, reverse=True)[:n]
    """
    if n < 0:
        return []
    it = iter(iterable)
    result = list(islice(it, n))
    if not result:
        return result
    heapify(result)
    _heappushpop = heappushpop
    for elem in it:
        _heappushpop(result, elem)
    result.sort(reverse=True)
    return result 
Example #8
Source File: instaloader_unittests.py    From instaloader with MIT License
def test_followees_and_stories(self):
        profile = instaloader.Profile.from_username(self.L.context, OWN_USERNAME)
        followees = set(islice(profile.get_followees(), PAGING_MAX_COUNT))
        self.L.download_profiles(followees, profile_pic=False, stories=True, posts=False, raise_errors=True) 
Example #9
Source File: search_command.py    From misp42splunk with GNU Lesser General Public License v3.0
def _prepare_recording(self, argv, ifile, ofile):

        # Create the recordings directory, if it doesn't already exist

        recordings = os.path.join(environment.splunk_home, 'var', 'run', 'splunklib.searchcommands', 'recordings')

        if not os.path.isdir(recordings):
            os.makedirs(recordings)

        # Create input/output recorders from ifile and ofile

        recording = os.path.join(recordings, self.__class__.__name__ + '-' + repr(time()) + '.' + self._metadata.action)
        ifile = Recorder(recording + '.input', ifile)
        ofile = Recorder(recording + '.output', ofile)

        # Archive the dispatch directory--if it exists--so that it can be used as a baseline in mocks

        dispatch_dir = self._metadata.searchinfo.dispatch_dir

        if dispatch_dir is not None:  # __GETINFO__ action does not include a dispatch_dir
            root_dir, base_dir = os.path.split(dispatch_dir)
            make_archive(recording + '.dispatch_dir', 'gztar', root_dir, base_dir, logger=self.logger)

        # Save a splunk command line because it is useful for developing tests

        with open(recording + '.splunk_cmd', 'wb') as f:
            f.write('splunk cmd python '.encode())
            f.write(os.path.basename(argv[0]).encode())
            for arg in islice(argv, 1, len(argv)):
                f.write(' '.encode())
                f.write(arg.encode())

        return ifile, ofile 
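islice(argv, 1, len(argv)) walks every argument except the program name. Since argv is a list, argv[1:] is equivalent, and islice(argv, 1, None) does the same without copying or needing the length; a quick illustration:

from itertools import islice

argv = ['prog', '--flag', 'value']
print(list(islice(argv, 1, None)))  # ['--flag', 'value'], i.e. argv[1:]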
Example #10
Source File: word2vecReaderUtils.py    From word2vec-twitter with MIT License
def run(self):
        if self.as_numpy:
            import numpy # don't clutter the global namespace with a dependency on numpy
        it = iter(self.corpus)
        while True:
            chunk = itertools.islice(it, self.chunksize)
            if self.as_numpy:
                # HACK XXX convert documents to numpy arrays, to save memory.
                # This also gives a scipy warning at runtime:
                # "UserWarning: indices array has non-integer dtype (float64)"
                wrapped_chunk = [[numpy.asarray(doc) for doc in chunk]]
            else:
                wrapped_chunk = [list(chunk)]

            if not wrapped_chunk[0]:
                self.q.put(None, block=True)
                break

            try:
                qsize = self.q.qsize()
            except NotImplementedError:
                qsize = '?'
            logger.debug("prepared another chunk of %i documents (qsize=%s)" %
                        (len(wrapped_chunk[0]), qsize))
            self.q.put(wrapped_chunk.pop(), block=True)
#endclass InputQueue 
Example #11
Source File: measurement_search.py    From ripe-atlas-tools with GNU General Public License v3.0
def run(self):

        if not self.arguments.field:
            self.arguments.field = ("id", "type", "description", "status")

        filters = self._get_filters()
        measurements = MeasurementRequest(
            return_objects=True, user_agent=self.user_agent, **filters)
        truncated_measurements = itertools.islice(
            measurements, self.arguments.limit)

        if self.arguments.ids_only:
            for measurement in truncated_measurements:
                print(measurement.id)
            return

        hr = self._get_horizontal_rule()

        print(self._get_filter_display(filters))
        print(self._get_header())
        print(colourise(hr, "bold"))

        for measurement in truncated_measurements:
            print(colourise(self._get_line_format().format(
                *self._get_line_items(measurement)
            ), self._get_colour_from_status(measurement.status_id)))

        print(colourise(hr, "bold"))

        # Print total count of found measurements
        print(("{:>" + str(len(hr)) + "}\n").format(
            "Showing {} of {} total measurements".format(
                min(self.arguments.limit, measurements.total_count),
                measurements.total_count
            )
        )) 
Example #12
Source File: instaloader_unittests.py    From instaloader with MIT License
def post_paging_test(self, iterator):
        previous_post = None
        for post in islice(iterator, PAGING_MAX_COUNT):
            print(post)
            if previous_post:
                self.assertTrue(post.date_utc < previous_post.date_utc)
            previous_post = post 
Example #13
Source File: instaloader_unittests.py    From instaloader with MIT License
def test_public_profile_tagged_paging(self):
        for post in islice(instaloader.Profile.from_username(self.L.context, PUBLIC_PROFILE).get_tagged_posts(),
                           PAGING_MAX_COUNT):
            print(post) 
Example #14
Source File: instaloader_unittests.py    From instaloader with MIT License
def test_public_profile_igtv(self):
        for post in islice(instaloader.Profile.from_username(self.L.context, PUBLIC_PROFILE_WITH_IGTV).get_igtv_posts(),
                           PAGING_MAX_COUNT):
            print(post) 
Example #15
Source File: word2vecReaderUtils.py    From word2vec-twitter with MIT License
def __iter__(self):
        return itertools.islice(itertools.cycle(self.corpus), self.reps) 
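itertools.cycle repeats the corpus endlessly, and islice caps the combined stream at self.reps items in total, so the corpus wraps around as many times as needed. With concrete values:

from itertools import cycle, islice

corpus = ['doc1', 'doc2', 'doc3']
print(list(islice(cycle(corpus), 7)))
# ['doc1', 'doc2', 'doc3', 'doc1', 'doc2', 'doc3', 'doc1']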
Example #16
Source File: newrange.py    From misp42splunk with GNU Lesser General Public License v3.0
def __init__(self, range_):
        self._stepper = islice(count(range_.start, range_.step), len(range_)) 
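count(start, step) is an infinite arithmetic sequence; islicing it to len(range_) items reproduces the range lazily, one value per next() call. The same construction with literal numbers:

from itertools import count, islice

stepper = islice(count(2, 3), 4)  # behaves like iter(range(2, 12, 3))
print(list(stepper))              # [2, 5, 8, 11]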
Example #17
Source File: utils.py    From Telethon with MIT License
def chunks(iterable, size=100):
    """
    Turns the given iterable into chunks of the specified size,
    which is 100 by default since that's what Telegram uses the most.
    """
    it = iter(iterable)
    size -= 1
    for head in it:
        yield itertools.chain([head], itertools.islice(it, size)) 
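Here the for loop pulls one head element to start each chunk and islice(it, size - 1) lazily supplies the rest, so no chunk is materialized up front; the chunks share the underlying iterator and must be consumed in order. A usage sketch with size=3:

import itertools

it = iter(range(7))
for head in it:
    chunk = itertools.chain([head], itertools.islice(it, 2))
    print(list(chunk))  # [0, 1, 2], then [3, 4, 5], then [6]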
Example #18
Source File: newrange.py    From verge3d-blender-addon with GNU General Public License v3.0
def __init__(self, range_):
        self._stepper = islice(count(range_.start, range_.step), len(range_)) 
Example #19
Source File: importer.py    From pydfs-lineup-optimizer with MIT License
def import_players(self):
        with open(self.filename, 'r') as csv_file:
            start_line = 0  # Find line with 'TeamAbbrev', that's where players data starts
            while True:
                line = csv_file.readline()
                if 'TeamAbbrev' in line:
                    csv_file.seek(0)
                    csv_data = csv.DictReader(islice(csv_file, start_line, None),
                                              skipinitialspace=True)
                    return [self._row_to_player(row) for row in csv_data]
                elif line == '':
                    raise LineupOptimizerIncorrectCSV
                else:
                    start_line += 1 
Example #20
Source File: importer.py    From pydfs-lineup-optimizer with MIT License
def import_players(self) -> List[Player]:
        with open(self.filename, 'r') as csv_file:
            start_line = 0  # Find line with 'FPPG', that's where players data starts
            while True:
                line = csv_file.readline()
                if 'FPPG' in line:
                    csv_file.seek(0)
                    csv_data = csv.DictReader(islice(csv_file, start_line, None),
                                              skipinitialspace=True)
                    return [self._row_to_player(row) for row in csv_data]
                elif line == '':
                    raise LineupOptimizerIncorrectCSV
                else:
                    start_line += 1 
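Both importers above use islice(csv_file, start_line, None) to skip preamble lines and hand the rest of the open file object straight to csv.DictReader. The same trick on an in-memory file (the data here is made up):

import csv
import io
from itertools import islice

f = io.StringIO("junk preamble\nname,team\nAlice,NYC\nBob,LA\n")
rows = csv.DictReader(islice(f, 1, None), skipinitialspace=True)  # skip the first line
print([row['name'] for row in rows])  # ['Alice', 'Bob']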
Example #21
Source File: test_core.py    From lineflow with MIT License
def setUp(self):
        self.data = Dataset(range(100))
        window_size = 3
        expected = []
        it = iter(range(100))
        window = tuple(itertools.islice(it, window_size))
        while window:
            expected.append(window)
            window = tuple(itertools.islice(it, window_size))
        self.expected = expected
        self.window_size = window_size 
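The setUp above builds non-overlapping windows by calling islice until it returns an empty tuple; tuple(islice(it, n)) is falsy exactly when the iterator is exhausted. The same loop, condensed with an assignment expression (Python 3.8+):

import itertools

it = iter(range(8))
while (window := tuple(itertools.islice(it, 3))):
    print(window)  # (0, 1, 2), then (3, 4, 5), then (6, 7)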
Example #22
Source File: core.py    From lineflow with MIT License
def take(self, n: int) -> List[Any]:
        """Takes the first n examples from the dataset.

        Args:
            n (int): the number of examples to take.

        Returns (List[Any]):
            The list of the ``n`` examples.
        """
        return list(islice(self, n)) 
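This mirrors the take recipe from the itertools documentation, with the dataset itself as the iterable. A standalone version for comparison:

from itertools import islice

def take(n, iterable):
    "Return the first n items of the iterable as a list."
    return list(islice(iterable, n))

print(take(3, range(100)))  # [0, 1, 2]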
Example #23
Source File: wordnet.py    From razzy-spinner with GNU General Public License v3.0
def morphy(self, form, pos=None):
        """
        Find a possible base form for the given form, with the given
        part of speech, by checking WordNet's list of exceptional
        forms, and by recursively stripping affixes for this part of
        speech until a form in WordNet is found.

        >>> from nltk.corpus import wordnet as wn
        >>> print(wn.morphy('dogs'))
        dog
        >>> print(wn.morphy('churches'))
        church
        >>> print(wn.morphy('aardwolves'))
        aardwolf
        >>> print(wn.morphy('abaci'))
        abacus
        >>> wn.morphy('hardrock', wn.ADV)
        >>> print(wn.morphy('book', wn.NOUN))
        book
        >>> wn.morphy('book', wn.ADJ)
        """

        if pos is None:
            morphy = self._morphy
            analyses = chain(a for p in POS_LIST for a in morphy(form, p))
        else:
            analyses = self._morphy(form, pos)

        # get the first one we find
        first = list(islice(analyses, 1))
        if len(first) == 1:
            return first[0]
        else:
            return None 
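list(islice(analyses, 1)) fetches at most one item from the lazy chain of analyses, giving a cheap "first match or None" without consuming the rest; next(analyses, None) is the usual one-line equivalent. As a reusable helper:

from itertools import islice

def first(iterable, default=None):
    got = list(islice(iterable, 1))  # pulls at most one item
    return got[0] if got else default

print(first(x for x in [10, 20, 30]))  # 10
print(first([]))                       # None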
Example #24
Source File: chomsky.py    From razzy-spinner with GNU General Public License v3.0
def generate_chomsky(times=5, line_length=72):
    parts = []
    for part in (leadins, subjects, verbs, objects):
        phraselist = list(map(str.strip, part.splitlines()))
        random.shuffle(phraselist)
        parts.append(phraselist)
    output = chain(*islice(zip(*parts), 0, times))
    print(textwrap.fill(" ".join(output), line_length)) 
Example #25
Source File: toolbox.py    From razzy-spinner with GNU General Public License v3.0
def demo():
    from itertools import islice

#    zip_path = find('corpora/toolbox.zip')
#    lexicon = ToolboxData(ZipFilePathPointer(zip_path, 'toolbox/rotokas.dic')).parse()
    file_path = find('corpora/toolbox/rotokas.dic')
    lexicon = ToolboxData(file_path).parse()
    print('first field in fourth record:')
    print(lexicon[3][0].tag)
    print(lexicon[3][0].text)

    print('\nfields in sequential order:')
    for field in islice(lexicon.find('record'), 10):
        print(field.tag, field.text)

    print('\nlx fields:')
    for field in islice(lexicon.findall('record/lx'), 10):
        print(field.text)

    settings = ToolboxSettings()
    file_path = find('corpora/toolbox/MDF/MDF_AltH.typ')
    settings.open(file_path)
#    settings.open(ZipFilePathPointer(zip_path, entry='toolbox/MDF/MDF_AltH.typ'))
    tree = settings.parse(unwrap=False, encoding='cp1252')
    print(tree.find('expset/expMDF/rtfPageSetup/paperSize').text)
    settings_tree = ElementTree(tree)
    print(to_settings_string(settings_tree).encode('utf8')) 
Example #26
Source File: util.py    From razzy-spinner with GNU General Public License v3.0
def iterate_from(self, start_index):
        if start_index < self._offsets[-1]:
            sublist_index = bisect.bisect_right(self._offsets, start_index)-1
        else:
            sublist_index = len(self._offsets)-1

        index = self._offsets[sublist_index]

        # Construct an iterator over the sublists.
        if isinstance(self._list, AbstractLazySequence):
            sublist_iter = self._list.iterate_from(sublist_index)
        else:
            sublist_iter = islice(self._list, sublist_index, None)

        for sublist in sublist_iter:
            if sublist_index == (len(self._offsets)-1):
                assert index+len(sublist) >= self._offsets[-1], (
                        'offsets not monotonically increasing!')
                self._offsets.append(index+len(sublist))
            else:
                assert self._offsets[sublist_index+1] == index+len(sublist), (
                        'inconsistent list value (num elts)')

            for value in sublist[max(0, start_index-index):]:
                yield value

            index += len(sublist)
            sublist_index += 1 
Example #27
Source File: util.py    From razzy-spinner with GNU General Public License v3.0
def iterate_from(self, start):
        return islice(self._source.iterate_from(start+self._start),
                      max(0, len(self)-start)) 
Example #28
Source File: util.py    From razzy-spinner with GNU General Public License v3.0
def __new__(cls, source, start, stop):
        """
        Construct a new slice from a given underlying sequence.  The
        ``start`` and ``stop`` indices should be absolute indices --
        i.e., they should not be negative (for indexing from the back
        of a list) or greater than the length of ``source``.
        """
        # If the slice is small enough, just use a tuple.
        if stop-start < cls.MIN_SIZE:
            return list(islice(source.iterate_from(start), stop-start))
        else:
            return object.__new__(cls)