Python itertools.islice() Examples
The following are 30 code examples of itertools.islice().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module itertools, or try the search function.
Example #1
Source File: nativetypes.py From misp42splunk with GNU Lesser General Public License v3.0 | 6 votes |
def native_concat(nodes):
    """Return a native Python type from the compiled nodes.

    If there are no nodes, ``None`` is returned. If there is a single node,
    its value is used directly. Otherwise the nodes are converted to text and
    concatenated. The result is then passed to :func:`ast.literal_eval`; if
    it parses, the parsed value is returned, otherwise the unparsed value is
    returned unchanged.

    :param nodes: iterable of node values (a one-shot iterator or a sequence).
    """
    # Take a single iterator up front. Without this, a re-iterable input such
    # as a list would not be advanced by ``islice`` and the first two nodes
    # would be emitted twice when chained back together below.
    nodes = iter(nodes)

    # Peek at most two items: enough to distinguish 0, 1 and "many" nodes.
    head = list(islice(nodes, 2))

    if not head:
        return None

    if len(head) == 1:
        out = head[0]
    else:
        out = u''.join([text_type(v) for v in chain(head, nodes)])

    try:
        return literal_eval(out)
    except (ValueError, SyntaxError, MemoryError):
        # Not a Python literal (or not even a string): hand it back as-is.
        return out
Example #2
Source File: word2vecReaderUtils.py From word2vec-twitter with MIT License | 6 votes |
def chunkize_serial(iterable, chunksize, as_numpy=False):
    """
    Return elements from the iterable in `chunksize`-ed lists. The last returned
    element may be smaller (if length of collection is not divisible by `chunksize`).

    If `as_numpy` is set, every document inside a chunk is converted with
    ``numpy.array`` before the chunk is yielded.

    >>> print(list(chunkize_serial(range(10), 3)))
    [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]

    """
    if as_numpy:
        # only needed for the numpy code path, so don't require it otherwise
        import numpy

    it = iter(iterable)
    chunksize = int(chunksize)  # loop-invariant: convert once
    while True:
        if as_numpy:
            # convert each document to a 2d numpy array (~6x faster when transmitting
            # chunk data over the wire, in Pyro)
            wrapped_chunk = [[numpy.array(doc) for doc in itertools.islice(it, chunksize)]]
        else:
            wrapped_chunk = [list(itertools.islice(it, chunksize))]
        if not wrapped_chunk[0]:
            break
        # memory opt: wrap the chunk and then pop(), to avoid leaving behind a dangling reference
        yield wrapped_chunk.pop()
Example #3
Source File: pytorch_ext.py From L3C-PyTorch with GNU General Public License v3.0 | 6 votes |
def assert_equal(t1, t2, show_num_wrong=3, names=None, msg=''):
    """Raise ``AssertionError`` unless `t1` and `t2` agree in shape and in every element.

    :param t1: first tensor-like object (needs ``shape``, ``!=``, indexing)
    :param t2: second tensor-like object
    :param show_num_wrong: upper bound on how many mismatching positions are spelled out
    :param names: pair of labels used in the message, defaults to ``('t1', 't2')``
    :param msg: extra text appended to the failure message
    """
    if t1.shape != t2.shape:
        raise AssertionError('Different shapes! {} != {}'.format(t1.shape, t2.shape))

    mismatch = t1 != t2
    if not mismatch.any():
        return

    names = ('t1', 't2') if names is None else names

    mismatch_positions = mismatch.nonzero()
    num_wrong = len(mismatch_positions)
    # never announce more positions than actually differ
    show_num_wrong = min(show_num_wrong, num_wrong)

    shown_positions = itertools.islice(
        (tuple(position.tolist()) for position in mismatch_positions),
        show_num_wrong)
    details = ' // '.join(
        '{}: {}!={}'.format(idx, t1[idx], t2[idx]) for idx in shown_positions)

    summary = '{} != {}: {}, and {}/{} other(s) '.format(
        names[0], names[1], details, num_wrong - show_num_wrong, np.prod(t1.shape))
    raise AssertionError((summary + msg).strip())
Example #4
Source File: conll2000.py From razzy-spinner with GNU General Public License v3.0 | 6 votes |
def demo():
    """Print the first five raw, tagged and chunked sentences of CoNLL-2000.

    Uses single-argument ``print(...)`` calls instead of Python 2 print
    statements so that the function parses on Python 3 while producing the
    same output on Python 2.
    """
    from en.parser.nltk_lite.corpora import conll2000
    from itertools import islice

    print("CONLL Chunked data\n")

    print("Raw text:")
    for sent in islice(conll2000.raw(), 0, 5):
        print(sent)
    # print("") emits a blank line on both Python 2 and 3
    print("")

    print("Tagged text:")
    for sent in islice(conll2000.tagged(), 0, 5):
        print(sent)
    print("")

    print("Chunked text:")
    for tree in islice(conll2000.chunked(chunk_types=('NP', 'PP', 'VP')), 0, 5):
        print(tree.pp())
    print("")
Example #5
Source File: nativetypes.py From misp42splunk with GNU Lesser General Public License v3.0 | 6 votes |
def native_concat(nodes):
    """Return a native Python type from the compiled nodes.

    No nodes yield ``None``; a single node is used as-is; several nodes are
    rendered to text and joined. The intermediate value is then passed to
    :func:`ast.literal_eval` and the parsed literal is returned when parsing
    succeeds; otherwise the intermediate value itself is returned.

    :param nodes: iterable of node values (a one-shot iterator or a sequence).
    """
    # Work on one iterator only: a list passed here would otherwise be read
    # from the start again by ``chain`` and duplicate its first two items.
    nodes = iter(nodes)

    # Two items are enough to tell "empty", "single" and "multiple" apart.
    head = list(islice(nodes, 2))

    if not head:
        return None

    if len(head) == 1:
        out = head[0]
    else:
        out = u''.join([text_type(v) for v in chain(head, nodes)])

    try:
        return literal_eval(out)
    except (ValueError, SyntaxError, MemoryError):
        # Could not be evaluated as a literal: return it unparsed.
        return out
Example #6
Source File: heapq.py From jawfish with MIT License | 6 votes |
def nsmallest(n, iterable): """Find the n smallest elements in a dataset. Equivalent to: sorted(iterable)[:n] """ if n < 0: return [] it = iter(iterable) result = list(islice(it, n)) if not result: return result _heapify_max(result) _heappushpop = _heappushpop_max for elem in it: _heappushpop(result, elem) result.sort() return result # 'heap' is a heap at all indices >= startpos, except possibly for pos. pos # is the index of a leaf with a possibly out-of-order value. Restore the # heap invariant.
Example #7
Source File: toolbox.py From razzy-spinner with GNU General Public License v3.0 | 6 votes |
def demo():
    """Pretty-print samples of the toolbox corpus in its different views.

    The first three raw, dictionary and dictionary-list entries of the default
    corpus are shown, followed by complete dumps of ``test.dic``. Written
    with single-argument ``print(...)`` calls so the function parses on
    Python 3 and prints the same text on Python 2.
    """
    from en.parser.nltk_lite.corpora import toolbox
    from itertools import islice
    from pprint import pprint

    print('Raw:')
    pprint(list(islice(toolbox.raw(), 3)))

    print('Dictionary:')
    pprint(list(islice(toolbox.dictionary(), 3)))

    print('Dictionary-List:')
    pprint(list(islice(toolbox.dict_list(), 3)))

    print('Complex test cases, no header')
    pprint(list(toolbox.raw("test.dic")))

    print('Complex test cases, no header, dictionary')
    pprint(list(toolbox.dictionary("test.dic")))

    print('Complex test cases, no header, dictionary list')
    pprint(list(toolbox.dict_list("test.dic")))

    print('Complex test cases, with header')
    pprint(list(toolbox.raw("test.dic", include_header=True)))
Example #8
Source File: heapq.py From jawfish with MIT License | 6 votes |
def nlargest(n, iterable):
    """Return the n largest elements of a dataset, in descending order.

    Equivalent to:  sorted(iterable, reverse=True)[:n]
    """
    if n < 0:
        return []

    stream = iter(iterable)
    largest = list(islice(stream, n))
    if largest:
        # Heap of the n best items seen so far; each remaining item is
        # pushed in and the smallest of the lot popped back out.
        heapify(largest)
        for candidate in stream:
            heappushpop(largest, candidate)
        largest.sort(reverse=True)
    return largest
Example #9
Source File: instaloader_unittests.py From instaloader with MIT License | 5 votes |
def test_followees_and_stories(self):
    # Download stories only (no profile pictures, no posts) for at most
    # PAGING_MAX_COUNT followees of the own account; errors must propagate.
    own_profile = instaloader.Profile.from_username(self.L.context, OWN_USERNAME)
    first_followees = islice(own_profile.get_followees(), PAGING_MAX_COUNT)
    self.L.download_profiles(
        set(first_followees),
        profile_pic=False,
        stories=True,
        posts=False,
        raise_errors=True)
Example #10
Source File: search_command.py From misp42splunk with GNU Lesser General Public License v3.0 | 5 votes |
def _prepare_recording(self, argv, ifile, ofile):
    """Wrap `ifile`/`ofile` in recorders and save artefacts describing this invocation.

    Next to the recorded input and output, an archive of the dispatch
    directory (when there is one) and the equivalent ``splunk cmd python``
    command line are written under the recordings directory.

    :return: the ``(ifile, ofile)`` pair, each wrapped in a ``Recorder``.
    """
    # Make sure the recordings directory is there
    recordings_dir = os.path.join(
        environment.splunk_home, 'var', 'run', 'splunklib.searchcommands', 'recordings')

    if not os.path.isdir(recordings_dir):
        os.makedirs(recordings_dir)

    # Common path prefix of every file written for this invocation
    base_name = self.__class__.__name__ + '-' + repr(time()) + '.' + self._metadata.action
    prefix = os.path.join(recordings_dir, base_name)

    ifile = Recorder(prefix + '.input', ifile)
    ofile = Recorder(prefix + '.output', ofile)

    # Archive the dispatch directory--if it exists--so that it can be used as a baseline in mocks
    dispatch_dir = self._metadata.searchinfo.dispatch_dir

    if dispatch_dir is not None:  # __GETINFO__ action does not include a dispatch_dir
        parent_dir, leaf_dir = os.path.split(dispatch_dir)
        make_archive(prefix + '.dispatch_dir', 'gztar', parent_dir, leaf_dir, logger=self.logger)

    # Save a splunk command line because it is useful for developing tests
    with open(prefix + '.splunk_cmd', 'wb') as cmd_file:
        cmd_file.write('splunk cmd python '.encode())
        cmd_file.write(os.path.basename(argv[0]).encode())
        for argument in islice(argv, 1, len(argv)):
            cmd_file.write(' '.encode())
            cmd_file.write(argument.encode())

    return ifile, ofile
Example #11
Source File: word2vecReaderUtils.py From word2vec-twitter with MIT License | 5 votes |
def run(self):
    """Feed ``self.corpus`` into the queue ``self.q`` in chunks of ``self.chunksize``.

    Each chunk is put on the queue as a list of documents (converted with
    ``numpy.asarray`` when ``self.as_numpy`` is set). After the corpus is
    exhausted a single ``None`` is put on the queue and the method returns.
    """
    if self.as_numpy:
        import numpy  # don't clutter the global namespace with a dependency on numpy
    it = iter(self.corpus)
    while True:
        chunk = itertools.islice(it, self.chunksize)
        if self.as_numpy:
            # HACK XXX convert documents to numpy arrays, to save memory.
            # This also gives a scipy warning at runtime:
            # "UserWarning: indices array has non-integer dtype (float64)"
            wrapped_chunk = [[numpy.asarray(doc) for doc in chunk]]
        else:
            wrapped_chunk = [list(chunk)]

        if not wrapped_chunk[0]:
            # corpus exhausted: signal the end of the stream
            self.q.put(None, block=True)
            break

        try:
            qsize = self.q.qsize()
        except NotImplementedError:
            # the queue does not support qsize(); show a placeholder instead
            qsize = '?'
        # pass the values as logging arguments so the message is only
        # formatted when debug output is actually enabled
        logger.debug("prepared another chunk of %i documents (qsize=%s)",
                     len(wrapped_chunk[0]), qsize)
        # pop() so that no reference to the chunk is left behind in this frame
        self.q.put(wrapped_chunk.pop(), block=True)
#endclass InputQueue
Example #12
Source File: measurement_search.py From ripe-atlas-tools with GNU General Public License v3.0 | 5 votes |
def run(self):
    """Query measurements with the active filters and print up to ``limit`` of them.

    With ``ids_only`` set, only the measurement ids are printed. Otherwise a
    table (filter summary, header, one coloured line per measurement) is
    printed, followed by a "Showing X of Y" footer.
    """
    args = self.arguments
    if not args.field:
        args.field = ("id", "type", "description", "status")

    filters = self._get_filters()
    measurements = MeasurementRequest(
        return_objects=True, user_agent=self.user_agent, **filters)
    # Lazily cap the result stream at the requested limit
    limited = itertools.islice(measurements, args.limit)

    if args.ids_only:
        for item in limited:
            print(item.id)
        return

    rule = self._get_horizontal_rule()
    bold_rule_args = (rule, "bold")

    print(self._get_filter_display(filters))
    print(self._get_header())
    print(colourise(*bold_rule_args))

    for item in limited:
        line = self._get_line_format().format(*self._get_line_items(item))
        colour = self._get_colour_from_status(item.status_id)
        print(colourise(line, colour))

    print(colourise(*bold_rule_args))

    # Print total count of found measurements, right-aligned to the rule width
    footer_template = "{:>" + str(len(rule)) + "}\n"
    shown = min(args.limit, measurements.total_count)
    footer = "Showing {} of {} total measurements".format(
        shown, measurements.total_count)
    print(footer_template.format(footer))
Example #13
Source File: instaloader_unittests.py From instaloader with MIT License | 5 votes |
def post_paging_test(self, iterator):
    # Walk at most PAGING_MAX_COUNT posts, printing each one, and check that
    # every post is strictly older (by date_utc) than the one before it.
    newer_post = None
    for current_post in islice(iterator, PAGING_MAX_COUNT):
        print(current_post)
        if newer_post:
            is_older = current_post.date_utc < newer_post.date_utc
            self.assertTrue(is_older)
        newer_post = current_post
Example #14
Source File: instaloader_unittests.py From instaloader with MIT License | 5 votes |
def test_public_profile_tagged_paging(self):
    # Print up to PAGING_MAX_COUNT of the posts PUBLIC_PROFILE is tagged in.
    profile = instaloader.Profile.from_username(self.L.context, PUBLIC_PROFILE)
    tagged_posts = profile.get_tagged_posts()
    for post in islice(tagged_posts, PAGING_MAX_COUNT):
        print(post)
Example #15
Source File: instaloader_unittests.py From instaloader with MIT License | 5 votes |
def test_public_profile_igtv(self):
    # Print up to PAGING_MAX_COUNT IGTV posts of PUBLIC_PROFILE_WITH_IGTV.
    profile = instaloader.Profile.from_username(self.L.context, PUBLIC_PROFILE_WITH_IGTV)
    igtv_posts = profile.get_igtv_posts()
    for post in islice(igtv_posts, PAGING_MAX_COUNT):
        print(post)
Example #16
Source File: word2vecReaderUtils.py From word2vec-twitter with MIT License | 5 votes |
def __iter__(self):
    """Iterate over ``self.corpus`` cyclically, stopping after ``self.reps`` items in total."""
    endless = itertools.cycle(self.corpus)
    return itertools.islice(endless, self.reps)
Example #17
Source File: newrange.py From misp42splunk with GNU Lesser General Public License v3.0 | 5 votes |
def __init__(self, range_):
    """Prepare an iterator producing ``len(range_)`` values from ``range_.start`` in steps of ``range_.step``."""
    endless_values = count(range_.start, range_.step)
    self._stepper = islice(endless_values, len(range_))
Example #18
Source File: search_command.py From misp42splunk with GNU Lesser General Public License v3.0 | 5 votes |
def _prepare_recording(self, argv, ifile, ofile):
    """Set up recording of this invocation and return the wrapped streams.

    Writes, under the recordings directory, the recorded input/output, an
    archive of the dispatch directory (if one is given) and a file holding
    the matching ``splunk cmd python`` command line.

    :return: ``(ifile, ofile)``, each wrapped in a ``Recorder``.
    """
    # Create the recordings directory when it is missing
    target_dir = os.path.join(
        environment.splunk_home, 'var', 'run', 'splunklib.searchcommands', 'recordings')

    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)

    # All artefacts of one invocation share this path stem
    stem_name = self.__class__.__name__ + '-' + repr(time()) + '.' + self._metadata.action
    stem = os.path.join(target_dir, stem_name)

    ifile = Recorder(stem + '.input', ifile)
    ofile = Recorder(stem + '.output', ofile)

    # Archive the dispatch directory--if it exists--so that it can be used as a baseline in mocks
    dispatch_dir = self._metadata.searchinfo.dispatch_dir

    if dispatch_dir is not None:  # __GETINFO__ action does not include a dispatch_dir
        root_dir, base_dir = os.path.split(dispatch_dir)
        make_archive(stem + '.dispatch_dir', 'gztar', root_dir, base_dir, logger=self.logger)

    # Save a splunk command line because it is useful for developing tests
    with open(stem + '.splunk_cmd', 'wb') as out:
        out.write('splunk cmd python '.encode())
        out.write(os.path.basename(argv[0]).encode())
        for extra_arg in islice(argv, 1, len(argv)):
            out.write(' '.encode())
            out.write(extra_arg.encode())

    return ifile, ofile
Example #19
Source File: utils.py From Telethon with MIT License | 5 votes |
def chunks(iterable, size=100):
    """
    Lazily splits the given iterable into consecutive chunks holding
    at most ``size`` items each (100 by default, since that's what
    Telegram uses the most).

    Every chunk is itself an iterator drawing from the shared source.
    """
    source = iter(iterable)
    # The head of each chunk is taken by the loop itself,
    # so only ``size - 1`` further items belong to it.
    tail_length = size - 1
    for first in source:
        remainder = itertools.islice(source, tail_length)
        yield itertools.chain([first], remainder)
Example #20
Source File: newrange.py From verge3d-blender-addon with GNU General Public License v3.0 | 5 votes |
def __init__(self, range_):
    """Build the stepping iterator: start at ``range_.start``, advance by ``range_.step``, stop after ``len(range_)`` values."""
    total = len(range_)
    self._stepper = islice(count(range_.start, range_.step), total)
Example #21
Source File: importer.py From pydfs-lineup-optimizer with MIT License | 5 votes |
def import_players(self):
    """Read players from the CSV file ``self.filename``.

    The line containing 'TeamAbbrev' is treated as the CSV header; every
    line before it is skipped. Each following row is converted with
    ``self._row_to_player``.

    Raises ``LineupOptimizerIncorrectCSV`` if no such line exists.
    """
    with open(self.filename, 'r') as csv_file:
        # readline() returns '' only at end of file, which ends the scan
        for header_index, line in enumerate(iter(csv_file.readline, '')):
            if 'TeamAbbrev' not in line:
                continue
            # Header located: rewind and re-read starting from that line
            csv_file.seek(0)
            data_lines = islice(csv_file, header_index, None)
            rows = csv.DictReader(data_lines, skipinitialspace=True)
            return [self._row_to_player(row) for row in rows]
        raise LineupOptimizerIncorrectCSV
Example #22
Source File: importer.py From pydfs-lineup-optimizer with MIT License | 5 votes |
def import_players(self) -> List[Player]:
    """Read players from the CSV file ``self.filename``.

    Leading lines are skipped until one containing 'FPPG' is found; that
    line is used as the CSV header and every later row is passed through
    ``self._row_to_player``.

    Raises ``LineupOptimizerIncorrectCSV`` when the file ends before such a
    line is seen.
    """
    with open(self.filename, 'r') as csv_file:
        lines_to_skip = 0
        line = csv_file.readline()
        while 'FPPG' not in line:
            if line == '':
                # end of file reached without finding the header
                raise LineupOptimizerIncorrectCSV
            lines_to_skip += 1
            line = csv_file.readline()

        # Start over and hand the reader everything from the header on
        csv_file.seek(0)
        table = csv.DictReader(
            islice(csv_file, lines_to_skip, None), skipinitialspace=True)
        return [self._row_to_player(record) for record in table]
Example #23
Source File: test_core.py From lineflow with MIT License | 5 votes |
def setUp(self):
    # Fixture: a dataset over 0..99 plus the expected consecutive,
    # non-overlapping windows of three items (the last one may be shorter).
    self.data = Dataset(range(100))

    size = 3
    numbers = iter(range(100))
    # Keep slicing off windows until an empty tuple signals exhaustion
    self.expected = list(iter(lambda: tuple(itertools.islice(numbers, size)), ()))
    self.window_size = size
Example #24
Source File: core.py From lineflow with MIT License | 5 votes |
def take(self, n: int) -> List[Any]:
    """Collects the leading ``n`` examples of the dataset.

    Args:
        n (int): how many examples to collect from the front.

    Returns (List[Any]): The collected examples, in iteration order.
    """
    leading = []
    leading.extend(islice(self, n))
    return leading
Example #25
Source File: wordnet.py From razzy-spinner with GNU General Public License v3.0 | 5 votes |
def morphy(self, form, pos=None):
    """
    Return the first base form found for ``form``, or None if there is none.

    With a part of speech given, only the analyses for that part of speech
    are considered; without one, the parts of speech in ``POS_LIST`` are
    tried in order.

        >>> from nltk.corpus import wordnet as wn
        >>> print(wn.morphy('dogs'))
        dog
        >>> print(wn.morphy('churches'))
        church
        >>> print(wn.morphy('aardwolves'))
        aardwolf
        >>> print(wn.morphy('abaci'))
        abacus
        >>> wn.morphy('hardrock', wn.ADV)
        >>> print(wn.morphy('book', wn.NOUN))
        book
        >>> wn.morphy('book', wn.ADJ)
    """
    if pos is not None:
        candidates = self._morphy(form, pos)
    else:
        analyse = self._morphy
        candidates = chain(a for p in POS_LIST for a in analyse(form, p))

    # Only the first candidate is wanted; the loop body runs at most once.
    for first in islice(candidates, 1):
        return first
    return None
Example #26
Source File: chomsky.py From razzy-spinner with GNU General Public License v3.0 | 5 votes |
def generate_chomsky(times=5, line_length=72):
    """Print ``times`` random sentences, wrapped at ``line_length`` columns.

    Each sentence combines one shuffled phrase from each of the leadins,
    subjects, verbs and objects phrase lists.
    """
    shuffled_lists = []
    for block_text in (leadins, subjects, verbs, objects):
        phrases = [str.strip(raw_line) for raw_line in block_text.splitlines()]
        random.shuffle(phrases)
        shuffled_lists.append(phrases)

    # Pair up the i-th phrase of every list and keep the first ``times`` tuples
    sentences = islice(izip(*shuffled_lists), 0, times)
    words = chain.from_iterable(sentences)
    print(textwrap.fill(" ".join(words), line_length))
Example #27
Source File: toolbox.py From razzy-spinner with GNU General Public License v3.0 | 5 votes |
def demo():
    """Print sample fields of the rotokas lexicon and a value from an MDF settings file."""
    from itertools import islice

    # Loading from the zipped corpus is also possible:
    # zip_path = find('corpora/toolbox.zip')
    # lexicon = ToolboxData(ZipFilePathPointer(zip_path, 'toolbox/rotokas.dic')).parse()
    lexicon_path = find('corpora/toolbox/rotokas.dic')
    lexicon = ToolboxData(lexicon_path).parse()

    print('first field in fourth record:')
    leading_field = lexicon[3][0]
    print(leading_field.tag)
    print(leading_field.text)

    print('\nfields in sequential order:')
    first_record = lexicon.find('record')
    for field in islice(first_record, 10):
        print(field.tag, field.text)

    print('\nlx fields:')
    lx_fields = lexicon.findall('record/lx')
    for field in islice(lx_fields, 10):
        print(field.text)

    toolbox_settings = ToolboxSettings()
    settings_path = find('corpora/toolbox/MDF/MDF_AltH.typ')
    toolbox_settings.open(settings_path)
    # toolbox_settings.open(ZipFilePathPointer(zip_path, entry='toolbox/MDF/MDF_AltH.typ'))
    parsed = toolbox_settings.parse(unwrap=False, encoding='cp1252')
    paper_size = parsed.find('expset/expMDF/rtfPageSetup/paperSize')
    print(paper_size.text)

    as_tree = ElementTree(parsed)
    print(to_settings_string(as_tree).encode('utf8'))
Example #28
Source File: util.py From razzy-spinner with GNU General Public License v3.0 | 5 votes |
def iterate_from(self, start_index):
    """Yield the values of the concatenated sublists, beginning at ``start_index``.

    ``self._offsets`` caches the start position of each sublist seen so
    far. It is extended when a new sublist is reached and checked for
    consistency when a known sublist is revisited.
    """
    # Pick the sublist to start in: look it up if start_index lies within
    # the cached offsets, otherwise begin at the last cached sublist.
    if start_index >= self._offsets[-1]:
        sublist_index = len(self._offsets) - 1
    else:
        sublist_index = bisect.bisect_right(self._offsets, start_index) - 1

    index = self._offsets[sublist_index]

    # Construct an iterator over the sublists.
    if not isinstance(self._list, AbstractLazySequence):
        remaining_sublists = islice(self._list, sublist_index, None)
    else:
        remaining_sublists = self._list.iterate_from(sublist_index)

    for sublist in remaining_sublists:
        sublist_end = index + len(sublist)
        if sublist_index != (len(self._offsets) - 1):
            # Known sublist: its cached end offset must still match
            assert self._offsets[sublist_index + 1] == sublist_end, (
                'inconsistent list value (num elts)')
        else:
            # New sublist: record where it ends
            assert sublist_end >= self._offsets[-1], (
                'offests not monotonic increasing!')
            self._offsets.append(sublist_end)

        # Skip the part of the first sublist that precedes start_index
        skip = max(0, start_index - index)
        for value in sublist[skip:]:
            yield value

        index += len(sublist)
        sublist_index += 1
Example #29
Source File: util.py From razzy-spinner with GNU General Public License v3.0 | 5 votes |
def iterate_from(self, start):
    """Iterate over this subsequence from relative position ``start`` to its end."""
    # Translate the relative start into a position in the underlying source
    source_iter = self._source.iterate_from(start + self._start)
    # Never ask for a negative number of items when start is past the end
    remaining = max(0, len(self) - start)
    return islice(source_iter, remaining)
Example #30
Source File: util.py From razzy-spinner with GNU General Public License v3.0 | 5 votes |
def __new__(cls, source, start, stop):
    """
    Create a slice ``[start:stop]`` over the underlying sequence ``source``.

    ``start`` and ``stop`` are expected to be absolute indices, i.e.
    neither negative (counting from the back) nor beyond the length
    of ``source``.
    """
    length = stop - start
    if length >= cls.MIN_SIZE:
        return object.__new__(cls)
    # Short slices are materialised right away as a plain list.
    return list(islice(source.iterate_from(start), length))