Python re.finditer() Examples

The following code examples demonstrate how to use re.finditer(). They are taken from open-source Python projects. You can vote up the examples you find useful or vote down those you don't.

Example 1
Project: pyblish-win   Author: pyblish   File: test_re.py    GNU Lesser General Public License v3.0 6 votes vote down vote up
def test_bug_817234(self):
        iter = re.finditer(r".*", "asdf")
        self.assertEqual(iter.next().span(), (0, 4))
        self.assertEqual(iter.next().span(), (4, 4))
        self.assertRaises(StopIteration, iter.next) 
Example 2
Project: Ansible-Example-AB2018   Author: umit-ozturk   File: ironware_facts.py    MIT License 6 votes vote down vote up
def parse_vll_endpoints(self, data):
        """Parse VLL endpoint descriptions out of raw CLI output.

        Returns a list of dicts, one per endpoint, each stamped with a
        'type' key: local endpoints carry 'tagged'/'vlan'/'innervlan'/
        'port'; remote peers carry 'vllpeer'/'lsp'.
        """
        facts = []

        # Local end-points, e.g. "End-point1: tagged vlan 100 e 1/1".
        # (The original looped with enumerate() but never used the index.)
        regex = r'End-point[0-9 ]*: +(?P<tagged>tagged|untagged) +(vlan +(?P<vlan>[0-9]+) +)?(inner- vlan +(?P<innervlan>[0-9]+) +)?(?P<port>e [0-9/]+|--)'
        for match in re.finditer(regex, data, re.IGNORECASE | re.DOTALL):
            f = match.groupdict()
            f['type'] = 'local'
            facts.append(f)

        # Remote peers, identified by peer address and tunnel LSP name.
        regex = r'Vll-Peer +: +(?P<vllpeer>[0-9\.]+).*Tunnel LSP +: +(?P<lsp>\S+)'
        for match in re.finditer(regex, data, re.IGNORECASE | re.DOTALL):
            f = match.groupdict()
            f['type'] = 'remote'
            facts.append(f)

        return facts
Example 3
Project: Ansible-Example-AB2018   Author: umit-ozturk   File: ironware_facts.py    MIT License 6 votes vote down vote up
def parse_vpls_endpoints(self, data):
        """Parse VPLS endpoint descriptions out of raw CLI output.

        Returns a list of dicts stamped with a 'type' key: local
        endpoints carry 'vlanid'/'tagged'/'untagged'; remote peers
        carry 'vllpeer'.
        """
        facts = []

        # Local endpoints: a VLAN id followed by its tagged/untagged ports.
        # (The original looped with enumerate() but never used the index.)
        regex = r'Vlan (?P<vlanid>[0-9]+)\s(?: +(?:L2.*)\s| +Tagged: (?P<tagged>.+)+\s| +Untagged: (?P<untagged>.+)\s)*'
        for match in re.finditer(regex, data, re.IGNORECASE):
            f = match.groupdict()
            f['type'] = 'local'
            facts.append(f)

        # Remote peers, identified by address only.
        regex = r'Peer address: (?P<vllpeer>[0-9\.]+)'
        for match in re.finditer(regex, data, re.IGNORECASE):
            f = match.groupdict()
            f['type'] = 'remote'
            facts.append(f)

        return facts
Example 4
Project: reportengine   Author: NNPDF   File: templateparser.py    GNU General Public License v2.0 6 votes vote down vote up
def get_targets_and_replace(source):
    """Scan *source* line by line, yielding the value parsed from every
    custom-delimiter match while writing the surrounding text to an
    in-memory buffer (parse_match writes the substitution into it).

    The buffer's final contents are delivered as the generator's return
    value (i.e. via StopIteration.value).
    """
    buffer = StringIO()

    for lineno, line in enumerate(source, 1):
        delimiter_matches = list(re.finditer(custom_delimiter_re, line))

        # Fast path: nothing to substitute on this line.
        if not delimiter_matches:
            buffer.write(line)
            continue

        cursor = 0
        for delimiter_match in delimiter_matches:
            start, end = delimiter_match.span()
            # Copy the text between the previous match and this one.
            buffer.write(line[cursor:start])
            try:
                yield parse_match(delimiter_match, line, lineno, buffer)
            except BadToken as e:
                raise CustomParsingError(e, lineno, delimiter_match.pos)
            cursor = end
        # Copy whatever follows the last match.
        buffer.write(line[cursor:])

    return buffer.getvalue()
Example 5
Project: COSC367-Artificial_Intelligence   Author: santochaoya   File: KBGraph.py    GNU General Public License v3.0 6 votes vote down vote up
def clauses(knowledge_base):
    """Takes the string of a knowledge base; returns an iterator for pairs
    of (head, body) for propositional definite clauses in the
    knowledge base. Atoms are returned as strings. The head is an atom
    and the body is a (possibly empty) list of atoms.

    Author: Kourosh Neshatian

    """
    # BUG FIX: the original character class was "[a-zA-z\d_]"; the A-z
    # range also spans the ASCII punctuation between 'Z' and 'a'
    # ("[", "\", "]", "^", "`"), so malformed atoms were accepted.
    ATOM   = r"[a-z][a-zA-Z\d_]*"
    HEAD   = r"\s*(?P<HEAD>{ATOM})\s*".format(**locals())
    BODY   = r"\s*(?P<BODY>{ATOM}\s*(,\s*{ATOM}\s*)*)\s*".format(**locals())
    CLAUSE = r"{HEAD}(:-{BODY})?\.".format(**locals())
    KB     = r"^({CLAUSE})*\s*$".format(**locals())

    # The whole knowledge base must consist of well-formed clauses.
    assert re.match(KB, knowledge_base)

    for mo in re.finditer(CLAUSE, knowledge_base):
        yield mo.group('HEAD'), re.findall(ATOM, mo.group('BODY') or "")
Example 6
Project: COSC367-Artificial_Intelligence   Author: santochaoya   File: DerivedAtoms.py    GNU General Public License v3.0 6 votes vote down vote up
def clauses(knowledge_base):
    """Takes the string of a knowledge base; returns an iterator for pairs
    of (head, body) for propositional definite clauses in the
    knowledge base. Atoms are returned as strings. The head is an atom
    and the body is a (possibly empty) list of atoms.

    Author: Kourosh Neshatian

    """
    # BUG FIX: the original character class was "[a-zA-z\d_]"; the A-z
    # range also spans the ASCII punctuation between 'Z' and 'a'
    # ("[", "\", "]", "^", "`"), so malformed atoms were accepted.
    ATOM   = r"[a-z][a-zA-Z\d_]*"
    HEAD   = r"\s*(?P<HEAD>{ATOM})\s*".format(**locals())
    BODY   = r"\s*(?P<BODY>{ATOM}\s*(,\s*{ATOM}\s*)*)\s*".format(**locals())
    CLAUSE = r"{HEAD}(:-{BODY})?\.".format(**locals())
    KB     = r"^({CLAUSE})*\s*$".format(**locals())

    # The whole knowledge base must consist of well-formed clauses.
    assert re.match(KB, knowledge_base)

    for mo in re.finditer(CLAUSE, knowledge_base):
        yield mo.group('HEAD'), re.findall(ATOM, mo.group('BODY') or "")
Example 7
Project: mindustry-modding   Author: SimonWoodburyForget   File: to_wiki.py    GNU General Public License v3.0 6 votes vote down vote up
def normalize(md):
    '''Normalize anchors.

    Rewrites every inline markdown link whose target is a page anchor
    (``[text](#Some Anchor)``) so the href is lower-case, with space and
    %20 turned into '-', and '~'/'.' dropped.
    '''
    def on_match(link):
        desc = link.group(1)
        old = link.group(2)
        href = (link.group(2)
                .lower()
                .replace('%20', '-')
                .replace(" ", "-")
                .replace("~", "")
                .replace(".", ""))
        # (original, rewritten) pair to apply as a replacement.
        return f'[{desc}]({old})', f'[{desc}]({href})'

    # Leftover debug print(old, new) removed: a pure transform should not
    # write to stdout. set() de-duplicates repeated links.
    replacers = set(on_match(x) for x in re.finditer(r'\[([^\]\[]*)\]\((#[^\)]*)\)', md))
    return ft.reduce(lambda md, x: md.replace(x[0], x[1]), replacers, md)
Example 8
Project: cloudygo   Author: sethtroisi   File: sgf_utils.py    Apache License 2.0 6 votes vote down vote up
def canonical_sgf(board_size, sgf):
    """Rewrite the moves of *sgf* into canonical orientation.

    Extracts B/W move tokens, converts them to board coordinates,
    canonicalizes the whole sequence via canonical_moves(), then splices
    the transformed move strings back into the original SGF text.

    Returns the rewritten SGF string (or *sgf* unchanged if falsy).
    """
    if not sgf:
        return sgf

    # NOTE: This should really utilize a real SGF parser...
    # One with tests and better utils...
    tokens = list(re.finditer('(;[BW]\[(..)\]|\[(..):)', sgf))
    moves = [token.group(2) or token.group(3) for token in tokens]

    # Round-trip each move through board coordinates so the whole game
    # can be canonicalized as a single sequence.
    cords = ';'.join([sgf_to_cord(board_size, 'B[' + m + ']') for m in moves])
    canonical = canonical_moves(board_size, cords).split(';')
    new_moves = [cord_to_sgf(board_size, c) for c in canonical]

    new_sgf = list(sgf)
    for token, move, new_move in zip(tokens, moves, new_moves):
        # Splice the rewritten token over the original span.
        # NOTE(review): slice assignment assumes the replacement has the
        # same length as the original, otherwise later token spans shift
        # -- confirm move encodings are fixed-width.
        new_token = list(token.group(0).replace(move, new_move))
        new_sgf[token.start():token.end()] = new_token
        #print (token.span(), move, new_move, "\t", ''.join(new_sgf))

    return ''.join(new_sgf)
Example 9
Project: cloudygo   Author: sethtroisi   File: sgf_utils.py    Apache License 2.0 6 votes vote down vote up
def raw_game_data(filepath, data):
    """Split SGF *data* into move tokens and parsed comment tokens.

    Returns (moves, parsed_comments) where parsed_comments applies
    fully_parse_comment to every C[...] block found.  *filepath* is
    accepted for interface parity but not used here.
    """
    # TODO this doesn't find comments not on moves.
    # UGLY HACK to allow a comment before or after each W[]/B[] tag.
    token_re = r';\s*([BW]\[[a-t]*\]|C\[[^]]*\])\s*([BW]\[[a-s]*\]|C\[[^]]*\])?'

    moves = []
    comments = []
    for match in re.finditer(token_re, data):
        first = match.group(1)
        second = match.group(2)
        if first.startswith('C'):
            # Comment precedes the move.
            comments.append(first)
            moves.append(second)
        else:
            moves.append(first)
            if second:
                comments.append(second)

    # format is: resign, (pv_moves, pv_counts), (Q0, Qpv), table
    parsed_comments = [fully_parse_comment(c) for c in comments]
    return moves, parsed_comments
Example 10
Project: rop-chainer   Author: wizh   File: gadgets.py    GNU General Public License v3.0 6 votes vote down vote up
def _locate_gadgets(self, section, terminals, gadget_type):
        """Scan a binary section for ROP gadgets ending in a terminal.

        :param section: dict with 'data' (raw section bytes) and 'vaddr'
            (section load address) keys.
        :param terminals: iterable of (byte-pattern, length) pairs; the
            pattern marks a gadget-terminating instruction (e.g. ret).
        :param gadget_type: tag stored with every gadget found.

        Appends {'vaddr', 'insts', 'gadget_type'} dicts to self._gadgets
        as a side effect; returns nothing.
        """
        # x86 32-bit disassembler (capstone).
        disassembler = cs.Cs(cs.CS_ARCH_X86, cs.CS_MODE_32)
        for terminal in terminals:
            # Byte offsets of every occurrence of the terminal pattern.
            matches = [match.start() for match in re.finditer(terminal[0],
                                                              section["data"])]
            for index in matches:
                # Try progressively longer windows ending at the terminal;
                # self._options.depth bounds how far back we look.
                for i in range(self._options.depth):
                    gadget = ""
                    instructions = disassembler.disasm_lite(
                        section["data"][index-i:index+terminal[1]],
                        section["vaddr"]+index)
                    for instruction in instructions:
                        # disasm_lite yields (addr, size, mnemonic, op_str).
                        gadget += (str(instruction[2]) + " " +
                                   str(instruction[3])   + " ; ")

                    if gadget:
                        gadget = gadget.replace("  ", " ")
                        gadget = gadget[:-3]  # drop the trailing " ; "
                        self._gadgets += [{"vaddr" : section["vaddr"]+index-i,
                                           "insts" : gadget,
                                           "gadget_type" : gadget_type}]
Example 11
Project: alpyca   Author: alpyca   File: launch.py    MIT License 6 votes vote down vote up
def find_find_action(self, text):
        """Resolve every ``$(find <pkg> ...)`` substitution in *text*.

        Returns a list of Match objects (start, end, keyword, value,
        replacement).  When the expression is immediately followed by a
        path component (``$(find pkg)/sub/path``) the replacement is
        resolved with find(); otherwise simple_find() is used.
        """
        expr = r'\$\(find (.*?)\)'

        matches = []
        for re_match in re.finditer(expr, text):
            start = re_match.start()
            end = re_match.end()

            # Strip "$(" and ")" and split into keyword + argument string.
            words = text[start+2:end-1].split(' ')
            keyword = words[0]
            value = ' '.join(words[1:])

            if len(text) > end and text[end] == '/':
                # A path follows the closing paren: extend up to the next
                # whitespace/quote and resolve it relative to the package.
                end_postfix = end + 1
                while not (len(text) == end_postfix or text[end_postfix] in [' ', '\'', '\"']):
                    end_postfix += 1
                replacement = find(value, text[end+1:end_postfix])
            else:
                replacement = simple_find(value)
            match = Match(start, end, keyword, value, replacement)
            matches.append(match)

        return matches
Example 12
Project: alpyca   Author: alpyca   File: launch.py    MIT License 6 votes vote down vote up
def find_action(self, text, action):
        """Resolve every ``$(<action> ...)`` substitution in *text*.

        'eval' and 'find' are delegated to dedicated handlers; any other
        action keyword is looked up in self.actions and applied to the
        argument string.

        :raises ParsingException: if the keyword has no registered handler.
        """
        if action == 'eval':
            return self.find_eval_action(text)
        elif action == 'find':
            return self.find_find_action(text)

        # NOTE(review): *action* is interpolated into the regex unescaped;
        # fine for plain identifiers, but a name containing regex
        # metacharacters would change the pattern -- confirm callers only
        # pass simple action names.
        expr = r'\$\(' + action + '(.*?)\)'

        matches = []
        for re_match in re.finditer(expr, text):
            start = re_match.start()
            end = re_match.end()

            # Strip "$(" and ")" and split into keyword + argument string.
            words = text[start+2:end-1].split(' ')
            keyword = words[0]
            value = ' '.join(words[1:])

            if keyword in self.actions:
                replacement = self.actions[keyword](value)
            else:
                raise ParsingException('Unknown keyword {}!'.format(keyword))
            match = Match(start, end, keyword, value, replacement)
            matches.append(match)

        return matches
Example 13
Project: jawfish   Author: war-and-code   File: _string.py    MIT License 6 votes vote down vote up
def formatter_parser(*args, **kw):
    """parse the argument as a format string

    Returns a list of (literal_text, field_name, flags, None) tuples,
    one per "literal{field}" chunk of the input string.
    """
    assert len(args) == 1
    assert isinstance(args[0], str)

    _result = []
    # Each match is an optional literal run followed by an optional
    # {...} replacement field.  Raw strings avoid invalid-escape warnings.
    for _match in re.finditer(r"([^{]*)?(\{[^}]*\})?", args[0]):
        _pre, _fmt = _match.groups()
        if _fmt is None:
           _result.append((_pre, None, None, None))
        elif _fmt == '{}':
           _result.append((_pre, '', '', None))
        else:
           _m = re.match(r"\{([^!]*)!?(.*)?\}", _fmt)
           # BUG FIX: the original called _m.groups(0) / _m.groups(1),
           # which return the tuple of ALL groups (the argument is only a
           # default for non-participating groups) -- group(n) is what
           # was intended.
           _name = _m.group(1)
           _flags = _m.group(2)

           _result.append((_pre, _name, _flags, None))

    return _result
Example 14
Project: jawfish   Author: war-and-code   File: test_re.py    MIT License 6 votes vote down vote up
def test_finditer(self):
        iter = re.finditer(r":+", "a:b::c:::d")
        self.assertEqual([item.group(0) for item in iter],
                         [":", "::", ":::"])

        pat = re.compile(r":+")
        iter = pat.finditer("a:b::c:::d", 1, 10)
        self.assertEqual([item.group(0) for item in iter],
                         [":", "::", ":::"])

        pat = re.compile(r":+")
        iter = pat.finditer("a:b::c:::d", pos=1, endpos=10)
        self.assertEqual([item.group(0) for item in iter],
                         [":", "::", ":::"])

        pat = re.compile(r":+")
        iter = pat.finditer("a:b::c:::d", endpos=10, pos=1)
        self.assertEqual([item.group(0) for item in iter],
                         [":", "::", ":::"])

        pat = re.compile(r":+")
        iter = pat.finditer("a:b::c:::d", pos=3, endpos=8)
        self.assertEqual([item.group(0) for item in iter],
                         ["::", "::"]) 
Example 15
Project: Repobot   Author: Desgard   File: document.py    MIT License 6 votes vote down vote up
def find_backwards(self, sub, in_current_line=False, ignore_case=False, count=1):
        """
        Find `text` before the cursor, return position relative to the cursor
        position. Return `None` if nothing was found.

        :param sub: Substring to search for.
        :param in_current_line: Search only within the current line.
        :param ignore_case: Case-insensitive search when True.
        :param count: Find the n-th occurrence.
        """
        if in_current_line:
            before_cursor = self.current_line_before_cursor[::-1]
        else:
            before_cursor = self.text_before_cursor[::-1]

        flags = re.IGNORECASE if ignore_case else 0

        # Search the reversed text so matches come nearest-to-cursor first.
        iterator = re.finditer(re.escape(sub[::-1]), before_cursor, flags)

        # NOTE: the original wrapped this loop in try/except StopIteration;
        # a for-loop already consumes StopIteration, so the handler was
        # dead code and has been removed.  Falls through to return None.
        for i, match in enumerate(iterator):
            if i + 1 == count:
                return - match.start(0) - len(sub)
Example 16
Project: Repobot   Author: Desgard   File: document.py    MIT License 6 votes vote down vote up
def find_next_word_beginning(self, count=1, WORD=False):
        """
        Return an index relative to the cursor position pointing to the start
        of the next word. Return `None` if nothing was found.

        :param count: Find the n-th word start.
        :param WORD: Use the whitespace-delimited "WORD" definition.
        """
        # Negative count means searching backwards instead.
        if count < 0:
            return self.find_previous_word_beginning(count=-count, WORD=WORD)

        regex = _FIND_BIG_WORD_RE if WORD else _FIND_WORD_RE
        iterator = regex.finditer(self.text_after_cursor)

        # NOTE: the original guarded this loop with try/except
        # StopIteration; a for-loop never propagates StopIteration, so the
        # guard was dead code and has been dropped.  Returns None if no
        # match reaches the requested count.
        for i, match in enumerate(iterator):
            # Take first match, unless it's the word on which we're right now.
            if i == 0 and match.start(1) == 0:
                count += 1

            if i + 1 == count:
                return match.start(1)
Example 17
Project: Repobot   Author: Desgard   File: document.py    MIT License 6 votes vote down vote up
def find_previous_word_beginning(self, count=1, WORD=False):
        """
        Return an index relative to the cursor position pointing to the start
        of the previous word. Return `None` if nothing was found.

        :param count: Find the n-th word start going backwards.
        :param WORD: Use the whitespace-delimited "WORD" definition.
        """
        # Negative count means searching forwards instead.
        if count < 0:
            return self.find_next_word_beginning(count=-count, WORD=WORD)

        # Scan the reversed text so matches come nearest-to-cursor first.
        regex = _FIND_BIG_WORD_RE if WORD else _FIND_WORD_RE
        iterator = regex.finditer(self.text_before_cursor[::-1])

        # NOTE: the original guarded this loop with try/except
        # StopIteration; a for-loop never propagates StopIteration, so the
        # guard was dead code and has been dropped.  Returns None if no
        # match reaches the requested count.
        for i, match in enumerate(iterator):
            if i + 1 == count:
                return - match.end(1)
Example 18
Project: Repobot   Author: Desgard   File: document.py    MIT License 6 votes vote down vote up
def find_previous_word_ending(self, count=1, WORD=False):
        """
        Return an index relative to the cursor position pointing to the end
        of the previous word. Return `None` if nothing was found.

        :param count: Find the n-th word ending going backwards.
        :param WORD: Use the whitespace-delimited "WORD" definition.
        """
        # Negative count means searching forwards instead.
        if count < 0:
            return self.find_next_word_ending(count=-count, WORD=WORD)

        # Include the character under the cursor, then the reversed prefix,
        # so word endings are found nearest-to-cursor first.
        text_before_cursor = self.text_after_cursor[:1] + self.text_before_cursor[::-1]

        regex = _FIND_BIG_WORD_RE if WORD else _FIND_WORD_RE
        iterator = regex.finditer(text_before_cursor)

        # NOTE: the original guarded this loop with try/except
        # StopIteration; a for-loop never propagates StopIteration, so the
        # guard was dead code and has been dropped.  Returns None if no
        # match reaches the requested count.
        for i, match in enumerate(iterator):
            # Take first match, unless it's the word on which we're right now.
            if i == 0 and match.start(1) == 0:
                count += 1

            if i + 1 == count:
                return -match.start(1) + 1
Example 19
Project: razzy-spinner   Author: rafasashi   File: tree.py    GNU General Public License v3.0 6 votes vote down vote up
def _ieer_read_text(s, top_node):
    """Parse an IEER-format string into a Tree rooted at *top_node*.

    Chunks delimited by <b_...> / <e_...> tags become subtrees labelled
    with the tag's type; plain tokens become leaves.

    :raises ValueError: if the string is malformed.
    """
    stack = [Tree(top_node, [])]
    # Tokens are either a full <...> tag or a run of non-space text.
    for piece_m in re.finditer('<[^>]+>|[^\s<]+', s):
        piece = piece_m.group()
        try:
            if piece.startswith('<b_'):
                m = _IEER_TYPE_RE.match(piece)
                if m is None:
                    # Py3 fix: the original used the Python 2 print
                    # statement (``print 'XXXX', piece``).
                    print('XXXX', piece)
                chunk = Tree(m.group('type'), [])
                stack[-1].append(chunk)
                stack.append(chunk)
            elif piece.startswith('<e_'):
                stack.pop()
            # (Other '<...>' tags -- unexpected HTML -- were deliberately
            # left unhandled in commented-out code; treated as plain text.)
            else:
                stack[-1].append(piece)
        except (IndexError, ValueError):
            raise ValueError('Bad IEER string (error at character %d)' %
                             piece_m.start())
    if len(stack) != 1:
        raise ValueError('Bad IEER string')
    return stack[0]
Example 20
Project: razzy-spinner   Author: rafasashi   File: chunkparser_app.py    GNU General Public License v3.0 6 votes vote down vote up
def show_help(self, tab):
        """Display the help text for *tab* in the help box widget.

        Substitutes the <<TAGSET>> placeholder with the sorted tag table,
        highlights the selected tab, and applies HELP_AUTOTAG markup:
        <tag>...</tag> spans become styled regions with the marker text
        elided.
        """
        self.helpbox['state'] = 'normal'
        self.helpbox.delete('1.0', 'end')
        for (name, tabstops, text) in self.HELP:
            if name == tab:
                # Sort tags: word-like tag names first, then the rest.
                text = text.replace('<<TAGSET>>', '\n'.join(
                    ('\t%s\t%s' % item for item in sorted(list(self.tagset.items()),
                    key=lambda t_w:re.match('\w+',t_w[0]) and (0,t_w[0]) or (1,t_w[0])))))

                self.helptabs[name].config(**self._HELPTAB_FG_PARAMS)
                self.helpbox.config(tabs=tabstops)
                self.helpbox.insert('1.0', text+'\n'*20)
                # Text-widget index for a character offset into the text.
                C = '1.0 + %d chars'
                for (tag, params) in self.HELP_AUTOTAG:
                    pattern = '(?s)(<%s>)(.*?)(</%s>)' % (tag, tag)
                    for m in re.finditer(pattern, text):
                        # Hide the opening marker, style the contents,
                        # hide the closing marker.
                        self.helpbox.tag_add('elide',
                                             C % m.start(1), C % m.end(1))
                        self.helpbox.tag_add('tag-%s' % tag,
                                             C % m.start(2), C % m.end(2))
                        self.helpbox.tag_add('elide',
                                             C % m.start(3), C % m.end(3))
            else:
                self.helptabs[name].config(**self._HELPTAB_BG_PARAMS)
        self.helpbox['state'] = 'disabled'
Example 21
Project: razzy-spinner   Author: rafasashi   File: chunkparser_app.py    GNU General Public License v3.0 6 votes vote down vote up
def _syntax_highlight_grammar(self, grammar):
        """Re-apply syntax highlighting to the grammar text widget.

        Tags #-comments (respecting backslash escapes), angle brackets,
        and braces, line by line.  No-op when the window is closed.
        """
        if self.top is None: return
        self.grammarbox.tag_remove('comment', '1.0', 'end')
        self.grammarbox.tag_remove('angle', '1.0', 'end')
        self.grammarbox.tag_remove('brace', '1.0', 'end')
        self.grammarbox.tag_add('hangindent', '1.0', 'end')
        for lineno, line in enumerate(grammar.split('\n')):
            if not line.strip(): continue
            # Group 2 captures an unescaped trailing #-comment, if any.
            m = re.match(r'(\\.|[^#])*(#.*)?', line)
            comment_start = None
            if m.group(2):
                comment_start = m.start(2)
                # Text-widget indices are "line.column" with 1-based lines.
                s = '%d.%d' % (lineno+1, m.start(2))
                e = '%d.%d' % (lineno+1, m.end(2))
                self.grammarbox.tag_add('comment', s, e)
            for m in re.finditer('[<>{}]', line):
                # Brackets inside the comment keep the comment style.
                if comment_start is not None and m.start() >= comment_start:
                    break
                s = '%d.%d' % (lineno+1, m.start())
                e = '%d.%d' % (lineno+1, m.end())
                if m.group() in '<>':
                    self.grammarbox.tag_add('angle', s, e)
                else:
                    self.grammarbox.tag_add('brace', s, e)
Example 22
Project: razzy-spinner   Author: rafasashi   File: texttiling.py    GNU General Public License v3.0 6 votes vote down vote up
def _mark_paragraph_breaks(self, text):
        """Identifies indented text or line breaks as the beginning of
        paragraphs"""

        MIN_PARAGRAPH = 100
        pattern = re.compile("[ \t\r\f\v]*\n[ \t\r\f\v]*\n[ \t\r\f\v]*")
        matches = pattern.finditer(text)

        last_break = 0
        pbreaks = [0]
        for pb in matches:
            if pb.start()-last_break < MIN_PARAGRAPH:
                continue
            else:
                pbreaks.append(pb.start())
                last_break = pb.start()

        return pbreaks 
Example 23
Project: yang-explorer   Author: CiscoDevNet   File: adapter.py    Apache License 2.0 6 votes vote down vote up
def get_ydk_def_names(python_ydk_defs):
        """
        Get the Python YDK definition names.

        Scans *python_ydk_defs* for "def <name>" occurrences and returns
        the names joined by single spaces (with a trailing space, matching
        the historical output format; empty string when nothing matches).
        """
        logging.debug('get_ydk_def_names: python_ydk_defs : \n' + python_ydk_defs)

        import re

        # NOTE: the original pattern was r"def \w+()"; the trailing "()"
        # is an empty group that matches nothing, so r"def \w+" matches
        # exactly the same text.
        names = []
        for m in re.finditer(r"def \w+", python_ydk_defs):
            logging.debug('get_ydk_def_names: m.group(0): \n' + m.group(0))
            names.append(m.group(0).replace('def ', ''))

        # Single join instead of repeated string concatenation (O(n) vs O(n^2)).
        ydk_def_names = ''.join(name + ' ' for name in names)

        logging.debug('get_ydk_def_names: ydk_def_names : \n' + ydk_def_names)

        return ydk_def_names
Example 24
Project: ng   Author: cls1991   File: ng.py    Apache License 2.0 6 votes vote down vote up
def _hack_ip():
    """Determine the local and public IPv4 addresses of this host.

    Returns (ok, text): (False, message) on an unsupported OS, otherwise
    (True, "<local>\\n<public>").  Either address falls back to
    DEFAULT_IP_ADDRESS when detection fails.
    """
    system = _system()
    if system not in SUPPORTED_SYSTEMS:
        return False, 'Unknown operation system {0}'.format(system)

    local_ip = public_ip = DEFAULT_IP_ADDRESS
    # Pick the platform's interface-listing command and the regex that
    # extracts IPv4 addresses from its output.
    if system == 'Darwin':
        command = ['ifconfig']
        pattern = re.compile(r'inet (?P<ip>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})')
    elif system == 'Linux':
        command = ['ip', 'addr']
        pattern = re.compile(r'inet (?P<ip>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})')
    else:
        # Anything else in SUPPORTED_SYSTEMS -- presumably Windows.
        command = ['ipconfig']
        pattern = re.compile(r'IPv4.+: (?P<ip>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})')
    rs = _exec(command)
    # First address that differs from the default placeholder wins.
    for match in re.finditer(pattern, rs):
        sip = match.group('ip')
        if sip != DEFAULT_IP_ADDRESS:
            local_ip = sip
            break
    try:
        # The public address is whatever the echo service sees us as.
        r = requests.get(VERIFY_HOST)
        public_ip = r.json()['origin']
    except requests.RequestException:
        # Best-effort: keep the default public IP on network failure.
        pass
    return True, '{0}\n{1}'.format(local_ip, public_ip)
Example 25
Project: xstrings   Author: elicn   File: xstrings.py    The Unlicense 5 votes vote down vote up
def finditer(content, encodings, charset, min_size):
    '''Generator that yields every obfuscated-string match inside the given
    content which is at least min_size characters long.

    @param    content    Binary content to search in
    @param    encodings  Dictionary mapping an encoding name to a tuple of
                         (encoding function, iterable of keys to try)
    @param    charset    An iterable of the characters to consider part of
                         a string
    @param    min_size   Minimal string size to consider as a string match

    @return Tuples of (match offset in content, encoding name, encoding
            key, deobfuscated string reconstructed from the blob found)
    '''
    for encoding_name, (encode, key_range) in encodings.items():

        # Try every key this encoding function supports.
        for key in key_range:
            encoded_charset = encode(charset, key)

            # A "string" is a run of min_size or more encoded characters.
            pattern = '[%s]{%d,}' % (re.escape(encoded_charset), min_size)

            for match in re.finditer(pattern, content):
                # Reverse the obfuscation character by character.
                plain = ''.join(charset[encoded_charset.index(ch)]
                                for ch in match.group(0))
                yield (match.start(0), encoding_name, key, plain)

        # Drop cached compiled patterns between encodings.
        re.purge()
Example 26
Project: xstrings   Author: elicn   File: xstrings.py    The Unlicense 5 votes vote down vote up
def main(args):
    """Entry point: scan each input file for obfuscated strings and print
    the findings in GNU-strings-like format."""
    # prepare the format string for file offsets if required
    if args.radix:
        radixfmt = '%%7%s' % args.radix

    # iterate over input files list
    for fd in args.infiles:

        # gnu strings emits '{standard input}' instead of 'stdin' if
        # required to emit the file name; stick with the GNU strings style
        if args.print_file_name:
            filename = '{standard input}' if fd == stdin else fd.name

        # iterate over findings in current input file
        # each iteration returns offset, encoding name, encoding key and deobfuscated string found
        for offset, enc_name, enc_key, deobf in finditer(fd.read(), args.encodings, args.charset, args.bytes):
            # Py3 fix: the Python 2 print statements with trailing commas
            # become print(..., end=' ').
            if args.print_file_name:
                print('%s:' % filename, end=' ')

            if args.radix:
                print(radixfmt % offset, end=' ')

            print('%s(%x) %s' % (enc_name, enc_key, deobf))
Example 27
Project: fs_image   Author: facebookincubator   File: test_extents_to_chunks.py    MIT License 5 votes vote down vote up
def _gen_ranges_from_figure(figure: str):
    for s in textwrap.dedent(figure.strip('\n')).split('\n'):
        s = s.rstrip()
        # Number lines should aid reading off positions. Check they're right.
        if re.match('[0-9]*$', s):
            assert ('0123456789' * math.ceil(len(s) / 10))[:len(s)] == s, \
                f'Bad number line {s} in {figure}'
            continue
        offset = 0
        for m in re.finditer(r'(.)\1*', s):
            v = m.group(0)
            if v[0] != ' ':
                yield v[0], offset, len(v)
            offset += len(v) 
Example 28
Project: leapp-repository   Author: oamg   File: lib_spamc.py    Apache License 2.0 5 votes vote down vote up
def _parse_spamc_ssl_argument(content):
    """Return the SSL/TLS protocol selected by the ``--ssl`` arguments in
    *content* ('sslv3' or 'tlsv1'), or None when absent.

    'tlsv1' wins over 'sslv3' regardless of argument order.
    """
    stripped = _remove_comments(content)

    chosen = None
    # --ssl may be separated from its value by whitespace or '='.
    for match in re.finditer(r'(?<!\S)--ssl(\s+|=)(sslv3|tlsv1)(?!\S)', stripped):
        protocol = match.group(2)
        # tlsv1 always wins; sslv3 only counts while nothing else is set.
        if protocol == 'tlsv1' or (protocol == 'sslv3' and chosen is None):
            chosen = protocol
    return chosen
Example 29
Project: autofff   Author: ChiefGokhlayeh   File: scanner.py    MIT License 5 votes vote down vote up
def _read_symbols(self, pathToObj: str) -> SymbolTable:
        """Run ``objdump -t`` on *pathToObj* and parse its symbol table.

        NOTE(review): the parsing looks unfinished -- fuNMatches uses an
        empty pattern and is never consumed, each SymbolTable is built
        with None symbols, and nothing is returned despite the
        SymbolTable annotation.  Confirm intended behavior upstream.

        :raises RuntimeError: if the external tool cannot be launched.
        """
        path_list = ['objdump', '-t']
        path_list += [pathToObj]

        try:
            pipe = subprocess.Popen(path_list,
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True)
            text = pipe.communicate()[0]
            # One match per object file: the header plus its symbol lines.
            matches = re.finditer(
                r"(?P<object>.*):\s+file format.*\s+SYMBOL TABLE:\n(?P<symbols>(?:.+(\n|$))*)", text, re.MULTILINE)
            tables = list()
            for match in matches:
                print(match)
                objectFile = match.group('object') or pathToObj
                symbols = match.group('symbols')
                # The *ABS* entry names the originating source file.
                symMatch = re.search(
                    r"\*ABS\*\s+[0-9a-fA-F]*\s+(?P<source>.*)", symbols, re.MULTILINE)
                sourceFile = symMatch.group('source')
                fuNMatches = re.finditer(r"", symbols, re.MULTILINE)

                tables.append(SymbolTable(objectFile, None))
                print(objectFile)
                print(sourceFile)
                print(symbols)
        except OSError as e:
            # NOTE(review): the message says 'readelf' although objdump is
            # invoked above -- kept as-is (doc-only change).
            raise RuntimeError("Unable to invoke 'readelf'.  " +
                               'Make sure its path was passed correctly\n' +
                               ('Original error: %s' % e))
Example 30
Project: pyblish-win   Author: pyblish   File: test_re.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_finditer(self):
        iter = re.finditer(r":+", "a:b::c:::d")
        self.assertEqual([item.group(0) for item in iter],
                         [":", "::", ":::"]) 
Example 31
Project: pyblish-win   Author: pyblish   File: test_re.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_bug_581080(self):
        iter = re.finditer(r"\s", "a b")
        self.assertEqual(iter.next().span(), (1,2))
        self.assertRaises(StopIteration, iter.next)

        scanner = re.compile(r"\s").scanner("a b")
        self.assertEqual(scanner.search().span(), (1, 2))
        self.assertIsNone(scanner.search()) 
Example 32
Project: pyblish-win   Author: pyblish   File: test_re.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_dealloc(self):
        """Regression test for issue 3299: a huge value in the code list
        passed to _sre.compile must raise OverflowError, not segfault."""
        # issue 3299: check for segfault in debug build
        import _sre
        # the overflow limit is different on wide and narrow builds and it
        # depends on the definition of SRE_CODE (see sre.h).
        # 2**128 should be big enough to overflow on both. For smaller values
        # a RuntimeError is raised instead of OverflowError.
        # NOTE(review): this exercises the CPython 2 _sre.compile
        # signature; on Python 3 _sre.compile takes more arguments, so
        # confirm before porting.
        long_overflow = 2**128
        self.assertRaises(TypeError, re.finditer, "a", {})
        self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
Example 33
Project: kuaa   Author: rafaelwerneck   File: util.py    GNU General Public License v3.0 5 votes vote down vote up
def read_array(string):
    """
    Read string that contains a numpy array.

    Locates the "array(" / ")" delimiters in *string*.  NOTE(review): the
    match iterators are computed but never consumed and nothing is
    returned -- this looks like a truncated extraction; confirm against
    the project source before relying on it.
    """
    # Raw strings fix the invalid "\(" / "\)" escape sequences that raise
    # DeprecationWarning (and will eventually be a SyntaxError).
    search_array = r"array\("
    search_end = r"\)"
    find_array = re.finditer(search_array, string)
    find_end = re.finditer(search_end, string)
Example 34
Project: Ansible-Example-AB2018   Author: umit-ozturk   File: tarfile.py    MIT License 5 votes vote down vote up
def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.
        """
        offsets = []
        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
            offsets.append(int(match.group(1)))
        numbytes = []
        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
            numbytes.append(int(match.group(1)))
        next.sparse = list(zip(offsets, numbytes)) 
Example 35
Project: flasky   Author: RoseOu   File: _postgres_builtins.py    MIT License 5 votes vote down vote up
def parse_keywords(f):
    """Extract the keyword column from the Postgres keyword-table markup
    read from file-like object *f*, sorted alphabetically.

    :raises ValueError: when no keyword is found.
    """
    pair_re = (r'\s*<entry><token>([^<]+)</token></entry>\s*'
               r'<entry>([^<]+)</entry>')
    # Group 1 is the keyword token; group 2 (its description) is ignored.
    keywords = [m.group(1) for m in re.finditer(pair_re, f.read())]

    if not keywords:
        raise ValueError('no keyword found')

    keywords.sort()
    return keywords
Example 36
Project: flasky   Author: RoseOu   File: tarfile.py    MIT License 5 votes vote down vote up
def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.
        """
        offsets = []
        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
            offsets.append(int(match.group(1)))
        numbytes = []
        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
            numbytes.append(int(match.group(1)))
        next.sparse = list(zip(offsets, numbytes)) 
Example 37
Project: dynamic-training-with-apache-mxnet-on-aws   Author: awslabs   File: process_output.py    Apache License 2.0 5 votes vote down vote up
def process_output(command_output):
    """Summarise compiler warnings from *command_output*.

    Returns (time, warnings) where *time* is the second-to-last line of
    the output and *warnings* maps each distinct warning line to the
    number of times it occurred.
    """
    warnings = {}
    regex = r"(.*):\swarning:\s(.*)"
    lines = command_output.split("\n")
    # The last two entries hold the timing line and the trailing empty
    # string from the final newline, so they are excluded from the scan.
    for line in lines[:-2]:
        for match in re.finditer(regex, line):
            key = match.group()
            # dict.get() replaces the original try/except KeyError counter;
            # the unused enumerate() index is gone as well.
            warnings[key] = warnings.get(key, 0) + 1
    time = lines[-2]
    return time, warnings
Example 38
Project: CodeDog   Author: BruceDLong   File: CodeGenerator.py    GNU General Public License v2.0 5 votes vote down vote up
def codeUserMesg(item, xlator):
    """Split a message template into a format string and its arguments.

    *item* contains placeholders of the form %i`expr`, %s`expr`, etc.;
    the expression between backticks is lifted into the argument list
    while the %x marker stays in the format string.  The final code is
    assembled by the language-specific formatter command in *xlator*.
    """
    # TODO: Make 'user messages' interpolate and adjust for locale.
    fmt_parts = []
    arg_parts = []
    cursor = 0
    for mark in re.finditer(r"%[ilscp]`.+?`", item):
        # Keep the text up to and including the "%x" marker...
        fmt_parts.append(item[cursor:mark.start() + 2])
        # ...and move the backticked expression into the argument list.
        arg_parts.append(', ' + item[mark.start() + 3:mark.end() - 1])
        cursor = mark.end()
    fmt_parts.append(item[cursor:])

    fmtStr = ''.join(fmt_parts).replace('"', r'\"')
    argStr = ''.join(arg_parts)
    return xlator['langStringFormatterCommand'](fmtStr, argStr)
Example 39
Project: Trusted-Platform-Module-nova   Author: BU-NU-CLOUD-SP16   File: support_matrix.py    Apache License 2.0 5 votes vote down vote up
def _create_notes_paragraph(self, notes):
        """ Constructs a paragraph which represents the implementation notes

        The paragraph consists of text and clickable URL nodes if links were
        given in the notes.

        :param notes: free-form notes text, possibly containing URLs
        :return: a docutils paragraph node mixing inline text and references
        """
        para = nodes.paragraph()
        # links could start with http:// or https://
        link_idxs = [m.start() for m in re.finditer('https?://', notes)]
        start_idx = 0
        for link_idx in link_idxs:
            # assume the notes start with text (could be empty)
            para.append(nodes.inline(text=notes[start_idx:link_idx]))
            # create a URL node until the next text or the end of the notes
            link_end_idx = notes.find(" ", link_idx)
            if link_end_idx == -1:
                # In case the notes end with a link without a blank
                link_end_idx = len(notes)
            # NOTE(review): the +1 makes the URI include the character at
            # link_end_idx (the separating blank, when present) — confirm
            # this trailing-space inclusion is intended.
            uri = notes[link_idx:link_end_idx + 1]
            para.append(nodes.reference("", uri, refuri=uri))
            start_idx = link_end_idx + 1

        # get all text after the last link (could be empty) or all of the
        # text if no link was given
        para.append(nodes.inline(text=notes[start_idx:]))
        return para
Example 40
Project: sic   Author: Yanixos   File: tarfile.py    GNU General Public License v3.0 5 votes vote down vote up
def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.
        """
        offsets = []
        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
            offsets.append(int(match.group(1)))
        numbytes = []
        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
            numbytes.append(int(match.group(1)))
        next.sparse = list(zip(offsets, numbytes)) 
Example 41
Project: sic   Author: Yanixos   File: tarfile.py    GNU General Public License v3.0 5 votes vote down vote up
def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.
        """
        offsets = []
        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
            offsets.append(int(match.group(1)))
        numbytes = []
        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
            numbytes.append(int(match.group(1)))
        next.sparse = list(zip(offsets, numbytes)) 
Example 42
Project: cards.py   Author: jhauberg   File: templatefield.py    MIT License 5 votes vote down vote up
def fields(content: str,
           with_name_like: str=None,
           with_context_like: str=None,
           strictly_matching: bool=True) -> Iterator[TemplateField]:
    """ Return an iterator for all fields (e.g. '{{ a_field }}') that occur in a template.

    Optional regex filters restrict the yielded fields by name and/or
    context; strictly_matching decides whether both filters must hold
    (AND) or either one suffices (OR).
    """

    pattern = r'{{\s?(([^}}\s]*)\s?(.*?))\s?}}'

    for match in re.finditer(pattern, content):
        # Normalize each captured piece; empty captures become None.
        inner_content = match.group(1).strip() or None
        name = match.group(2).strip() or None
        context = match.group(3).strip() or None

        field = TemplateField(
            name, context, inner_content,
            indices=range(match.start(), match.end()))

        matches_name = (with_name_like is None
                        or (field.name is not None
                            and re.search(with_name_like, field.name) is not None))

        matches_context = (with_context_like is None
                           or (field.context is not None
                               and re.search(with_context_like, field.context) is not None))

        if strictly_matching:
            include = matches_name and matches_context
        else:
            include = matches_name or matches_context

        if include:
            yield field
Example 43
Project: cards.py   Author: jhauberg   File: template.py    MIT License 5 votes vote down vote up
def strip_styles(template: Template) -> str:
    """ Strip and return any embedded <style></style> content from a template.

    Mutates template.content in place (all style blocks removed) and
    returns the extracted <style> blocks joined by newlines. Warns if
    any template fields occur inside the stripped styles.
    """

    pattern = r'<style.*?>(.+?)</style>'
    stripped_styles = ''

    search = re.compile(pattern, re.DOTALL)

    # find all style matches and extract embedded styles
    for style_match in re.finditer(pattern, template.content, re.DOTALL):
        # note that we strip the entire style- not the inner content
        style = style_match.group(0).strip()
        # separating each style block for good measure
        stripped_styles = stripped_styles + '\n' + style if len(stripped_styles) > 0 else style

    # finally remove all style matches
    # note that this removes the <style></style> tags too
    template.content = re.sub(search, '', template.content).strip()

    # make sure we keep it clean- no unnecessary newlines or excess whitespace
    stripped_styles = stripped_styles.strip()

    template_field_names = list((field.name for field in fields(stripped_styles)))

    if len(template_field_names) > 0:
        context = template.path

        # if there's any fields in the styles, display a warning about it
        WarningDisplay.fields_in_styles(
            WarningContext(context), template_field_names)

    return stripped_styles
Example 44
Project: AshsSDK   Author: thehappydinoa   File: tarfile.py    MIT License 5 votes vote down vote up
def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.
        """
        offsets = []
        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
            offsets.append(int(match.group(1)))
        numbytes = []
        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
            numbytes.append(int(match.group(1)))
        next.sparse = list(zip(offsets, numbytes)) 
Example 45
Project: python-aiocqhttp   Author: richardchien   File: message.py    MIT License 5 votes vote down vote up
def _split_iter(msg_str: str) -> Iterable[MessageSegment]:
        """Split a CQ-coded message string into MessageSegment objects.

        Plain text between [CQ:...] codes is unescaped and yielded as
        'text' segments; each CQ code becomes a segment of its own type
        with its comma-separated key=value params parsed into a dict.
        """
        def iter_function_name_and_extra() -> Iterable[Tuple[str, str]]:
            text_begin = 0
            for cqcode in re.finditer(r'\[CQ:(?P<type>[a-zA-Z0-9-_.]+)'
                                      r'(?P<params>'
                                      r'(?:,[a-zA-Z0-9-_.]+=?[^,\]]*)*'
                                      r'),?\]',
                                      msg_str):
                # NOTE(review): cqcode.pos is the search start position
                # (0 for a full-string finditer), so adding it looks like
                # a no-op — confirm before simplifying.
                yield 'text', unescape(
                    msg_str[text_begin:cqcode.pos + cqcode.start()])
                text_begin = cqcode.pos + cqcode.end()
                yield cqcode.group('type'), cqcode.group('params').lstrip(',')
            yield 'text', unescape(msg_str[text_begin:])

        for function_name, extra in iter_function_name_and_extra():
            if function_name == 'text':
                if extra:
                    # only yield non-empty text segment
                    yield MessageSegment(type_=function_name,
                                         data={'text': extra})
            else:
                # "k=v,k2=v2" -> dict; empty entries are filtered out
                data = {k: v for k, v in map(
                    lambda x: x.split('=', maxsplit=1),
                    filter(lambda x: x, (x.lstrip() for x in extra.split(',')))
                )}
                yield MessageSegment(type_=function_name, data=data)
Example 46
Project: mx   Author: graalvm   File: mx_benchmark.py    GNU General Public License v2.0 5 votes vote down vote up
def parseResults(self, text):
        """Lazily yield one dict of named-group captures per match.

        self.pattern is applied with re.MULTILINE over the whole text.
        """
        matches = re.finditer(self.pattern, text, re.MULTILINE)
        return (found.groupdict() for found in matches)
Example 47
Project: mx   Author: graalvm   File: mx_benchmark.py    GNU General Public License v2.0 5 votes vote down vote up
def getCSVFiles(self, text):
        """Lazily yield the named group self.match_name of every match.

        self.pattern is applied with re.MULTILINE over the whole text.
        """
        matches = re.finditer(self.pattern, text, re.MULTILINE)
        return (found.groupdict()[self.match_name] for found in matches)
Example 48
Project: ngo-addons-backport   Author: camptocamp   File: interface.py    GNU Affero General Public License v3.0 5 votes vote down vote up
def post_process_xml_data(self, cr, uid, xml, context=None):
        """Splice the corporate header into a rendered report XML string.

        The body of 'corporate_defaults.xml' (everything after its
        <?xml ...?> declaration) is inserted right after the report's
        declaration and root tag.
        NOTE(review): Python 2 only — relies on iterator .next(); also
        shadows the builtin iter().
        """

        if not context:
            context={}
        # find the position of the 3rd tag
        # (skip the <?xml ...?> and the "root" tag)
        iter = re.finditer('<[^>]*>', xml)
        i = iter.next()
        i = iter.next()
        pos_xml = i.end()

        doc = print_xml.document(cr, uid, {}, {})
        tmpl_path = openerp.modules.get_module_resource('base', 'report', 'corporate_defaults.xml')
        doc.parse(tmpl_path, [uid], 'res.users', context)
        corporate_header = doc.xml_get()
        doc.close()

        # find the position of the tag after the <?xml ...?> tag
        iter = re.finditer('<[^>]*>', corporate_header)
        i = iter.next()
        pos_header = i.end()

        return xml[:pos_xml] + corporate_header[pos_header:] + xml[pos_xml:]

    #
    # TODO: The translation doesn't work for "<tag t="1">textext<tag> tex</tag>text</tag>"
    # 
Example 49
Project: ngo-addons-backport   Author: camptocamp   File: mail.py    GNU Affero General Public License v3.0 5 votes vote down vote up
def plaintext2html(text, container_tag=False):
    """ Convert plaintext into html. Content of the text is escaped to manage
        html entities, using cgi.escape().
        - all \n,\r are replaced by <br />
        - enclose content into <p>
        - 2 or more consecutive <br /> are considered as paragraph breaks

        :param string container_tag: container of the html; by default the
            content is embedded into a <div>
    """
    # NOTE(review): cgi.escape does not escape quotes by default and the
    # cgi module was removed in Python 3.13 (html.escape is the modern
    # equivalent); ustr is presumably the project's unicode coercion.
    text = cgi.escape(ustr(text))

    # 1. replace \n and \r
    text = text.replace('\n', '<br/>')
    text = text.replace('\r', '<br/>')

    # 2-3: form paragraphs
    # runs of 2+ <br/> tags (any spacing/case) mark paragraph boundaries
    idx = 0
    final = '<p>'
    br_tags = re.compile(r'(([<]\s*[bB][rR]\s*\/?[>]\s*){2,})')
    for item in re.finditer(br_tags, text):
        final += text[idx:item.start()] + '</p><p>'
        idx = item.end()
    final += text[idx:] + '</p>'

    # 4. container
    if container_tag:
        final = '<%s>%s</%s>' % (container_tag, final, container_tag)
    return ustr(final)
Example 50
Project: openhatch   Author: campbe13   File: _postgres_builtins.py    GNU Affero General Public License v3.0 5 votes vote down vote up
def parse_keywords(f):
    """Extract keyword tokens from a DocBook keyword table.

    Reads the whole stream, captures the <token> cell of every
    <entry><token>...</token></entry><entry>...</entry> row pair, and
    returns the keywords in alphabetical order.

    :raises ValueError: if the stream contains no keyword rows.
    """
    table_row = (r'\s*<entry><token>([^<]+)</token></entry>\s*'
                 r'<entry>([^<]+)</entry>')
    keywords = [row.group(1) for row in re.finditer(table_row, f.read())]

    if not keywords:
        raise ValueError('no keyword found')

    return sorted(keywords)
Example 51
Project: rop-chainer   Author: wizh   File: strings.py    GNU General Public License v3.0 5 votes vote down vote up
def _locate_useful(self, sections):
        for section in sections:
            for string in self._useful:
                matches = [m.start() for m in re.finditer(string, section["data"])]
                for index in matches:
                    self._strings +=\
                        [{"text" : section["data"][index:index+len(string)],
                          "vaddr" : section["vaddr"] + index}] 
Example 52
Project: multiprot   Author: strubelab   File: builder.py    Apache License 2.0 5 votes vote down vote up
def extract_fixed(self, dom, full):
        """
        Extracts one model from another
        Finds the position of 'dom' inside 'full' comparing the sequence and atom
        coordinates for each chain in dom, gets the chain index and takes all the
        chains but the ones selected.
        
        :param dom: model of a single or multiple chain domain
        :type dom: PDBModel
        :param full: model of a multiple chain domain that contains 'dom'
        :type full: PDBModel

        :return: model 'full' without dom
        :type return: PDBModel
        """

        # atom index range covered by the first residue of 'dom'
        first_res_dom = dom.res2atomIndices([0])
        lowdom = first_res_dom[0]
        highdom = first_res_dom[-1]
        i=0
        # NOTE(review): dom.sequence() is used as a regex pattern — safe
        # only while sequences contain plain letters.
        for match in re.finditer(dom.sequence(), full.sequence()):
            start,end = match.span()
            first_res_full = full.res2atomIndices([start])
            lowfull = first_res_full[0]
            highfull = first_res_full[-1]
            i += 1  # match counter (currently unused)
            # Compare only the atoms of the first residue
            if N.all(dom.xyz[lowdom:highdom+1] == full.xyz[lowfull:highfull+1]):
                # coordinates agree: remove dom's atom span from 'full'
                atom_start = full.resIndex()[start]
                atom_end = full.res2atomIndices([end-1])[-1] + 1
                full.remove(list(range(atom_start,atom_end)))
                break

        return full
Example 53
Project: findevil   Author: tylerha97   File: findevilinfo.py    MIT License 5 votes vote down vote up
def carve(input_file):
    """Carve PE files from segments adapted from Alexander Hanel's blog
    https://hooked-on-mnemonics.blogspot.com/2013/01/pe-carvpy.html

    Each carved file is written next to the input as
    "<input>_<n>.<ext>" where ext reflects the PE type.
    """
    # NOTE(review): Python 2 only — print statement, and the str pattern
    # '\x4d\x5a' ("MZ") is matched against bytes from the dump.
    with open(input_file, "rb") as mem_dump:
        c = 1
        # For each address that contains MZ
        for y in [tmp.start() for tmp in re.finditer('\x4d\x5a',mem_dump.read())]:
            mem_dump.seek(y)
            try:
                pe = pefile.PE(data=mem_dump.read())
            except:
                # not a parsable PE at this MZ offset; move on
                continue 
            # Determine file ext
            if pe.is_dll() == True:
                ext = 'dll'
            elif pe.is_driver() == True:
                ext = 'sys'
            elif pe.is_exe() == True:
                ext = 'exe'
            else:
                ext = 'bin'

            print "Carving {} at {}".format(ext, hex(y))

            with open(input_file + "_" + str(c) + '.' + ext, 'wb') as out:
                out.write(pe.trim())

            c += 1
            ext = ''
            # rewind so the next candidate offset reads the full dump
            mem_dump.seek(0)
            pe.close()
Example 54
Project: alpyca   Author: alpyca   File: launch.py    MIT License 5 votes vote down vote up
def find_eval_action(self, text):
        """Find $(eval ...) substitution expressions in *text*.

        Returns a list of Match records. As a limitation the eval
        expression must span the entire attribute string; otherwise
        ParsingException is raised.
        """
        expr = r'\$\(eval (.*?)\)'

        matches = []
        for re_match in re.finditer(expr, text):
            start = re_match.start()
            end = re_match.end()

            words = text[start+2:end-1].split(' ')
            keyword = words[0]
            value = ' '.join(words[1:])

            # As a limitation, $(eval) expressions need to span the whole attribute string.
            # A mixture of other substitution args with eval within a single string is not possible.
            if text.startswith('$(eval') and text.endswith(')'):
                # NOTE(review): end/words/value are recomputed here over
                # the whole string, discarding the per-match values above.
                end = len(text)
                words = text[start+2:end-1].split(' ')
                keyword = words[0]
                value = ' '.join(words[1:])
                replacement = self.eval_text(value)
            else:
                raise ParsingException('$(eval) expressions need to span the whole attribute string!')

            match = Match(start, end, keyword, value, replacement)
            matches.append(match)
        
        return matches
Example 55
Project: alpyca   Author: alpyca   File: launch.py    MIT License 5 votes vote down vote up
def find_unknown_action(self, text):
        """Fail on any unresolved $(...) substitution left in *text*.

        Currently a stub: the first substitution expression encountered
        raises ParsingException. Returns None when none are present.
        """
        expr = r'\$\((.*?)\)'

        pending = []
        for _ in re.finditer(expr, text):
            # any surviving substitution is unsupported for now
            raise ParsingException('TODO')
Example 56
Project: jawfish   Author: war-and-code   File: tarfile.py    MIT License 5 votes vote down vote up
def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.
        """
        offsets = []
        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
            offsets.append(int(match.group(1)))
        numbytes = []
        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
            numbytes.append(int(match.group(1)))
        next.sparse = list(zip(offsets, numbytes)) 
Example 57
Project: jawfish   Author: war-and-code   File: test_re.py    MIT License 5 votes vote down vote up
def test_bug_581080(self):
        # finditer over "a b" must yield exactly one whitespace match
        # and then exhaust.
        iter = re.finditer(r"\s", "a b")
        self.assertEqual(next(iter).span(), (1,2))
        self.assertRaises(StopIteration, next, iter)

        # the (undocumented) pattern.scanner() API should agree with
        # finditer and return None once exhausted
        scanner = re.compile(r"\s").scanner("a b")
        self.assertEqual(scanner.search().span(), (1, 2))
        self.assertEqual(scanner.search(), None)
Example 58
Project: jawfish   Author: war-and-code   File: test_re.py    MIT License 5 votes vote down vote up
def test_bug_817234(self):
        # ".*" must yield the full match and then one final empty match
        # anchored at the end of the string — not loop forever.
        iter = re.finditer(r".*", "asdf")
        self.assertEqual(next(iter).span(), (0, 4))
        self.assertEqual(next(iter).span(), (4, 4))
        self.assertRaises(StopIteration, next, iter)
Example 59
Project: study   Author: program-in-chinese   File: 提取词条.py    BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def camel_case_split(identifier):
    """Split a camelCase/PascalCase identifier into its word parts.

    Boundaries fall between a lowercase and an uppercase letter, and
    between an acronym and the capitalized word that follows it
    (e.g. 'XMLParser' -> 'XML', 'Parser').
    """
    boundary = r'.+?(?:(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])|$)'
    return [part.group(0) for part in re.finditer(boundary, identifier)]
Example 60
Project: Repobot   Author: Desgard   File: _postgres_builtins.py    MIT License 5 votes vote down vote up
def parse_keywords(f):
        """Extract keyword tokens from a DocBook keyword table.

        Reads the stream, captures the <token> cell of each
        <entry><token>...</token></entry><entry>...</entry> row pair,
        and returns the keywords in alphabetical order.

        :raises ValueError: if no keyword rows are found.
        """
        table_row = (r'\s*<entry><token>([^<]+)</token></entry>\s*'
                     r'<entry>([^<]+)</entry>')
        keywords = [row.group(1) for row in re.finditer(table_row, f.read())]

        if not keywords:
            raise ValueError('no keyword found')

        return sorted(keywords)
Example 61
Project: Repobot   Author: Desgard   File: tarfile.py    MIT License 5 votes vote down vote up
def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.
        """
        offsets = []
        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
            offsets.append(int(match.group(1)))
        numbytes = []
        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
            numbytes.append(int(match.group(1)))
        next.sparse = list(zip(offsets, numbytes)) 
Example 62
Project: Repobot   Author: Desgard   File: document.py    MIT License 5 votes vote down vote up
def find(self, sub, in_current_line=False, include_current_position=False,
             ignore_case=False, count=1):
        """
        Find `text` after the cursor, return position relative to the cursor
        position. Return `None` if nothing was found.

        :param count: Find the n-th occurance.
        """
        assert isinstance(ignore_case, bool)

        if in_current_line:
            text = self.current_line_after_cursor
        else:
            text = self.text_after_cursor

        if not include_current_position:
            if len(text) == 0:
                return  # (Otherwise, we always get a match for the empty string.)
            else:
                # skip the character under the cursor so it can't match
                text = text[1:]

        flags = re.IGNORECASE if ignore_case else 0
        # sub is matched literally, not as a regex
        iterator = re.finditer(re.escape(sub), text, flags)

        try:
            for i, match in enumerate(iterator):
                if i + 1 == count:
                    if include_current_position:
                        return match.start(0)
                    else:
                        # +1 compensates for the text[1:] slice above
                        return match.start(0) + 1
        except StopIteration:
            # NOTE(review): finditer does not raise StopIteration out of
            # a for loop — this handler appears to be defensive/dead.
            pass
Example 63
Project: Repobot   Author: Desgard   File: document.py    MIT License 5 votes vote down vote up
def find_all(self, sub, ignore_case=False):
        """
        Find all occurances of the substring. Return a list of absolute
        positions in the document.
        """
        flags = re.IGNORECASE if ignore_case else 0
        positions = []
        # sub is matched literally, not as a regex
        for occurrence in re.finditer(re.escape(sub), self.text, flags):
            positions.append(occurrence.start())
        return positions
Example 64
Project: Repobot   Author: Desgard   File: document.py    MIT License 5 votes vote down vote up
def find_next_word_ending(self, include_current_position=False, count=1, WORD=False):
        """
        Return an index relative to the cursor position pointing to the end
        of the next word. Return `None` if nothing was found.
        """
        # negative count means: search backwards instead
        if count < 0:
            return self.find_previous_word_ending(count=-count, WORD=WORD)

        if include_current_position:
            text = self.text_after_cursor
        else:
            # skip the character under the cursor
            text = self.text_after_cursor[1:]

        # WORD=True uses the vi-style whitespace-delimited word regex
        regex = _FIND_BIG_WORD_RE if WORD else _FIND_WORD_RE
        iterable = regex.finditer(text)

        try:
            for i, match in enumerate(iterable):
                if i + 1 == count:
                    value = match.end(1)

                    if include_current_position:
                        return value
                    else:
                        # +1 compensates for the text[1:] slice above
                        return value + 1

        except StopIteration:
            # NOTE(review): finditer does not raise StopIteration out of
            # a for loop — this handler appears to be defensive/dead.
            pass
Example 65
Project: Repobot   Author: Desgard   File: processors.py    MIT License 5 votes vote down vote up
def apply_transformation(self, cli, document, lineno, source_to_display, tokens):
        """Highlight every occurrence of the current search text on a line.

        Tokens covered by a match get Token.SearchMatch appended to their
        token type — or Token.SearchMatch.Current when the cursor sits
        inside that match.
        """
        search_text = self._get_search_text(cli)
        searchmatch_current_token = (':', ) + Token.SearchMatch.Current
        searchmatch_token = (':', ) + Token.SearchMatch

        if search_text and not cli.is_returning:
            # For each search match, replace the Token.
            line_text = token_list_to_text(tokens)
            tokens = explode_tokens(tokens)

            flags = re.IGNORECASE if cli.is_ignoring_case else 0

            # Get cursor column.
            if document.cursor_position_row == lineno:
                cursor_column = source_to_display(document.cursor_position_col)
            else:
                cursor_column = None

            # search text is matched literally, not as a regex
            for match in re.finditer(re.escape(search_text), line_text, flags=flags):
                if cursor_column is not None:
                    on_cursor = match.start() <= cursor_column < match.end()
                else:
                    on_cursor = False

                for i in range(match.start(), match.end()):
                    old_token, text = tokens[i]
                    if on_cursor:
                        tokens[i] = (old_token + searchmatch_current_token, tokens[i][1])
                    else:
                        tokens[i] = (old_token + searchmatch_token, tokens[i][1])

        return Transformation(tokens)
Example 66
Project: razzy-spinner   Author: rafasashi   File: regexp.py    GNU General Public License v3.0 5 votes vote down vote up
def span_tokenize(self, text):
        """Yield (start, end) spans of tokens in *text*.

        In gap mode the regexp matches separators and the spans between
        them are yielded (optionally discarding empty spans); otherwise
        the regexp matches the tokens themselves.
        """
        self._check_regexp()

        if self._gaps:
            for left, right in regexp_span_tokenize(text, self._regexp):
                if not (self._discard_empty and left == right):
                    yield left, right
        else:
            for m in re.finditer(self._regexp, text):
                yield m.span()
Example 67
Project: razzy-spinner   Author: rafasashi   File: texttiling.py    GNU General Public License v3.0 5 votes vote down vote up
def _divide_to_tokensequences(self, text):
        """Divides the text into pseudosentences of fixed size.

        Each word match contributes a (word, start_offset) pair; the
        pairs are chunked into groups of self.w.
        NOTE(review): "\\w+" is not a raw string, and i/w is true (float)
        division under Python 3 — this code predates Python 3 and the
        TokenSequence index is presumably meant to be integral; confirm.
        """
        w = self.w
        wrdindex_list = []
        matches = re.finditer("\w+", text)
        for match in matches:
            wrdindex_list.append((match.group(), match.start()))
        return [TokenSequence(i/w, wrdindex_list[i:i+w])
                for i in range(0, len(wrdindex_list), w)]
Example 68
Project: razzy-spinner   Author: rafasashi   File: util.py    GNU General Public License v3.0 5 votes vote down vote up
def _ieer_read_text(s, root_label):
    """Parse IEER-style SGML text into a Tree rooted at *root_label*.

    <b_TYPE>/<e_TYPE> tags open and close entity chunks; every other
    token becomes a leaf of the currently open chunk.

    :raises ValueError: on unbalanced or malformed IEER markup
    """
    stack = [Tree(root_label, [])]
    # s will be None if there is no headline in the text
    # return the empty list in place of a Tree
    if s is None:
        return []
    # tokens are either a full <...> tag or a run of non-tag characters
    for piece_m in re.finditer('<[^>]+>|[^\s<]+', s):
        piece = piece_m.group()
        try:
            if piece.startswith('<b_'):
                # open a new entity chunk and make it current
                m = _IEER_TYPE_RE.match(piece)
                if m is None: print('XXXX', piece)
                chunk = Tree(m.group('type'), [])
                stack[-1].append(chunk)
                stack.append(chunk)
            elif piece.startswith('<e_'):
                # close the current entity chunk
                stack.pop()
#           elif piece.startswith('<'):
#               print "ERROR:", piece
#               raise ValueError # Unexpected HTML
            else:
                stack[-1].append(piece)
        except (IndexError, ValueError):
            raise ValueError('Bad IEER string (error at character %d)' %
                             piece_m.start())
    if len(stack) != 1:
        raise ValueError('Bad IEER string')
    return stack[0]
Example 69
Project: vscode-mayapy   Author: FXTD-ODYSSEY   File: debugger_unittest.py    MIT License 5 votes vote down vote up
def wait_for_get_next_statement_targets(self):
        """Block until a statement-targets message arrives, then parse it.

        Polls get_next_message() until a payload containing '<xml><line>'
        shows up, then collects every numeric <line> value into a set.
        """
        message = ''
        while '<xml><line>' not in message:
            message = self.get_next_message('wait_for_get_next_statement_targets')

        targets = set()
        for hit in re.finditer(r"(<line>([0-9]*)<\/line>)", message, re.IGNORECASE):
            # <line></line> captures an empty string; int() rejects it.
            try:
                targets.add(int(hit.group(2)))
            except ValueError:
                pass
        return targets
Example 70
Project: vscode-mayapy   Author: FXTD-ODYSSEY   File: _bytecode_overflow_example.py    MIT License 5 votes vote down vote up
def fun(text):
        """Collect runs of words that are not recognized as English.

        A word counts as English if it (or a crude stem with a trailing
        's'/'ed'/'ing' removed) appears in en_words_basic or en_words.
        Up to Dummy.non_en_words_limit following words are swept into the
        same non-English run.
        NOTE(review): non_en/non_en_pass are built but never returned —
        upstream this is a bytecode-overflow stress example.
        """
        words = tuple(w[0].lower() for w in re.finditer(r'[a-zA-Z]+', text))
        non_en_pass = []
        for i, word in enumerate(words):
            non_en = []
            if not (word in en_words_basic
                    or (word.endswith('s') and word[:-1] in en_words_basic)
                    or (word.endswith('ed') and word[:-2] in en_words_basic)
                    or (word.endswith('ing') and word[:-3] in en_words_basic)
                    or word in en_words
                    or (word.endswith('s') and word[:-1] in en_words)
                    or (word.endswith('ed') and word[:-2] in en_words)
                    or (word.endswith('ing') and word[:-3] in en_words)
                    ):

                non_en.append(word)
                non_en_pass.append(word)
                # extend the run with following non-English words
                for j in range(1, Dummy.non_en_words_limit):
                    if i + j >= len(words):
                        break
                    word = words[i + j]

                    if (word in en_words_basic
                        or (word.endswith('s') and word[:-1] in en_words_basic)
                        or (word.endswith('ed') and word[:-2] in en_words_basic)
                        or (word.endswith('ing') and word[:-3] in en_words_basic)
                        or word in en_words
                        or (word.endswith('s') and word[:-1] in en_words)
                        or (word.endswith('ed') and word[:-2] in en_words)
                        or (word.endswith('ing') and word[:-3] in en_words)
                    ):
                        break
                    else:
                        non_en.append(word)
                        non_en_pass.append(word)
Example 71
Project: vscode-mayapy   Author: FXTD-ODYSSEY   File: _bytecode_overflow_example.py    MIT License 5 votes vote down vote up
def fun(text):
        """Collect runs of words that are not recognized as English.

        Variant of the same stress example with a tracing() hook after
        tokenization. A word counts as English if it (or a crude stem
        with a trailing 's'/'ed'/'ing' removed) appears in
        en_words_basic or en_words.
        NOTE(review): non_en/non_en_pass are built but never returned —
        upstream this is a bytecode-overflow stress example.
        """
        words = tuple(w[0].lower() for w in re.finditer(r'[a-zA-Z]+', text))
        tracing()
        non_en_pass = []
        for i, word in enumerate(words):
            non_en = []
            if not (word in en_words_basic
                    or (word.endswith('s') and word[:-1] in en_words_basic)
                    or (word.endswith('ed') and word[:-2] in en_words_basic)
                    or (word.endswith('ing') and word[:-3] in en_words_basic)
                    or word in en_words
                    or (word.endswith('s') and word[:-1] in en_words)
                    or (word.endswith('ed') and word[:-2] in en_words)
                    or (word.endswith('ing') and word[:-3] in en_words)
                    ):

                non_en.append(word)
                non_en_pass.append(word)
                # extend the run with following non-English words
                for j in range(1, Dummy.non_en_words_limit):
                    if i + j >= len(words):
                        break
                    word = words[i + j]
                    if (word in en_words_basic
                        or (word.endswith('s') and word[:-1] in en_words_basic)
                        or (word.endswith('ed') and word[:-2] in en_words_basic)
                        or (word.endswith('ing') and word[:-3] in en_words_basic)
                        or word in en_words
                        or (word.endswith('s') and word[:-1] in en_words)
                        or (word.endswith('ed') and word[:-2] in en_words)
                        or (word.endswith('ing') and word[:-3] in en_words)
                        ):
                        break
                    else:
                        non_en.append(word)
                        non_en_pass.append(word)
Example 72
Project: Dumb-Cogs   Author: irdumbs   File: noflippedtables.py    MIT License 4 votes vote down vote up
def scrutinize_messages(self, message):
		"""Watch a channel for table-flip emoticons and politely unflip them.

		Flipped tables (┻━┻) are tallied per channel; matching upright
		tables (┬─┬) cancel them out. After a short random delay the bot
		posts an unflipped table (plus ノ( ゜-゜ノ)) for each outstanding flip.

		NOTE(review): `await` appears inside a plain `def` (upstream this is
		`async def`), and the indentation mixes tabs and spaces — the code
		is preserved byte-for-byte here apart from added comments.
		"""
		channel = message.channel
		user = message.author
		if hasattr(user, 'bot') and user.bot is True:
                    return
		if channel.id not in self.flippedTables:
			 self.flippedTables[channel.id] = {}
		#┬─┬ ┬┬ ┻┻ ┻━┻ ┬───┬ ┻━┻ will leave 3 tables left flipped
		#count flipped tables
		for m in re.finditer('┻━*┻|┬─*┬', message.content):
			t = m.group()
			if '┻' in t and not (message.author.id == self.bot.user.id and self.settings["BOT_EXEMPT"]):
				if t in self.flippedTables[channel.id]:
					self.flippedTables[channel.id][t] += 1
				else:
					self.flippedTables[channel.id][t] = 1
					if not self.settings["ALL_TABLES"]:
						break
			else:
				# an upright table cancels one pending flip of same width
				f = t.replace('┬','┻').replace('─','━')
				if f in self.flippedTables[channel.id]:
					if self.flippedTables[channel.id][f] <= 0:
						del self.flippedTables[channel.id][f]
					else:
						self.flippedTables[channel.id][f] -= 1
		#wait random time. some tables may be unflipped by now.
		await asyncio.sleep(randfloat(0,1.5))
		tables = ""

		deleteTables = []
		#unflip tables in self.flippedTables[channel.id]
		for t, n in self.flippedTables[channel.id].items():
			unflipped = t.replace('┻','┬').replace('━','─') + " ノ( ゜-゜ノ)" + "\n"
			for i in range(0,n):
				tables += unflipped
				#in case being processed in parallel
				self.flippedTables[channel.id][t] -= 1
			deleteTables.append(t)
		for t in deleteTables:
			del self.flippedTables[channel.id][t]
		if tables != "":
			await self.bot.send_message(channel, tables)
Example 73
Project: zabbix   Author: xiaomatech   File: hadoop-collector.py    MIT License 4 votes vote down vote up
def collect_tasktracker(self):
        """Scrape task-tracker stats for this host from the JobTracker UI.

        Finds the header row and this host's data row in machines.jsp,
        zips the <td> cells into a name->value map, and reports the task
        counters via send_result().
        NOTE(review): Python 2 only — uses iterator .next().
        """

        content = self.request('http://%s:%d/machines.jsp?type=active' % (
            self.args.jobtracker_host, self.args.jobtracker_port))

        # accumulate the header row until its closing </tr>
        lines = iter(content.split('\n'))
        jthead = None
        for line in lines:
            if line.startswith('<tr><td><b>Name'):
                jthead = line
            elif jthead is not None:
                jthead += line
                if '</tr>' in line:
                    break

        # accumulate this host's data row until its closing </tr>
        jtbody = None
        for line in lines:
            if line.startswith('<tr>') \
                    and self.args.host in line:
                jtbody = line
            elif jtbody is not None:
                jtbody += line
                if '</tr>' in line:
                    break

        # pair header cells with body cells, stripping nested tags
        iter_head = re.finditer('<td[^>]*>(.*?)</td>', jthead)
        iter_body = re.finditer('<td[^>]*>(.*?)</td>', jtbody)
        jtmap = {}
        for mo_head in iter_head:
            mo_body = iter_body.next()
            jtmap[PTRN_TAG.sub('', mo_head.group(1)).strip()] = \
                    PTRN_TAG.sub('', mo_body.group(1)).strip()

        result = {}
        result['task_running'] = jtmap['# running tasks']
        result['task_capacity'] = int(jtmap['Max Map Tasks']) + int(jtmap[
            'Max Reduce Tasks'])
        result['task_failed'] = jtmap['Task Failures']
        result['task_total'] = jtmap['Total Tasks Since Start']
        result['task_succeeded'] = jtmap['Succeeded Tasks Since Start']

        self.send_result(result)
Example 74
Project: zabbix   Author: xiaomatech   File: hadoop-collector.py    MIT License 4 votes vote down vote up
def collect_datanode(self):
        """Scrape the NameNode live-nodes page and report DFS usage stats.

        Fetches the LIVE datanode HTML table, pairs the header cells with
        the row for ``self.args.host``, and sends the parsed capacity and
        usage figures via ``self.send_result``.
        """
        content = self.request('http://%s:%d/dfsnodelist.jsp?whatNodes=LIVE' %
                               (self.args.namenode_host,
                                self.args.namenode_port))

        lines = iter(content.split('\n'))
        # Advance to the header row; relies on the for-loop variable
        # leaking out of the loop.
        # NOTE(review): if no headerRow exists, `line` ends up being the
        # last page line -- confirm the page always contains the table.
        for line in lines:
            if line.startswith('<tr class="headerRow">'):
                break
        jthead = line

        # Advance to the row describing this host.
        for line in lines:
            if line.startswith('<tr') \
                    and self.args.host in line:
                break
        # Strip any nested tables before extracting the cells.
        jtbody = re.sub('<table[^>]*>.*?</table>', '', line)

        # Header cells are <th>, body cells are <td>; walk them in lockstep.
        iter_head = re.finditer('<th[^>]*>(.*?)(?=<th|$)', jthead)
        iter_body = re.finditer('<td[^>]*>(.*?)(?=<td|$)', jtbody)
        jtmap = {}
        ptrn_quote = re.compile(r'\((.*?)\)')
        for mo_head in iter_head:
            # Fix: next() builtin instead of the Python-2-only .next()
            # method, so this also runs on Python 3.
            mo_body = next(iter_body)

            k = PTRN_TAG.sub('', mo_head.group(1))
            # Percentage columns duplicate absolute ones; skip them.
            if '(%)' in k:
                continue

            # Pull a parenthesised unit out of the header, e.g.
            # "Capacity (GB)" -> key "Capacity", value "123 GB".
            mo = ptrn_quote.search(k)
            k = ptrn_quote.sub('', k).strip()
            v = PTRN_TAG.sub('', mo_body.group(1)).strip()

            if mo is not None:
                jtmap[k] = '%s %s' % (v, mo.group(1))
            else:
                jtmap[k] = v

        result = {}
        result['dfs_capacity'] = self.regulate_size(jtmap[
            'Configured Capacity'])
        result['dfs_used'] = self.regulate_size(jtmap['Used'])
        result['dfs_used_other'] = self.regulate_size(jtmap['Non DFS Used'])
        result['dfs_remaining'] = self.regulate_size(jtmap['Remaining'])
        result['block_count'] = jtmap['Blocks']

        self.send_result(result)
Example 75
Project: SublimeKSP   Author: nojanath   File: preprocessor_plugins.py    GNU General Public License v3.0 4 votes vote down vote up
def substituteValue(self, command, listOfOtherDefines, line=None):
		""" Replace all occurrences of the define constant in the given command with its value.

		command -- source line text in which this define is expanded.
		listOfOtherDefines -- other define objects; used to resolve defines
			that appear nested inside this macro's argument list.
		line -- optional line object for error reporting (defaults to self.line).
		Returns the command text with every occurrence of this define expanded. """
		newCommand = command
		if self.name in command:
			if not self.args:
				# Parameterless define: plain whole-word substitution.
				newCommand = re.sub(r"\b%s\b" % self.name, self.value, command)
			else:
				lineObj = line or self.line
				matchIt = re.finditer(r"\b%s\b" % self.name, command)
				for match in matchIt:
					# Parse the match
					matchPos = match.start()
					# Scan forward from the name, counting parentheses, to
					# capture the full macro call "NAME(arg, ...)" even when
					# the arguments themselves contain nested parentheses.
					parenthCount = 0
					preBracketFlag = True # Flag to show when the first bracket is found.
					foundString = []
					for char in command[matchPos:]:
						if char == "(":
							parenthCount += 1
							preBracketFlag = False
						elif char == ")":
							parenthCount -= 1
						foundString.append(char)
						if parenthCount == 0 and preBracketFlag == False:
							break
					foundString = "".join(foundString)

					# Check whether the args are valid
					openBracketPos = foundString.find("(")
					if openBracketPos == -1:
						raise ksp_compiler.ParseException(lineObj, "No arguments found for define macro: %s" % foundString)
					argsString = foundString[openBracketPos + 1 : len(foundString) - 1]
					foundArgs = ksp_compiler.split_args(argsString, lineObj)
					if len(foundArgs) != len(self.args):
						# The number of args could be incorrect because there are other defines in the arg list, therefore first evaluate
						# all other defines in the args. If still incorrect, raise an exception.
						for defineObj in listOfOtherDefines:
							argsString = defineObj.substituteValue(argsString, listOfOtherDefines)
						foundArgs = ksp_compiler.split_args(argsString, lineObj)
						if len(foundArgs) != len(self.args):
							raise ksp_compiler.ParseException(lineObj, "Incorrect number of arguments in define macro: %s. Expected %d, got %d.\n" % (foundString, len(self.args), len(foundArgs)))

					# Build the new value using the given args
					newVal = self.value
					for argIdx, arg in enumerate(self.args):
						if arg.startswith("#") and arg.endswith("#"):
							# "#name#" placeholders are substituted verbatim,
							# without word-boundary anchoring.
							# NOTE(review): `arg` is used as a regex pattern;
							# metacharacters in a placeholder name would
							# misbehave -- confirm names are plain words.
							newVal = re.sub(arg, foundArgs[argIdx], newVal)
						else:
							newVal = re.sub(r"\b%s\b" % arg, foundArgs[argIdx], newVal)
					newCommand = newCommand.replace(foundString, newVal)
		return(newCommand)
Example 76
Project: SLiPy   Author: glentner   File: mpfitexpr.py    GNU General Public License v2.0 4 votes vote down vote up
def mpfitexpr(func, x, y, err , start_params, check=True, full_output=False,
						imports=None, **kw):
	"""Fit the user-defined expression to the data.

	Input:
	- func: string with the function definition
	- x: x vector
	- y: y vector
	- err: vector with the errors of y
	- start_params: the starting parameters for the fit
	Output:
	- The tuple (params, yfit) with best-fit params and the values of func evaluated at x
	Keywords:
	- check: boolean parameter. If True (default) the function will be checked for sanity
	- full_output: boolean parameter. If True (default is False) then instead of best-fit parameters the mpfit object is returned
	- imports: list of strings, of optional modules to be imported, required to evaluate the function
	Example:
	params,yfit=mpfitexpr('p[0]+p[2]*(x-p[1])',x,y,err,[0,10,1])
	
	If you need to use numpy and scipy functions in your function, then
		you must use the full names of these functions, e.g.:
		numpy.sin, numpy.cos etc.
	
	This function is motivated by mpfitexpr() from the wonderful MPFIT IDL package
		written by Craig Markwardt
	
	"""

	# Globals namespace handed to eval() below, pre-seeded with numpy/scipy.
	# NOTE: the name `hash` shadows the builtin of the same name; kept as-is
	# in this documentation-only pass.
	hash={}
	hash['numpy']=numpy
	hash['scipy']=scipy
	
	if imports is not None:
		for i in imports:
			#exec '%s=__import__("%s")'%(a,b) in globals(),locals()
			# Make each requested module visible inside the eval namespace.
			hash[i]= __import__(i)
	# Residuals callback in the shape mpfit expects: [status, residuals].
	def myfunc(p,fjac=None,x=None, y=None, err=None):
		return [0, eval('(y-(%s))/err'%func,hash,locals())]

	# Find the highest parameter index p[i] referenced by the expression,
	# to validate the length of start_params.
	myre = "(?:[^a-zA-Z_]|^)p\[(\d+)\]"
	r = re.compile(myre)
	maxp = -1
	for m in re.finditer(r,func):
		curp = int(m.group(1))
		maxp = curp if curp > maxp else maxp	
	if check:
		if maxp == -1: 
			raise Exception("wrong function format")
		if maxp + 1 != len(start_params):
			raise Exception("the length of the start_params != the length of the parameter verctor of the function")
	fa={'x' : x, 'y' : y,'err' : err}
	res = mpfit.mpfit(myfunc,start_params,functkw=fa,**kw)
	# Evaluate the best-fit model at x for the caller.
	yfit = eval(func, hash, {'x':x, 'p': res.params})
	if full_output:
		return (res, yfit)
	else:
		return (res.params, yfit)
Example 77
Project: regex4ocr   Author: juntossomosmais   File: extraction.py    MIT License 4 votes vote down vote up
def get_table_rows(all_rows, drm):
    """
    Extract rows from the table data substring of the OCR result string
    by using the DRM key "line_start" which denotes the regexp that
    matches the beginning of EACH new line of the tabular data.

    Args:
        all_rows (str): substring containing all rows from the OCR string;
        drm (dict): DRM dict object for parsing the OCR all rows string.

    Returns:
        (list): List of all the matched rows of the all_rows substring
                of the original OCR result.
    """
    if not all_rows:
        return []

    # table data is guaranteed here
    row_start_re = drm["table"]["line_start"]

    # Every match start marks the beginning of a row.
    line_start_indexes = [
        m.span()[0] for m in re.finditer(row_start_re, all_rows)
    ]

    # Each row ends where the next one begins; the last row runs to the
    # end of the string.  Fix: use len(all_rows) (not len - 1) so the
    # final character of the last row is not silently dropped when the
    # string does not end with a newline.
    line_ends_indexes = line_start_indexes[1:] + [len(all_rows)]

    # Slice out each row and strip embedded newlines.
    rows = [
        all_rows[start:end].replace("\n", "")
        for start, end in zip(line_start_indexes, line_ends_indexes)
    ]

    logger.debug("Returning rows: %s", rows)

    return rows
Example 78
Project: multiprot   Author: strubelab   File: ranch.py    Apache License 2.0 4 votes vote down vote up
def extract_symmetric(full, symseq, embedded):
    """
    Extract one or more embedded chains from a PDBModel with a symmetric
    structure.
    
    :param full:   PDBModel with symmetric structure, that contains embedded 
                        chains
    :type full: PDBModel

    :param symseq: sequence of the symmetric unit, i.e. the sequence that is
                        multiplied in the symmetric structure
    :type symseq: string
    :param embedded: dictionary with embedded domains and its position (index)
                            in the sequence of 'full'
    :type embedded: dictionary
    :return: 'full' with embedded domains concatenated at the end for each
                symmetric unit
    :type full: PDBModel
    :raise MatchError: if symseq cannot be found in full.sequence()
    """
    
    symunits = []
    modeled_doms = []

    # NOTE(review): symseq is used as a regex pattern.  Plain residue
    # letters are safe, but regex metacharacters in the sequence would
    # misbehave -- confirm inputs are alphabetic sequences only.
    if re.search(symseq, full.sequence()):

        matches = re.finditer(symseq, full.sequence())

        # Cut out each symmetric unit and un-embed its domains.
        for match in matches:
            istart, iend = match.span()
            symunit = full.takeResidues(list(range(istart, iend)))
            # Extract embedded domains one symunit at a time
            extracted = extract_embedded(symunit, embedded)
            symunits.append(extracted[0])
            modeled_doms.append(extracted[1][0])

        # Sequence of a single (extracted) unit, returned to the caller.
        r = symunits[0]
        out_symseq = r.sequence()

        # Concatenate the remaining units onto the first one.
        for i in range(1,len(symunits)):
            r = r.concat(symunits[i])

        r.addChainId()
        # N is presumably the numpy module imported elsewhere in this
        # file -- TODO confirm.
        r['serial_number'] = N.arange(1,len(r)+1)

    else:
        raise MatchError("Symseq could not be found inside the full domain")

    # out_symseq is always bound here: the else-branch above raises.
    return r, modeled_doms, out_symseq
Example 79
Project: razzy-spinner   Author: rafasashi   File: tree.py    GNU General Public License v3.0 4 votes vote down vote up
def chunk(s, chunk_node="NP", top_node="S"):
    """
    Divide a string of chunked tagged text into
    chunks and unchunked tokens, and produce a C{Tree}.
    Chunks are marked by square brackets (C{[...]}).  Words are
    delineated by whitespace, and each word should have the form
    C{I{text}/I{tag}}.  Words that do not contain a slash are
    assigned a C{tag} of C{None}.

    @return: A tree corresponding to the string representation.
    @rtype: C{tree}
    @param s: The string to be converted
    @type s: C{string}
    @param chunk_node: The label to use for chunk nodes
    @type chunk_node: C{string}
    @param top_node: The label to use for the root of the tree
    @type top_node: C{string}
    """

    # Tokens are '[', ']', or a maximal run of non-bracket, non-space chars.
    WORD_OR_BRACKET = re.compile(r'\[|\]|[^\[\]\s]+')
    VALID = re.compile(r'^([^\[\]]+|\[[^\[\]]*\])*$')

    if not VALID.match(s):
        # Fix: Python 2-only "raise E, msg" statement syntax replaced with
        # the call form, which is valid on both Python 2 and Python 3.
        raise ValueError('Invalid token string (bad brackets)')

    stack = [Tree(top_node, [])]
    for match in WORD_OR_BRACKET.finditer(s):
        text = match.group()
        if text[0] == '[':
            # Open a new chunk; following words attach to it.
            chunk = Tree(chunk_node, [])
            stack[-1].append(chunk)
            stack.append(chunk)
        elif text[0] == ']':
            stack.pop()
        else:
            # Split "text/tag" on the LAST slash; slashless words get None.
            slash = text.rfind('/')
            if slash >= 0:
                tok = (text[:slash], text[slash+1:])
            else:
                tok = (text, None)
            stack[-1].append(tok)

    return stack[0]

### CONLL 
Example 80
Project: razzy-spinner   Author: rafasashi   File: util.py    GNU General Public License v3.0 4 votes vote down vote up
def tagstr2tree(s, chunk_label="NP", root_label="S", sep='/',
                source_tagset=None, target_tagset=None):
    """
    Parse a bracketted, tagged string into a two-level Tree.

    Chunks are delimited by square brackets (``[...]``), words by
    whitespace, and each word should have the form ``text/tag``; a word
    with no separator gets a tag of None.

    :param s: The string to be converted
    :type s: str
    :param chunk_label: The label to use for chunk nodes
    :type chunk_label: str
    :param root_label: The label to use for the root of the tree
    :type root_label: str
    :rtype: Tree
    """

    # A token is '[', ']', or a maximal run of non-bracket, non-space chars.
    token_re = re.compile(r'\[|\]|[^\[\]\s]+')

    root = Tree(root_label, [])
    stack = [root]
    for match in token_re.finditer(s):
        token = match.group()
        if token == '[':
            # Chunks may not nest: '[' is only legal at the top level.
            if len(stack) != 1:
                raise ValueError('Unexpected [ at char %d' % match.start())
            subtree = Tree(chunk_label, [])
            stack[-1].append(subtree)
            stack.append(subtree)
        elif token == ']':
            # ']' is only legal while exactly one chunk is open.
            if len(stack) != 2:
                raise ValueError('Unexpected ] at char %d' % match.start())
            stack.pop()
        elif sep is None:
            # No separator configured: store the raw token.
            stack[-1].append(token)
        else:
            word, tag = str2tuple(token, sep)
            if source_tagset and target_tagset:
                tag = map_tag(source_tagset, target_tagset, tag)
            stack[-1].append((word, tag))

    # A chunk left open at end-of-string is an error.
    if len(stack) != 1:
        raise ValueError('Expected ] at char %d' % len(s))
    return root

### CONLL