Python re.finditer() Examples

The following are 30 code examples showing how to use re.finditer(). They are extracted from open source projects; the project, author, file, and license are listed above each example.
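
For reference, here is a minimal sketch of the pattern most of these examples build on: re.finditer() returns an iterator of match objects, and each match exposes group(), start(), end(), and span(). The sample text and pattern below are illustrative only.

import re

text = "a:b::c:::d"

# finditer() yields one match object per non-overlapping match,
# scanning the string left to right.
for m in re.finditer(r":+", text):
    # group() is the matched text; span() gives the (start, end) indices.
    print(m.group(), m.span())

# Output:
# : (1, 2)
# :: (3, 5)
# ::: (6, 9)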

Example 1
Project: svviz   Author: svviz   File: remap.py    License: MIT License
def tryAlignExact(query, revquery, target, aligner):
    f_results = [m for m in re.finditer(query, target)]
    r_results = [m for m in re.finditer(revquery, target)]

    if len(f_results) > 0:
        aln = RemapAlignment(f_results[0], query, aligner.match)
        strand = "+"
    elif len(r_results) > 0:
        aln = RemapAlignment(r_results[0], revquery, aligner.match)
        strand = "-"
    else:
        return None

    if len(f_results) + len(r_results) > 1:
        aln.score2 = aln.score

    return strand, aln 
Example 2
Project: mindustry-modding   Author: SimonWoodburyForget   File: to_wiki.py    License: GNU General Public License v3.0
def normalize(md):
    '''Normalize anchors.'''
    def on_match(link):
        desc = link.group(1)
        old = link.group(2)
        href = (link.group(2)
                .lower()
                .replace('%20', '-')
                .replace(" ", "-")
                .replace("~", "")
                .replace(".", ""))
        old, new = f'[{desc}]({old})', f'[{desc}]({href})'
        print(old, new)
        return old, new

    replacers = set((on_match(x) for x in re.finditer(r'\[([^\]\[]*)\]\((#[^\)]*)\)', md)))
    return ft.reduce(lambda md, x: md.replace(x[0], x[1]), replacers, md) 
Example 3
Project: rop-chainer   Author: wizh   File: gadgets.py    License: GNU General Public License v3.0
def _locate_gadgets(self, section, terminals, gadget_type):
        disassembler = cs.Cs(cs.CS_ARCH_X86, cs.CS_MODE_32)
        for terminal in terminals:
            matches = [match.start() for match in re.finditer(terminal[0],
                                                              section["data"])]
            for index in matches:
                for i in range(self._options.depth):
                    gadget = ""
                    instructions = disassembler.disasm_lite(
                        section["data"][index-i:index+terminal[1]],
                        section["vaddr"]+index)
                    for instruction in instructions:
                        gadget += (str(instruction[2]) + " " +
                                   str(instruction[3])   + " ; ")

                    if gadget:
                        gadget = gadget.replace("  ", " ")
                        gadget = gadget[:-3]
                        self._gadgets += [{"vaddr" : section["vaddr"]+index-i,
                                           "insts" : gadget,
                                           "gadget_type" : gadget_type}] 
Example 4
Project: zbxdb   Author: ikzelf   File: tnslsnr-ping.py    License: GNU General Public License v3.0
def ParseNestedParen(string, level):
    """
    Generate strings contained in nested (), indexing i = level
    """

    if len(re.findall(r"\(", string)) == len(re.findall(r"\)", string)):
        LeftRightIndex = [x for x in zip(
            [Left.start()+1 for Left in re.finditer(r'\(', string)],
            reversed([Right.start() for Right in re.finditer(r'\)', string)]))]

    elif len(re.findall(r"\(", string)) > len(re.findall(r"\)", string)):
        return ParseNestedParen(string + ')', level)

    elif len(re.findall(r"\(", string)) < len(re.findall(r"\)", string)):
        return ParseNestedParen('(' + string, level)

    else:
        return 'fail'

    return [string[LeftRightIndex[level][0]:LeftRightIndex[level][1]]] 
Example 5
Project: jawfish   Author: war-and-code   File: _string.py    License: MIT License
def formatter_parser(*args,**kw):
    """parse the argument as a format string"""

    assert len(args)==1
    assert isinstance(args[0], str)

    _result=[]
    for _match in re.finditer(r"([^{]*)?(\{[^}]*\})?", args[0]):
        _pre, _fmt = _match.groups()
        if _fmt is None:
            _result.append((_pre, None, None, None))
        elif _fmt == '{}':
            _result.append((_pre, '', '', None))
        else:
            # split "{name!flags}" into its name and flags parts
            _m = re.match(r"\{([^!]*)!?(.*)?\}", _fmt)
            _name = _m.group(1)
            _flags = _m.group(2)

            _result.append((_pre, _name, _flags, None))

    return _result 
Example 6
Project: jawfish   Author: war-and-code   File: test_re.py    License: MIT License
def test_finditer(self):
        iter = re.finditer(r":+", "a:b::c:::d")
        self.assertEqual([item.group(0) for item in iter],
                         [":", "::", ":::"])

        pat = re.compile(r":+")
        iter = pat.finditer("a:b::c:::d", 1, 10)
        self.assertEqual([item.group(0) for item in iter],
                         [":", "::", ":::"])

        pat = re.compile(r":+")
        iter = pat.finditer("a:b::c:::d", pos=1, endpos=10)
        self.assertEqual([item.group(0) for item in iter],
                         [":", "::", ":::"])

        pat = re.compile(r":+")
        iter = pat.finditer("a:b::c:::d", endpos=10, pos=1)
        self.assertEqual([item.group(0) for item in iter],
                         [":", "::", ":::"])

        pat = re.compile(r":+")
        iter = pat.finditer("a:b::c:::d", pos=3, endpos=8)
        self.assertEqual([item.group(0) for item in iter],
                         ["::", "::"]) 
Example 7
Project: razzy-spinner   Author: rafasashi   File: tree.py    License: GNU General Public License v3.0
def _ieer_read_text(s, top_node):
    stack = [Tree(top_node, [])]
    for piece_m in re.finditer('<[^>]+>|[^\s<]+', s):
        piece = piece_m.group()
        try:
            if piece.startswith('<b_'):
                m = _IEER_TYPE_RE.match(piece)
                if m is None: print('XXXX', piece)
                chunk = Tree(m.group('type'), [])
                stack[-1].append(chunk)
                stack.append(chunk)
            elif piece.startswith('<e_'):
                stack.pop()
#           elif piece.startswith('<'):
#               print "ERROR:", piece
#               raise ValueError # Unexpected HTML
            else:
                stack[-1].append(piece)
        except (IndexError, ValueError):
            raise ValueError('Bad IEER string (error at character %d)' %
                             piece_m.start())
    if len(stack) != 1:
        raise ValueError('Bad IEER string')
    return stack[0] 
Example 8
Project: razzy-spinner   Author: rafasashi   File: chunkparser_app.py    License: GNU General Public License v3.0
def show_help(self, tab):
        self.helpbox['state'] = 'normal'
        self.helpbox.delete('1.0', 'end')
        for (name, tabstops, text) in self.HELP:
            if name == tab:
                text = text.replace('<<TAGSET>>', '\n'.join(
                    ('\t%s\t%s' % item for item in sorted(list(self.tagset.items()),
                    key=lambda t_w:re.match('\w+',t_w[0]) and (0,t_w[0]) or (1,t_w[0])))))

                self.helptabs[name].config(**self._HELPTAB_FG_PARAMS)
                self.helpbox.config(tabs=tabstops)
                self.helpbox.insert('1.0', text+'\n'*20)
                C = '1.0 + %d chars'
                for (tag, params) in self.HELP_AUTOTAG:
                    pattern = '(?s)(<%s>)(.*?)(</%s>)' % (tag, tag)
                    for m in re.finditer(pattern, text):
                        self.helpbox.tag_add('elide',
                                             C % m.start(1), C % m.end(1))
                        self.helpbox.tag_add('tag-%s' % tag,
                                             C % m.start(2), C % m.end(2))
                        self.helpbox.tag_add('elide',
                                             C % m.start(3), C % m.end(3))
            else:
                self.helptabs[name].config(**self._HELPTAB_BG_PARAMS)
        self.helpbox['state'] = 'disabled' 
Example 9
Project: razzy-spinner   Author: rafasashi   File: chunkparser_app.py    License: GNU General Public License v3.0
def _syntax_highlight_grammar(self, grammar):
        if self.top is None: return
        self.grammarbox.tag_remove('comment', '1.0', 'end')
        self.grammarbox.tag_remove('angle', '1.0', 'end')
        self.grammarbox.tag_remove('brace', '1.0', 'end')
        self.grammarbox.tag_add('hangindent', '1.0', 'end')
        for lineno, line in enumerate(grammar.split('\n')):
            if not line.strip(): continue
            m = re.match(r'(\\.|[^#])*(#.*)?', line)
            comment_start = None
            if m.group(2):
                comment_start = m.start(2)
                s = '%d.%d' % (lineno+1, m.start(2))
                e = '%d.%d' % (lineno+1, m.end(2))
                self.grammarbox.tag_add('comment', s, e)
            for m in re.finditer('[<>{}]', line):
                if comment_start is not None and m.start() >= comment_start:
                    break
                s = '%d.%d' % (lineno+1, m.start())
                e = '%d.%d' % (lineno+1, m.end())
                if m.group() in '<>':
                    self.grammarbox.tag_add('angle', s, e)
                else:
                    self.grammarbox.tag_add('brace', s, e) 
Example 10
Project: razzy-spinner   Author: rafasashi   File: texttiling.py    License: GNU General Public License v3.0
def _mark_paragraph_breaks(self, text):
        """Identifies indented text or line breaks as the beginning of
        paragraphs"""

        MIN_PARAGRAPH = 100
        pattern = re.compile("[ \t\r\f\v]*\n[ \t\r\f\v]*\n[ \t\r\f\v]*")
        matches = pattern.finditer(text)

        last_break = 0
        pbreaks = [0]
        for pb in matches:
            if pb.start()-last_break < MIN_PARAGRAPH:
                continue
            else:
                pbreaks.append(pb.start())
                last_break = pb.start()

        return pbreaks 
Example 11
Project: yang-explorer   Author: CiscoDevNet   File: adapter.py    License: Apache License 2.0
def get_ydk_def_names(python_ydk_defs):
        """
        Get the Python YDK definition names
        """

        logging.debug('get_ydk_def_names: python_ydk_defs : \n' + python_ydk_defs)

        import re

        ydk_def_names = ""
        for m in re.finditer(r"def \w+()", python_ydk_defs):
            logging.debug('get_ydk_def_names: m.group(0): \n' + m.group(0))
            tmp_str = m.group(0).replace('def ', '')
            ydk_def_names = ydk_def_names + tmp_str + " "

        logging.debug('get_ydk_def_names: ydk_def_names : \n' + ydk_def_names)

        return ydk_def_names 
Example 12
Project: dcc   Author: amimo   File: TextDecorators.py    License: Apache License 2.0
def decorate(self, pageOffset=None):
        page = self.decorated.decorate(pageOffset)

        self.PenInterval = self.decorated.PenInterval
        self.brushMap = self.decorated.brushMap
        self.penMap = self.decorated.penMap

        off = self.dataModel.getOffset()

        Match = [(m.start(), m.end()) for m in re.finditer(b'([a-zA-Z0-9\\-\\\\.%*:/? _<>]){4,}', page)]
        for s, e in Match:
            for i in range(e - s):
                idx = off + s + i
                if idx not in self.penMap:
                    self.penMap[off + s + i] = self.redPen

        self.page = page
        return self.page 
Example 13
Project: panda3dstudio   Author: Epihaius   File: file_dialog.py    License: BSD 3-Clause "New" or "Revised" License
def get_incremented_filename(filename, namestring):

    import re

    min_index = 1
    pattern = r"(.*?)(\s*)(\d*)$"
    basename, space, index_str = re.search(pattern, filename).groups()
    search_pattern = fr"^{re.escape(basename)}\s*(\d+)$"

    if index_str:
        min_index = int(index_str)
        zero_padding = len(index_str) if index_str.startswith("0") else 0
        naming_pattern = basename + space + "{:0" + str(zero_padding) + "d}"
    else:
        naming_pattern = basename + " {:02d}"

    names = re.finditer(search_pattern, namestring, re.I | re.M)
    inds = [int(name.group(1)) for name in names]
    max_index = min_index + len(inds)

    for i in range(min_index, max_index):
        if i not in inds:
            return naming_pattern.format(i)

    return naming_pattern.format(max_index) 
Example 14
Project: brownie   Author: eth-brownie   File: sources.py    License: MIT License
def get_pragma_spec(source: str, path: Optional[str] = None) -> NpmSpec:

    """
    Extracts pragma information from Solidity source code.

    Args:
        source: Solidity source code
        path: Optional path to the source (only used for error reporting)

    Returns: NpmSpec object
    """

    pragma_match = next(re.finditer(r"pragma +solidity([^;]*);", source), None)
    if pragma_match is not None:
        pragma_string = pragma_match.groups()[0]
        pragma_string = " ".join(pragma_string.split())
        return NpmSpec(pragma_string)
    if path:
        raise PragmaError(f"No version pragma in '{path}'")
    raise PragmaError("String does not contain a version pragma") 
Example 15
Project: django-rest-registration   Author: apragacz   File: text.py    License: MIT License
def _assert_urls_in_text(text, expected_num, line_url_pattern):
    lines = [line.rstrip() for line in text.split('\n')]
    urls = []
    for line in lines:
        for match in re.finditer(line_url_pattern, line):
            match_groupdict = match.groupdict()
            urls.append(match_groupdict['url'])
    num_of_urls = len(urls)
    msg_format = "Found {num_of_urls} urls instead of {expected_num} in:\n{text}"  # noqa: E501
    msg = msg_format.format(
        num_of_urls=num_of_urls,
        expected_num=expected_num,
        text=text,
    )
    assert num_of_urls == expected_num, msg
    return urls 
Example 16
Project: django-rest-registration   Author: apragacz   File: testcases.py    License: MIT License
def _assert_urls_in_text(self, text, expected_num, line_url_pattern):
        lines = [line.rstrip() for line in text.split('\n')]
        urls = []
        for line in lines:
            for match in re.finditer(line_url_pattern, line):
                match_groupdict = match.groupdict()
                urls.append(match_groupdict['url'])
        num_of_urls = len(urls)
        msg_format = "Found {num_of_urls} urls instead of {expected_num} in:\n{text}"  # noqa: E501
        msg = msg_format.format(
            num_of_urls=num_of_urls,
            expected_num=expected_num,
            text=text,
        )
        self.assertEqual(num_of_urls, expected_num, msg=msg)
        return urls 
Example 17
Project: recruit   Author: Frank-qlu   File: parser.py    License: Apache License 2.0
def split_arg_string(string):
    """Given an argument string this attempts to split it into small parts."""
    rv = []
    for match in re.finditer(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                             r'|"([^"\\]*(?:\\.[^"\\]*)*)"'
                             r'|\S+)\s*', string, re.S):
        arg = match.group().strip()
        if arg[:1] == arg[-1:] and arg[:1] in '"\'':
            arg = arg[1:-1].encode('ascii', 'backslashreplace') \
                .decode('unicode-escape')
        try:
            arg = type(string)(arg)
        except UnicodeError:
            pass
        rv.append(arg)
    return rv 
Example 18
Project: Snowball   Author: davidsbatista   File: Sentence.py    License: GNU General Public License v3.0
def __init__(self, _sentence, e1_type, e2_type):
        self.relationships = set()
        self.sentence = _sentence
        self.entities = list()
        self.valid = False
        self.tree = None
        self.deps = None

        for m in re.finditer(regex, self.sentence):
            self.entities.append(m.group())

        for e1 in self.entities:
            for e2 in self.entities:
                if e1 == e2:
                    continue
                arg1match = re.match("<([A-Z]+)>", e1)
                arg2match = re.match("<([A-Z]+)>", e2)
                if arg1match.group(1) == e1_type and arg2match.group(1) == e2_type:
                    self.valid = True
                    break
Example 19
Project: pytorch_geometric   Author: rusty1s   File: typing.py    License: MIT License
def parse_types(func: Callable) -> List[Tuple[Dict[str, str], str]]:
    source = inspect.getsource(func)
    signature = inspect.signature(func)

    # Parse `# type: (...) -> ...` annotation. Note that it is allowed to pass
    # multiple `# type:` annotations in `forward()`.
    iterator = re.finditer(r'#\s*type:\s*\((.*)\)\s*->\s*(.*)\s*\n', source)
    matches = list(iterator)

    if len(matches) > 0:
        out = []
        args = list(signature.parameters.keys())
        for match in matches:
            arg_types_repr, return_type = match.groups()
            arg_types = split_types_repr(arg_types_repr)
            arg_types = OrderedDict((k, v) for k, v in zip(args, arg_types))
            return_type = return_type.split('#')[0].strip()
            out.append((arg_types, return_type))
        return out

    # Alternatively, parse annotations using the inspected signature.
    else:
        ps = signature.parameters
        arg_types = OrderedDict((k, param_type_repr(v)) for k, v in ps.items())
        return [(arg_types, return_type_repr(signature))] 
Example 20
Project: jbox   Author: jpush   File: parser.py    License: MIT License
def split_arg_string(string):
    """Given an argument string this attempts to split it into small parts."""
    rv = []
    for match in re.finditer(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                             r'|"([^"\\]*(?:\\.[^"\\]*)*)"'
                             r'|\S+)\s*', string, re.S):
        arg = match.group().strip()
        if arg[:1] == arg[-1:] and arg[:1] in '"\'':
            arg = arg[1:-1].encode('ascii', 'backslashreplace') \
                .decode('unicode-escape')
        try:
            arg = type(string)(arg)
        except UnicodeError:
            pass
        rv.append(arg)
    return rv 
Example 21
Project: ChromaTerm   Author: hSaria   File: __init__.py    License: MIT License
def get_matches(self, data):
        """Returns a list of tuples, each of which containing a start index, an
        end index, and the [chromaterm.Color][] object for that match. Only regex
        groups associated with a color are included.

        Args:
            data (str): A string to match regex against.
        """
        if not self.colors:
            return []

        matches = []

        for match in self.regex.finditer(data):
            for group in self.colors:
                start, end = match.span(group)

                # Zero-length match or optional group not in the match
                if start == end:
                    continue

                matches.append((start, end, self.colors[group]))

        return matches 
Example 22
Project: rekall   Author: google   File: generate_test.py    License: GNU General Public License v2.0
def ExtractTestCases(data):
    test_cases = []

    # The start of the section is at this file offset. This is mapped into
    # memory at the .text segment (which is at offset 0).
    m = re.search("__start__", data)
    origin = m.start()

    for match in re.finditer(
        r"(---.*?\.\.\.)\n<bin>(.+?)</bin>", data, re.M | re.S):
        offset, _ = match.span(2)

        # Replace the assembled segment with a base64 equivalent.
        segment = yaml.safe_load(match.group(1))
        segment["offset"] = offset - origin
        segment["data"] = match.group(2).encode("base64").strip()
        test_cases.append(segment)

    return test_cases 
Example 23
Project: pcocc   Author: cea-hpc   File: parser.py    License: GNU General Public License v3.0
def split_arg_string(string):
    """Given an argument string this attempts to split it into small parts."""
    rv = []
    for match in re.finditer(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                             r'|"([^"\\]*(?:\\.[^"\\]*)*)"'
                             r'|\S+)\s*', string, re.S):
        arg = match.group().strip()
        if arg[:1] == arg[-1:] and arg[:1] in '"\'':
            arg = arg[1:-1].encode('ascii', 'backslashreplace') \
                .decode('unicode-escape')
        try:
            arg = type(string)(arg)
        except UnicodeError:
            pass
        rv.append(arg)
    return rv 
Example 24
Project: multibootusb   Author: mbusb   File: udisks.py    License: GNU General Public License v2.0
def device(self, device_node_path):
        device_node_path = os.path.realpath(device_node_path)
        devname = device_node_path.split('/')[-1]

        # First we try a direct object path
        bd = self.bus.get_object('org.freedesktop.UDisks2',
                        '/org/freedesktop/UDisks2/block_devices/%s'%devname)
        try:
            device = bd.Get(self.BLOCK, 'Device',
                dbus_interface='org.freedesktop.DBus.Properties')
            device = bytearray(device).replace(b'\x00', b'').decode('utf-8')
        except:
            device = None

        if device == device_node_path:
            return bd

        # Enumerate all devices known to UDisks
        devs = self.bus.get_object('org.freedesktop.UDisks2',
                        '/org/freedesktop/UDisks2/block_devices')
        xml = devs.Introspect(dbus_interface='org.freedesktop.DBus.Introspectable')
        for dev in re.finditer(r'name=[\'"](.+?)[\'"]', type('')(xml)):
            bd = self.bus.get_object('org.freedesktop.UDisks2',
                '/org/freedesktop/UDisks2/block_devices/%s'%dev.group(1))
            try:
                device = bd.Get(self.BLOCK, 'Device',
                    dbus_interface='org.freedesktop.DBus.Properties')
                device = bytearray(device).replace(b'\x00', b'').decode('utf-8')
            except:
                device = None
            if device == device_node_path:
                return bd

        raise ValueError('%r not known to UDisks2'%device_node_path) 
Example 25
def process_output(command_output):
    warnings = {}
    regex = r"(.*):\swarning:\s(.*)"
    lines = command_output.split("\n")
    for line in lines[:-2]:
        matches = re.finditer(regex, line)
        for matchNum, match in enumerate(matches):
            try:
                warnings[match.group()] +=1
            except KeyError:
                warnings[match.group()] =1
    time = lines[-2]
    return time, warnings 
Example 26
Project: bioservices   Author: cokelaer   File: peptides.py    License: GNU General Public License v3.0
def get_phosphosite_position(self, uniprot_name, peptide):
        if uniprot_name not in self.sequences.keys():
            seq = self.get_fasta_sequence(uniprot_name)
            self.sequences[uniprot_name] = seq[:]
        else:
            seq = self.sequences[uniprot_name][:]
        positions = [x.start() for x in re.finditer("PQS", seq)]
        return positions 
Example 27
Project: mx   Author: graalvm   File: mx_benchmark.py    License: GNU General Public License v2.0
def parseResults(self, text):
        return (m.groupdict() for m in re.finditer(self.pattern, text, re.MULTILINE)) 
Example 28
Project: mx   Author: graalvm   File: mx_benchmark.py    License: GNU General Public License v2.0
def getCSVFiles(self, text):
        return (m.groupdict()[self.match_name] for m in re.finditer(self.pattern, text, re.MULTILINE)) 
Example 29
Project: rop-chainer   Author: wizh   File: strings.py    License: GNU General Public License v3.0
def _locate_useful(self, sections):
        for section in sections:
            for string in self._useful:
                matches = [m.start() for m in re.finditer(string, section["data"])]
                for index in matches:
                    self._strings +=\
                        [{"text" : section["data"][index:index+len(string)],
                          "vaddr" : section["vaddr"] + index}] 
Example 30
Project: jawfish   Author: war-and-code   File: tarfile.py    License: MIT License
def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.
        """
        offsets = []
        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
            offsets.append(int(match.group(1)))
        numbytes = []
        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
            numbytes.append(int(match.group(1)))
        next.sparse = list(zip(offsets, numbytes))