Python re.finditer() Examples

The following code examples demonstrate how to use re.finditer(). They are taken from open-source Python projects. You can vote up the examples you find useful or vote down those you don't.

Example 1
Project: pyblish-win   Author: pyblish   File: test_re.py    GNU Lesser General Public License v3.0 6 votes vote down vote up
def test_bug_817234(self):
        iter = re.finditer(r".*", "asdf")
        self.assertEqual(iter.next().span(), (0, 4))
        self.assertEqual(iter.next().span(), (4, 4))
        self.assertRaises(StopIteration, iter.next) 
Example 2
Project: Ansible-Example-AB2018   Author: umit-ozturk   File: ironware_facts.py    MIT License 6 votes vote down vote up
def parse_vll_endpoints(self, data):
        """Parse VLL endpoint descriptions out of raw CLI output.

        Returns a list of dicts, one per endpoint, each stamped with a
        'type' key: local endpoints carry 'tagged'/'vlan'/'innervlan'/
        'port'; remote peers carry 'vllpeer'/'lsp'.
        """
        facts = []

        # Local end-points, e.g. "End-point1: tagged vlan 100 e 1/1".
        # (The original looped with enumerate() but never used the index.)
        regex = r'End-point[0-9 ]*: +(?P<tagged>tagged|untagged) +(vlan +(?P<vlan>[0-9]+) +)?(inner- vlan +(?P<innervlan>[0-9]+) +)?(?P<port>e [0-9/]+|--)'
        for match in re.finditer(regex, data, re.IGNORECASE | re.DOTALL):
            f = match.groupdict()
            f['type'] = 'local'
            facts.append(f)

        # Remote peers, identified by peer address and tunnel LSP name.
        regex = r'Vll-Peer +: +(?P<vllpeer>[0-9\.]+).*Tunnel LSP +: +(?P<lsp>\S+)'
        for match in re.finditer(regex, data, re.IGNORECASE | re.DOTALL):
            f = match.groupdict()
            f['type'] = 'remote'
            facts.append(f)

        return facts
Example 3
Project: Ansible-Example-AB2018   Author: umit-ozturk   File: ironware_facts.py    MIT License 6 votes vote down vote up
def parse_vpls_endpoints(self, data):
        """Parse VPLS endpoint descriptions out of raw CLI output.

        Returns a list of dicts stamped with a 'type' key: local
        endpoints carry 'vlanid'/'tagged'/'untagged'; remote peers
        carry 'vllpeer'.
        """
        facts = []

        # Local endpoints: a VLAN id followed by its tagged/untagged ports.
        # (The original looped with enumerate() but never used the index.)
        regex = r'Vlan (?P<vlanid>[0-9]+)\s(?: +(?:L2.*)\s| +Tagged: (?P<tagged>.+)+\s| +Untagged: (?P<untagged>.+)\s)*'
        for match in re.finditer(regex, data, re.IGNORECASE):
            f = match.groupdict()
            f['type'] = 'local'
            facts.append(f)

        # Remote peers, identified by address only.
        regex = r'Peer address: (?P<vllpeer>[0-9\.]+)'
        for match in re.finditer(regex, data, re.IGNORECASE):
            f = match.groupdict()
            f['type'] = 'remote'
            facts.append(f)

        return facts
Example 4
Project: reportengine   Author: NNPDF   File: templateparser.py    GNU General Public License v2.0 6 votes vote down vote up
def get_targets_and_replace(source):
    """Scan *source* line by line, yielding the value parsed from every
    custom-delimiter match while writing the surrounding text to an
    in-memory buffer (parse_match writes the substitution into it).

    The buffer's final contents are delivered as the generator's return
    value (i.e. via StopIteration.value).
    """
    buffer = StringIO()

    for lineno, line in enumerate(source, 1):
        delimiter_matches = list(re.finditer(custom_delimiter_re, line))

        # Fast path: nothing to substitute on this line.
        if not delimiter_matches:
            buffer.write(line)
            continue

        cursor = 0
        for delimiter_match in delimiter_matches:
            start, end = delimiter_match.span()
            # Copy the text between the previous match and this one.
            buffer.write(line[cursor:start])
            try:
                yield parse_match(delimiter_match, line, lineno, buffer)
            except BadToken as e:
                raise CustomParsingError(e, lineno, delimiter_match.pos)
            cursor = end
        # Copy whatever follows the last match.
        buffer.write(line[cursor:])

    return buffer.getvalue()
Example 5
Project: COSC367-Artificial_Intelligence   Author: santochaoya   File: KBGraph.py    GNU General Public License v3.0 6 votes vote down vote up
def clauses(knowledge_base):
    """Takes the string of a knowledge base; returns an iterator for pairs
    of (head, body) for propositional definite clauses in the
    knowledge base. Atoms are returned as strings. The head is an atom
    and the body is a (possibly empty) list of atoms.

    Author: Kourosh Neshatian

    """
    # BUG FIX: the original character class was "[a-zA-z\d_]"; the A-z
    # range also spans the ASCII punctuation between 'Z' and 'a'
    # ("[", "\", "]", "^", "`"), so malformed atoms were accepted.
    ATOM   = r"[a-z][a-zA-Z\d_]*"
    HEAD   = r"\s*(?P<HEAD>{ATOM})\s*".format(**locals())
    BODY   = r"\s*(?P<BODY>{ATOM}\s*(,\s*{ATOM}\s*)*)\s*".format(**locals())
    CLAUSE = r"{HEAD}(:-{BODY})?\.".format(**locals())
    KB     = r"^({CLAUSE})*\s*$".format(**locals())

    # The whole knowledge base must consist of well-formed clauses.
    assert re.match(KB, knowledge_base)

    for mo in re.finditer(CLAUSE, knowledge_base):
        yield mo.group('HEAD'), re.findall(ATOM, mo.group('BODY') or "")
Example 6
Project: COSC367-Artificial_Intelligence   Author: santochaoya   File: DerivedAtoms.py    GNU General Public License v3.0 6 votes vote down vote up
def clauses(knowledge_base):
    """Takes the string of a knowledge base; returns an iterator for pairs
    of (head, body) for propositional definite clauses in the
    knowledge base. Atoms are returned as strings. The head is an atom
    and the body is a (possibly empty) list of atoms.

    Author: Kourosh Neshatian

    """
    # BUG FIX: the original character class was "[a-zA-z\d_]"; the A-z
    # range also spans the ASCII punctuation between 'Z' and 'a'
    # ("[", "\", "]", "^", "`"), so malformed atoms were accepted.
    ATOM   = r"[a-z][a-zA-Z\d_]*"
    HEAD   = r"\s*(?P<HEAD>{ATOM})\s*".format(**locals())
    BODY   = r"\s*(?P<BODY>{ATOM}\s*(,\s*{ATOM}\s*)*)\s*".format(**locals())
    CLAUSE = r"{HEAD}(:-{BODY})?\.".format(**locals())
    KB     = r"^({CLAUSE})*\s*$".format(**locals())

    # The whole knowledge base must consist of well-formed clauses.
    assert re.match(KB, knowledge_base)

    for mo in re.finditer(CLAUSE, knowledge_base):
        yield mo.group('HEAD'), re.findall(ATOM, mo.group('BODY') or "")
Example 7
Project: mindustry-modding   Author: SimonWoodburyForget   File: to_wiki.py    GNU General Public License v3.0 6 votes vote down vote up
def normalize(md):
    '''Normalize anchors.

    Rewrites every inline markdown link whose target is a page anchor
    (``[text](#Some Anchor)``) so the href is lower-case, with space and
    %20 turned into '-', and '~'/'.' dropped.
    '''
    def on_match(link):
        desc = link.group(1)
        old = link.group(2)
        href = (link.group(2)
                .lower()
                .replace('%20', '-')
                .replace(" ", "-")
                .replace("~", "")
                .replace(".", ""))
        # (original, rewritten) pair to apply as a replacement.
        return f'[{desc}]({old})', f'[{desc}]({href})'

    # Leftover debug print(old, new) removed: a pure transform should not
    # write to stdout. set() de-duplicates repeated links.
    replacers = set(on_match(x) for x in re.finditer(r'\[([^\]\[]*)\]\((#[^\)]*)\)', md))
    return ft.reduce(lambda md, x: md.replace(x[0], x[1]), replacers, md)
Example 8
Project: cloudygo   Author: sethtroisi   File: sgf_utils.py    Apache License 2.0 6 votes vote down vote up
def canonical_sgf(board_size, sgf):
    """Rewrite the moves of *sgf* into canonical orientation.

    Extracts B/W move tokens, converts them to board coordinates,
    canonicalizes the whole sequence via canonical_moves(), then splices
    the transformed move strings back into the original SGF text.

    Returns the rewritten SGF string (or *sgf* unchanged if falsy).
    """
    if not sgf:
        return sgf

    # NOTE: This should really utilize a real SGF parser...
    # One with tests and better utils...
    tokens = list(re.finditer('(;[BW]\[(..)\]|\[(..):)', sgf))
    moves = [token.group(2) or token.group(3) for token in tokens]

    # Round-trip each move through board coordinates so the whole game
    # can be canonicalized as a single sequence.
    cords = ';'.join([sgf_to_cord(board_size, 'B[' + m + ']') for m in moves])
    canonical = canonical_moves(board_size, cords).split(';')
    new_moves = [cord_to_sgf(board_size, c) for c in canonical]

    new_sgf = list(sgf)
    for token, move, new_move in zip(tokens, moves, new_moves):
        # Splice the rewritten token over the original span.
        # NOTE(review): slice assignment assumes the replacement has the
        # same length as the original, otherwise later token spans shift
        # -- confirm move encodings are fixed-width.
        new_token = list(token.group(0).replace(move, new_move))
        new_sgf[token.start():token.end()] = new_token
        #print (token.span(), move, new_move, "\t", ''.join(new_sgf))

    return ''.join(new_sgf)
Example 9
Project: cloudygo   Author: sethtroisi   File: sgf_utils.py    Apache License 2.0 6 votes vote down vote up
def raw_game_data(filepath, data):
    """Split SGF *data* into move tokens and parsed comment tokens.

    Returns (moves, parsed_comments) where parsed_comments applies
    fully_parse_comment to every C[...] block found.  *filepath* is
    accepted for interface parity but not used here.
    """
    # TODO this doesn't find comments not on moves.
    # UGLY HACK to allow a comment before or after each W[]/B[] tag.
    token_re = r';\s*([BW]\[[a-t]*\]|C\[[^]]*\])\s*([BW]\[[a-s]*\]|C\[[^]]*\])?'

    moves = []
    comments = []
    for match in re.finditer(token_re, data):
        first = match.group(1)
        second = match.group(2)
        if first.startswith('C'):
            # Comment precedes the move.
            comments.append(first)
            moves.append(second)
        else:
            moves.append(first)
            if second:
                comments.append(second)

    # format is: resign, (pv_moves, pv_counts), (Q0, Qpv), table
    parsed_comments = [fully_parse_comment(c) for c in comments]
    return moves, parsed_comments
Example 10
Project: rop-chainer   Author: wizh   File: gadgets.py    GNU General Public License v3.0 6 votes vote down vote up
def _locate_gadgets(self, section, terminals, gadget_type):
        """Scan a binary section for ROP gadgets ending in a terminal.

        :param section: dict with 'data' (raw section bytes) and 'vaddr'
            (section load address) keys.
        :param terminals: iterable of (byte-pattern, length) pairs; the
            pattern marks a gadget-terminating instruction (e.g. ret).
        :param gadget_type: tag stored with every gadget found.

        Appends {'vaddr', 'insts', 'gadget_type'} dicts to self._gadgets
        as a side effect; returns nothing.
        """
        # x86 32-bit disassembler (capstone).
        disassembler = cs.Cs(cs.CS_ARCH_X86, cs.CS_MODE_32)
        for terminal in terminals:
            # Byte offsets of every occurrence of the terminal pattern.
            matches = [match.start() for match in re.finditer(terminal[0],
                                                              section["data"])]
            for index in matches:
                # Try progressively longer windows ending at the terminal;
                # self._options.depth bounds how far back we look.
                for i in range(self._options.depth):
                    gadget = ""
                    instructions = disassembler.disasm_lite(
                        section["data"][index-i:index+terminal[1]],
                        section["vaddr"]+index)
                    for instruction in instructions:
                        # disasm_lite yields (addr, size, mnemonic, op_str).
                        gadget += (str(instruction[2]) + " " +
                                   str(instruction[3])   + " ; ")

                    if gadget:
                        gadget = gadget.replace("  ", " ")
                        gadget = gadget[:-3]  # drop the trailing " ; "
                        self._gadgets += [{"vaddr" : section["vaddr"]+index-i,
                                           "insts" : gadget,
                                           "gadget_type" : gadget_type}]
Example 11
Project: alpyca   Author: alpyca   File: launch.py    MIT License 6 votes vote down vote up
def find_find_action(self, text):
        """Resolve every ``$(find <pkg> ...)`` substitution in *text*.

        Returns a list of Match objects (start, end, keyword, value,
        replacement).  When the expression is immediately followed by a
        path component (``$(find pkg)/sub/path``) the replacement is
        resolved with find(); otherwise simple_find() is used.
        """
        expr = r'\$\(find (.*?)\)'

        matches = []
        for re_match in re.finditer(expr, text):
            start = re_match.start()
            end = re_match.end()

            # Strip "$(" and ")" and split into keyword + argument string.
            words = text[start+2:end-1].split(' ')
            keyword = words[0]
            value = ' '.join(words[1:])

            if len(text) > end and text[end] == '/':
                # A path follows the closing paren: extend up to the next
                # whitespace/quote and resolve it relative to the package.
                end_postfix = end + 1
                while not (len(text) == end_postfix or text[end_postfix] in [' ', '\'', '\"']):
                    end_postfix += 1
                replacement = find(value, text[end+1:end_postfix])
            else:
                replacement = simple_find(value)
            match = Match(start, end, keyword, value, replacement)
            matches.append(match)

        return matches
Example 12
Project: alpyca   Author: alpyca   File: launch.py    MIT License 6 votes vote down vote up
def find_action(self, text, action):
        """Resolve every ``$(<action> ...)`` substitution in *text*.

        'eval' and 'find' are delegated to dedicated handlers; any other
        action keyword is looked up in self.actions and applied to the
        argument string.

        :raises ParsingException: if the keyword has no registered handler.
        """
        if action == 'eval':
            return self.find_eval_action(text)
        elif action == 'find':
            return self.find_find_action(text)

        # NOTE(review): *action* is interpolated into the regex unescaped;
        # fine for plain identifiers, but a name containing regex
        # metacharacters would change the pattern -- confirm callers only
        # pass simple action names.
        expr = r'\$\(' + action + '(.*?)\)'

        matches = []
        for re_match in re.finditer(expr, text):
            start = re_match.start()
            end = re_match.end()

            # Strip "$(" and ")" and split into keyword + argument string.
            words = text[start+2:end-1].split(' ')
            keyword = words[0]
            value = ' '.join(words[1:])

            if keyword in self.actions:
                replacement = self.actions[keyword](value)
            else:
                raise ParsingException('Unknown keyword {}!'.format(keyword))
            match = Match(start, end, keyword, value, replacement)
            matches.append(match)

        return matches
Example 13
Project: jawfish   Author: war-and-code   File: _string.py    MIT License 6 votes vote down vote up
def formatter_parser(*args, **kw):
    """parse the argument as a format string

    Returns a list of (literal_text, field_name, flags, None) tuples,
    one per "literal{field}" chunk of the input string.
    """
    assert len(args) == 1
    assert isinstance(args[0], str)

    _result = []
    # Each match is an optional literal run followed by an optional
    # {...} replacement field.  Raw strings avoid invalid-escape warnings.
    for _match in re.finditer(r"([^{]*)?(\{[^}]*\})?", args[0]):
        _pre, _fmt = _match.groups()
        if _fmt is None:
           _result.append((_pre, None, None, None))
        elif _fmt == '{}':
           _result.append((_pre, '', '', None))
        else:
           _m = re.match(r"\{([^!]*)!?(.*)?\}", _fmt)
           # BUG FIX: the original called _m.groups(0) / _m.groups(1),
           # which return the tuple of ALL groups (the argument is only a
           # default for non-participating groups) -- group(n) is what
           # was intended.
           _name = _m.group(1)
           _flags = _m.group(2)

           _result.append((_pre, _name, _flags, None))

    return _result
Example 14
Project: jawfish   Author: war-and-code   File: test_re.py    MIT License 6 votes vote down vote up
def test_finditer(self):
        iter = re.finditer(r":+", "a:b::c:::d")
        self.assertEqual([item.group(0) for item in iter],
                         [":", "::", ":::"])

        pat = re.compile(r":+")
        iter = pat.finditer("a:b::c:::d", 1, 10)
        self.assertEqual([item.group(0) for item in iter],
                         [":", "::", ":::"])

        pat = re.compile(r":+")
        iter = pat.finditer("a:b::c:::d", pos=1, endpos=10)
        self.assertEqual([item.group(0) for item in iter],
                         [":", "::", ":::"])

        pat = re.compile(r":+")
        iter = pat.finditer("a:b::c:::d", endpos=10, pos=1)
        self.assertEqual([item.group(0) for item in iter],
                         [":", "::", ":::"])

        pat = re.compile(r":+")
        iter = pat.finditer("a:b::c:::d", pos=3, endpos=8)
        self.assertEqual([item.group(0) for item in iter],
                         ["::", "::"]) 
Example 15
Project: Repobot   Author: Desgard   File: document.py    MIT License 6 votes vote down vote up
def find_backwards(self, sub, in_current_line=False, ignore_case=False, count=1):
        """
        Find `text` before the cursor, return position relative to the cursor
        position. Return `None` if nothing was found.

        :param sub: Substring to search for.
        :param in_current_line: Search only within the current line.
        :param ignore_case: Case-insensitive search when True.
        :param count: Find the n-th occurrence.
        """
        if in_current_line:
            before_cursor = self.current_line_before_cursor[::-1]
        else:
            before_cursor = self.text_before_cursor[::-1]

        flags = re.IGNORECASE if ignore_case else 0

        # Search the reversed text so matches come nearest-to-cursor first.
        iterator = re.finditer(re.escape(sub[::-1]), before_cursor, flags)

        # NOTE: the original wrapped this loop in try/except StopIteration;
        # a for-loop already consumes StopIteration, so the handler was
        # dead code and has been removed.  Falls through to return None.
        for i, match in enumerate(iterator):
            if i + 1 == count:
                return - match.start(0) - len(sub)
Example 16
Project: Repobot   Author: Desgard   File: document.py    MIT License 6 votes vote down vote up
def find_next_word_beginning(self, count=1, WORD=False):
        """
        Return an index relative to the cursor position pointing to the start
        of the next word. Return `None` if nothing was found.

        :param count: Find the n-th word start.
        :param WORD: Use the whitespace-delimited "WORD" definition.
        """
        # Negative count means searching backwards instead.
        if count < 0:
            return self.find_previous_word_beginning(count=-count, WORD=WORD)

        regex = _FIND_BIG_WORD_RE if WORD else _FIND_WORD_RE
        iterator = regex.finditer(self.text_after_cursor)

        # NOTE: the original guarded this loop with try/except
        # StopIteration; a for-loop never propagates StopIteration, so the
        # guard was dead code and has been dropped.  Returns None if no
        # match reaches the requested count.
        for i, match in enumerate(iterator):
            # Take first match, unless it's the word on which we're right now.
            if i == 0 and match.start(1) == 0:
                count += 1

            if i + 1 == count:
                return match.start(1)
Example 17
Project: Repobot   Author: Desgard   File: document.py    MIT License 6 votes vote down vote up
def find_previous_word_beginning(self, count=1, WORD=False):
        """
        Return an index relative to the cursor position pointing to the start
        of the previous word. Return `None` if nothing was found.

        :param count: Find the n-th word start going backwards.
        :param WORD: Use the whitespace-delimited "WORD" definition.
        """
        # Negative count means searching forwards instead.
        if count < 0:
            return self.find_next_word_beginning(count=-count, WORD=WORD)

        # Scan the reversed text so matches come nearest-to-cursor first.
        regex = _FIND_BIG_WORD_RE if WORD else _FIND_WORD_RE
        iterator = regex.finditer(self.text_before_cursor[::-1])

        # NOTE: the original guarded this loop with try/except
        # StopIteration; a for-loop never propagates StopIteration, so the
        # guard was dead code and has been dropped.  Returns None if no
        # match reaches the requested count.
        for i, match in enumerate(iterator):
            if i + 1 == count:
                return - match.end(1)
Example 18
Project: Repobot   Author: Desgard   File: document.py    MIT License 6 votes vote down vote up
def find_previous_word_ending(self, count=1, WORD=False):
        """
        Return an index relative to the cursor position pointing to the end
        of the previous word. Return `None` if nothing was found.

        :param count: Find the n-th word ending going backwards.
        :param WORD: Use the whitespace-delimited "WORD" definition.
        """
        # Negative count means searching forwards instead.
        if count < 0:
            return self.find_next_word_ending(count=-count, WORD=WORD)

        # Include the character under the cursor, then the reversed prefix,
        # so word endings are found nearest-to-cursor first.
        text_before_cursor = self.text_after_cursor[:1] + self.text_before_cursor[::-1]

        regex = _FIND_BIG_WORD_RE if WORD else _FIND_WORD_RE
        iterator = regex.finditer(text_before_cursor)

        # NOTE: the original guarded this loop with try/except
        # StopIteration; a for-loop never propagates StopIteration, so the
        # guard was dead code and has been dropped.  Returns None if no
        # match reaches the requested count.
        for i, match in enumerate(iterator):
            # Take first match, unless it's the word on which we're right now.
            if i == 0 and match.start(1) == 0:
                count += 1

            if i + 1 == count:
                return -match.start(1) + 1
Example 19
Project: razzy-spinner   Author: rafasashi   File: tree.py    GNU General Public License v3.0 6 votes vote down vote up
def _ieer_read_text(s, top_node):
    """Parse an IEER-format string into a Tree rooted at *top_node*.

    Chunks delimited by <b_...> / <e_...> tags become subtrees labelled
    with the tag's type; plain tokens become leaves.

    :raises ValueError: if the string is malformed.
    """
    stack = [Tree(top_node, [])]
    # Tokens are either a full <...> tag or a run of non-space text.
    for piece_m in re.finditer('<[^>]+>|[^\s<]+', s):
        piece = piece_m.group()
        try:
            if piece.startswith('<b_'):
                m = _IEER_TYPE_RE.match(piece)
                if m is None:
                    # Py3 fix: the original used the Python 2 print
                    # statement (``print 'XXXX', piece``).
                    print('XXXX', piece)
                chunk = Tree(m.group('type'), [])
                stack[-1].append(chunk)
                stack.append(chunk)
            elif piece.startswith('<e_'):
                stack.pop()
            # (Other '<...>' tags -- unexpected HTML -- were deliberately
            # left unhandled in commented-out code; treated as plain text.)
            else:
                stack[-1].append(piece)
        except (IndexError, ValueError):
            raise ValueError('Bad IEER string (error at character %d)' %
                             piece_m.start())
    if len(stack) != 1:
        raise ValueError('Bad IEER string')
    return stack[0]
Example 20
Project: razzy-spinner   Author: rafasashi   File: chunkparser_app.py    GNU General Public License v3.0 6 votes vote down vote up
def show_help(self, tab):
        """Display the help text for *tab* in the help box widget.

        Substitutes the <<TAGSET>> placeholder with the sorted tag table,
        highlights the selected tab, and applies HELP_AUTOTAG markup:
        <tag>...</tag> spans become styled regions with the marker text
        elided.
        """
        self.helpbox['state'] = 'normal'
        self.helpbox.delete('1.0', 'end')
        for (name, tabstops, text) in self.HELP:
            if name == tab:
                # Sort tags: word-like tag names first, then the rest.
                text = text.replace('<<TAGSET>>', '\n'.join(
                    ('\t%s\t%s' % item for item in sorted(list(self.tagset.items()),
                    key=lambda t_w:re.match('\w+',t_w[0]) and (0,t_w[0]) or (1,t_w[0])))))

                self.helptabs[name].config(**self._HELPTAB_FG_PARAMS)
                self.helpbox.config(tabs=tabstops)
                self.helpbox.insert('1.0', text+'\n'*20)
                # Text-widget index for a character offset into the text.
                C = '1.0 + %d chars'
                for (tag, params) in self.HELP_AUTOTAG:
                    pattern = '(?s)(<%s>)(.*?)(</%s>)' % (tag, tag)
                    for m in re.finditer(pattern, text):
                        # Hide the opening marker, style the contents,
                        # hide the closing marker.
                        self.helpbox.tag_add('elide',
                                             C % m.start(1), C % m.end(1))
                        self.helpbox.tag_add('tag-%s' % tag,
                                             C % m.start(2), C % m.end(2))
                        self.helpbox.tag_add('elide',
                                             C % m.start(3), C % m.end(3))
            else:
                self.helptabs[name].config(**self._HELPTAB_BG_PARAMS)
        self.helpbox['state'] = 'disabled'
Example 21
Project: razzy-spinner   Author: rafasashi   File: chunkparser_app.py    GNU General Public License v3.0 6 votes vote down vote up
def _syntax_highlight_grammar(self, grammar):
        """Re-apply syntax highlighting to the grammar text widget.

        Tags #-comments (respecting backslash escapes), angle brackets,
        and braces, line by line.  No-op when the window is closed.
        """
        if self.top is None: return
        self.grammarbox.tag_remove('comment', '1.0', 'end')
        self.grammarbox.tag_remove('angle', '1.0', 'end')
        self.grammarbox.tag_remove('brace', '1.0', 'end')
        self.grammarbox.tag_add('hangindent', '1.0', 'end')
        for lineno, line in enumerate(grammar.split('\n')):
            if not line.strip(): continue
            # Group 2 captures an unescaped trailing #-comment, if any.
            m = re.match(r'(\\.|[^#])*(#.*)?', line)
            comment_start = None
            if m.group(2):
                comment_start = m.start(2)
                # Text-widget indices are "line.column" with 1-based lines.
                s = '%d.%d' % (lineno+1, m.start(2))
                e = '%d.%d' % (lineno+1, m.end(2))
                self.grammarbox.tag_add('comment', s, e)
            for m in re.finditer('[<>{}]', line):
                # Brackets inside the comment keep the comment style.
                if comment_start is not None and m.start() >= comment_start:
                    break
                s = '%d.%d' % (lineno+1, m.start())
                e = '%d.%d' % (lineno+1, m.end())
                if m.group() in '<>':
                    self.grammarbox.tag_add('angle', s, e)
                else:
                    self.grammarbox.tag_add('brace', s, e)
Example 22
Project: razzy-spinner   Author: rafasashi   File: texttiling.py    GNU General Public License v3.0 6 votes vote down vote up
def _mark_paragraph_breaks(self, text):
        """Identifies indented text or line breaks as the beginning of
        paragraphs"""

        MIN_PARAGRAPH = 100
        pattern = re.compile("[ \t\r\f\v]*\n[ \t\r\f\v]*\n[ \t\r\f\v]*")
        matches = pattern.finditer(text)

        last_break = 0
        pbreaks = [0]
        for pb in matches:
            if pb.start()-last_break < MIN_PARAGRAPH:
                continue
            else:
                pbreaks.append(pb.start())
                last_break = pb.start()

        return pbreaks 
Example 23
Project: yang-explorer   Author: CiscoDevNet   File: adapter.py    Apache License 2.0 6 votes vote down vote up
def get_ydk_def_names(python_ydk_defs):
        """
        Get the Python YDK definition names.

        Scans *python_ydk_defs* for "def <name>" occurrences and returns
        the names joined by single spaces (with a trailing space, matching
        the historical output format; empty string when nothing matches).
        """
        logging.debug('get_ydk_def_names: python_ydk_defs : \n' + python_ydk_defs)

        import re

        # NOTE: the original pattern was r"def \w+()"; the trailing "()"
        # is an empty group that matches nothing, so r"def \w+" matches
        # exactly the same text.
        names = []
        for m in re.finditer(r"def \w+", python_ydk_defs):
            logging.debug('get_ydk_def_names: m.group(0): \n' + m.group(0))
            names.append(m.group(0).replace('def ', ''))

        # Single join instead of repeated string concatenation (O(n) vs O(n^2)).
        ydk_def_names = ''.join(name + ' ' for name in names)

        logging.debug('get_ydk_def_names: ydk_def_names : \n' + ydk_def_names)

        return ydk_def_names
Example 24
Project: ng   Author: cls1991   File: ng.py    Apache License 2.0 6 votes vote down vote up
def _hack_ip():
    """Determine the local and public IPv4 addresses of this host.

    Returns (ok, text): (False, message) on an unsupported OS, otherwise
    (True, "<local>\\n<public>").  Either address falls back to
    DEFAULT_IP_ADDRESS when detection fails.
    """
    system = _system()
    if system not in SUPPORTED_SYSTEMS:
        return False, 'Unknown operation system {0}'.format(system)

    local_ip = public_ip = DEFAULT_IP_ADDRESS
    # Pick the platform's interface-listing command and the regex that
    # extracts IPv4 addresses from its output.
    if system == 'Darwin':
        command = ['ifconfig']
        pattern = re.compile(r'inet (?P<ip>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})')
    elif system == 'Linux':
        command = ['ip', 'addr']
        pattern = re.compile(r'inet (?P<ip>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})')
    else:
        # Anything else in SUPPORTED_SYSTEMS -- presumably Windows.
        command = ['ipconfig']
        pattern = re.compile(r'IPv4.+: (?P<ip>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})')
    rs = _exec(command)
    # First address that differs from the default placeholder wins.
    for match in re.finditer(pattern, rs):
        sip = match.group('ip')
        if sip != DEFAULT_IP_ADDRESS:
            local_ip = sip
            break
    try:
        # The public address is whatever the echo service sees us as.
        r = requests.get(VERIFY_HOST)
        public_ip = r.json()['origin']
    except requests.RequestException:
        # Best-effort: keep the default public IP on network failure.
        pass
    return True, '{0}\n{1}'.format(local_ip, public_ip)
Example 25
Project: xstrings   Author: elicn   File: xstrings.py    The Unlicense 5 votes vote down vote up
def finditer(content, encodings, charset, min_size):
    '''Generator that yields every obfuscated-string match inside the given
    content which is at least min_size characters long.

    @param    content    Binary content to search in
    @param    encodings  Dictionary mapping an encoding name to a tuple of
                         (encoding function, iterable of keys to try)
    @param    charset    An iterable of the characters to consider part of
                         a string
    @param    min_size   Minimal string size to consider as a string match

    @return Tuples of (match offset in content, encoding name, encoding
            key, deobfuscated string reconstructed from the blob found)
    '''
    for encoding_name, (encode, key_range) in encodings.items():

        # Try every key this encoding function supports.
        for key in key_range:
            encoded_charset = encode(charset, key)

            # A "string" is a run of min_size or more encoded characters.
            pattern = '[%s]{%d,}' % (re.escape(encoded_charset), min_size)

            for match in re.finditer(pattern, content):
                # Reverse the obfuscation character by character.
                plain = ''.join(charset[encoded_charset.index(ch)]
                                for ch in match.group(0))
                yield (match.start(0), encoding_name, key, plain)

        # Drop cached compiled patterns between encodings.
        re.purge()
Example 26
Project: xstrings   Author: elicn   File: xstrings.py    The Unlicense 5 votes vote down vote up
def main(args):
    """Entry point: scan each input file for obfuscated strings and print
    the findings in GNU-strings-like format."""
    # prepare the format string for file offsets if required
    if args.radix:
        radixfmt = '%%7%s' % args.radix

    # iterate over input files list
    for fd in args.infiles:

        # gnu strings emits '{standard input}' instead of 'stdin' if
        # required to emit the file name; stick with the GNU strings style
        if args.print_file_name:
            filename = '{standard input}' if fd == stdin else fd.name

        # iterate over findings in current input file
        # each iteration returns offset, encoding name, encoding key and deobfuscated string found
        for offset, enc_name, enc_key, deobf in finditer(fd.read(), args.encodings, args.charset, args.bytes):
            # Py3 fix: the Python 2 print statements with trailing commas
            # become print(..., end=' ').
            if args.print_file_name:
                print('%s:' % filename, end=' ')

            if args.radix:
                print(radixfmt % offset, end=' ')

            print('%s(%x) %s' % (enc_name, enc_key, deobf))
Example 27
Project: fs_image   Author: facebookincubator   File: test_extents_to_chunks.py    MIT License 5 votes vote down vote up
def _gen_ranges_from_figure(figure: str):
    for s in textwrap.dedent(figure.strip('\n')).split('\n'):
        s = s.rstrip()
        # Number lines should aid reading off positions. Check they're right.
        if re.match('[0-9]*$', s):
            assert ('0123456789' * math.ceil(len(s) / 10))[:len(s)] == s, \
                f'Bad number line {s} in {figure}'
            continue
        offset = 0
        for m in re.finditer(r'(.)\1*', s):
            v = m.group(0)
            if v[0] != ' ':
                yield v[0], offset, len(v)
            offset += len(v) 
Example 28
Project: leapp-repository   Author: oamg   File: lib_spamc.py    Apache License 2.0 5 votes vote down vote up
def _parse_spamc_ssl_argument(content):
    """Return the SSL/TLS protocol selected by the ``--ssl`` arguments in
    *content* ('sslv3' or 'tlsv1'), or None when absent.

    'tlsv1' wins over 'sslv3' regardless of argument order.
    """
    stripped = _remove_comments(content)

    chosen = None
    # --ssl may be separated from its value by whitespace or '='.
    for match in re.finditer(r'(?<!\S)--ssl(\s+|=)(sslv3|tlsv1)(?!\S)', stripped):
        protocol = match.group(2)
        # tlsv1 always wins; sslv3 only counts while nothing else is set.
        if protocol == 'tlsv1' or (protocol == 'sslv3' and chosen is None):
            chosen = protocol
    return chosen
Example 29
Project: autofff   Author: ChiefGokhlayeh   File: scanner.py    MIT License 5 votes vote down vote up
def _read_symbols(self, pathToObj: str) -> SymbolTable:
        """Run ``objdump -t`` on *pathToObj* and parse its symbol table.

        NOTE(review): the parsing looks unfinished -- fuNMatches uses an
        empty pattern and is never consumed, each SymbolTable is built
        with None symbols, and nothing is returned despite the
        SymbolTable annotation.  Confirm intended behavior upstream.

        :raises RuntimeError: if the external tool cannot be launched.
        """
        path_list = ['objdump', '-t']
        path_list += [pathToObj]

        try:
            pipe = subprocess.Popen(path_list,
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True)
            text = pipe.communicate()[0]
            # One match per object file: the header plus its symbol lines.
            matches = re.finditer(
                r"(?P<object>.*):\s+file format.*\s+SYMBOL TABLE:\n(?P<symbols>(?:.+(\n|$))*)", text, re.MULTILINE)
            tables = list()
            for match in matches:
                print(match)
                objectFile = match.group('object') or pathToObj
                symbols = match.group('symbols')
                # The *ABS* entry names the originating source file.
                symMatch = re.search(
                    r"\*ABS\*\s+[0-9a-fA-F]*\s+(?P<source>.*)", symbols, re.MULTILINE)
                sourceFile = symMatch.group('source')
                fuNMatches = re.finditer(r"", symbols, re.MULTILINE)

                tables.append(SymbolTable(objectFile, None))
                print(objectFile)
                print(sourceFile)
                print(symbols)
        except OSError as e:
            # NOTE(review): the message says 'readelf' although objdump is
            # invoked above -- kept as-is (doc-only change).
            raise RuntimeError("Unable to invoke 'readelf'.  " +
                               'Make sure its path was passed correctly\n' +
                               ('Original error: %s' % e))
Example 30
Project: pyblish-win   Author: pyblish   File: test_re.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_finditer(self):
        iter = re.finditer(r":+", "a:b::c:::d")
        self.assertEqual([item.group(0) for item in iter],
                         [":", "::", ":::"]) 
Example 31
Project: pyblish-win   Author: pyblish   File: test_re.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_bug_581080(self):
        iter = re.finditer(r"\s", "a b")
        self.assertEqual(iter.next().span(), (1,2))
        self.assertRaises(StopIteration, iter.next)

        scanner = re.compile(r"\s").scanner("a b")
        self.assertEqual(scanner.search().span(), (1, 2))
        self.assertIsNone(scanner.search()) 
Example 32
Project: pyblish-win   Author: pyblish   File: test_re.py    GNU Lesser General Public License v3.0 5 votes vote down vote up
def test_dealloc(self):
        """Regression test for issue 3299: a huge value in the code list
        passed to _sre.compile must raise OverflowError, not segfault."""
        # issue 3299: check for segfault in debug build
        import _sre
        # the overflow limit is different on wide and narrow builds and it
        # depends on the definition of SRE_CODE (see sre.h).
        # 2**128 should be big enough to overflow on both. For smaller values
        # a RuntimeError is raised instead of OverflowError.
        # NOTE(review): this exercises the CPython 2 _sre.compile
        # signature; on Python 3 _sre.compile takes more arguments, so
        # confirm before porting.
        long_overflow = 2**128
        self.assertRaises(TypeError, re.finditer, "a", {})
        self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
Example 33
Project: kuaa   Author: rafaelwerneck   File: util.py    GNU General Public License v3.0 5 votes vote down vote up
def read_array(string):
    """
    Read string that contains a numpy array.

    Locates the "array(" / ")" delimiters in *string*.  NOTE(review): the
    match iterators are computed but never consumed and nothing is
    returned -- this looks like a truncated extraction; confirm against
    the project source before relying on it.
    """
    # Raw strings fix the invalid "\(" / "\)" escape sequences that raise
    # DeprecationWarning (and will eventually be a SyntaxError).
    search_array = r"array\("
    search_end = r"\)"
    find_array = re.finditer(search_array, string)
    find_end = re.finditer(search_end, string)
Example 34
Project: Ansible-Example-AB2018   Author: umit-ozturk   File: tarfile.py    MIT License 5 votes vote down vote up
def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.
        """
        offsets = []
        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
            offsets.append(int(match.group(1)))
        numbytes = []
        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
            numbytes.append(int(match.group(1)))
        next.sparse = list(zip(offsets, numbytes)) 
Example 35
Project: flasky   Author: RoseOu   File: _postgres_builtins.py    MIT License 5 votes vote down vote up
def parse_keywords(f):
    """Extract the keyword column from the Postgres keyword-table markup
    read from file-like object *f*, sorted alphabetically.

    :raises ValueError: when no keyword is found.
    """
    pair_re = (r'\s*<entry><token>([^<]+)</token></entry>\s*'
               r'<entry>([^<]+)</entry>')
    # Group 1 is the keyword token; group 2 (its description) is ignored.
    keywords = [m.group(1) for m in re.finditer(pair_re, f.read())]

    if not keywords:
        raise ValueError('no keyword found')

    keywords.sort()
    return keywords
Example 36
Project: flasky   Author: RoseOu   File: tarfile.py    MIT License 5 votes vote down vote up
def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.
        """
        offsets = []
        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
            offsets.append(int(match.group(1)))
        numbytes = []
        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
            numbytes.append(int(match.group(1)))
        next.sparse = list(zip(offsets, numbytes)) 
Example 37
Project: dynamic-training-with-apache-mxnet-on-aws   Author: awslabs   File: process_output.py    Apache License 2.0 5 votes vote down vote up
def process_output(command_output):
    """Summarise compiler warnings from *command_output*.

    Returns (time, warnings) where *time* is the second-to-last line of
    the output and *warnings* maps each distinct warning line to the
    number of times it occurred.
    """
    warnings = {}
    regex = r"(.*):\swarning:\s(.*)"
    lines = command_output.split("\n")
    # The last two entries hold the timing line and the trailing empty
    # string from the final newline, so they are excluded from the scan.
    for line in lines[:-2]:
        for match in re.finditer(regex, line):
            key = match.group()
            # dict.get() replaces the original try/except KeyError counter;
            # the unused enumerate() index is gone as well.
            warnings[key] = warnings.get(key, 0) + 1
    time = lines[-2]
    return time, warnings
Example 38
Project: CodeDog   Author: BruceDLong   File: CodeGenerator.py    GNU General Public License v2.0 5 votes vote down vote up
def codeUserMesg(item, xlator):
    """Split a message template into a format string and its arguments.

    *item* contains placeholders of the form %i`expr`, %s`expr`, etc.;
    the expression between backticks is lifted into the argument list
    while the %x marker stays in the format string.  The final code is
    assembled by the language-specific formatter command in *xlator*.
    """
    # TODO: Make 'user messages' interpolate and adjust for locale.
    fmt_parts = []
    arg_parts = []
    cursor = 0
    for mark in re.finditer(r"%[ilscp]`.+?`", item):
        # Keep the text up to and including the "%x" marker...
        fmt_parts.append(item[cursor:mark.start() + 2])
        # ...and move the backticked expression into the argument list.
        arg_parts.append(', ' + item[mark.start() + 3:mark.end() - 1])
        cursor = mark.end()
    fmt_parts.append(item[cursor:])

    fmtStr = ''.join(fmt_parts).replace('"', r'\"')
    argStr = ''.join(arg_parts)
    return xlator['langStringFormatterCommand'](fmtStr, argStr)
Example 39
Project: Trusted-Platform-Module-nova   Author: BU-NU-CLOUD-SP16   File: support_matrix.py    Apache License 2.0 5 votes vote down vote up
def _create_notes_paragraph(self, notes):
        """ Constructs a paragraph which represents the implementation notes

        The paragraph consists of text and clickable URL nodes if links were
        given in the notes.

        :param notes: free-form notes text, possibly containing URLs
        :return: a docutils paragraph node mixing inline text and references
        """
        para = nodes.paragraph()
        # links could start with http:// or https://
        link_idxs = [m.start() for m in re.finditer('https?://', notes)]
        start_idx = 0
        for link_idx in link_idxs:
            # assume the notes start with text (could be empty)
            para.append(nodes.inline(text=notes[start_idx:link_idx]))
            # create a URL node until the next text or the end of the notes
            link_end_idx = notes.find(" ", link_idx)
            if link_end_idx == -1:
                # In case the notes end with a link without a blank
                link_end_idx = len(notes)
            # NOTE(review): the +1 makes the URI include the character at
            # link_end_idx (the separating blank, when present) — confirm
            # this trailing-space inclusion is intended.
            uri = notes[link_idx:link_end_idx + 1]
            para.append(nodes.reference("", uri, refuri=uri))
            start_idx = link_end_idx + 1

        # get all text after the last link (could be empty) or all of the
        # text if no link was given
        para.append(nodes.inline(text=notes[start_idx:]))
        return para
Example 40
Project: sic   Author: Yanixos   File: tarfile.py    GNU General Public License v3.0 5 votes vote down vote up
def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.
        """
        offsets = []
        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
            offsets.append(int(match.group(1)))
        numbytes = []
        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
            numbytes.append(int(match.group(1)))
        next.sparse = list(zip(offsets, numbytes)) 
Example 41
Project: sic   Author: Yanixos   File: tarfile.py    GNU General Public License v3.0 5 votes vote down vote up
def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.
        """
        offsets = []
        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
            offsets.append(int(match.group(1)))
        numbytes = []
        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
            numbytes.append(int(match.group(1)))
        next.sparse = list(zip(offsets, numbytes)) 
Example 42
Project: cards.py   Author: jhauberg   File: templatefield.py    MIT License 5 votes vote down vote up
def fields(content: str,
           with_name_like: str=None,
           with_context_like: str=None,
           strictly_matching: bool=True) -> Iterator[TemplateField]:
    """ Return an iterator for all fields (e.g. '{{ a_field }}') that occur in a template.

    Optional regex filters restrict the yielded fields by name and/or
    context; strictly_matching decides whether both filters must hold
    (AND) or either one suffices (OR).
    """

    pattern = r'{{\s?(([^}}\s]*)\s?(.*?))\s?}}'

    for match in re.finditer(pattern, content):
        # Normalize each captured piece; empty captures become None.
        inner_content = match.group(1).strip() or None
        name = match.group(2).strip() or None
        context = match.group(3).strip() or None

        field = TemplateField(
            name, context, inner_content,
            indices=range(match.start(), match.end()))

        matches_name = (with_name_like is None
                        or (field.name is not None
                            and re.search(with_name_like, field.name) is not None))

        matches_context = (with_context_like is None
                           or (field.context is not None
                               and re.search(with_context_like, field.context) is not None))

        if strictly_matching:
            include = matches_name and matches_context
        else:
            include = matches_name or matches_context

        if include:
            yield field
Example 43
Project: cards.py   Author: jhauberg   File: template.py    MIT License 5 votes vote down vote up
def strip_styles(template: Template) -> str:
    """ Strip and return any embedded <style></style> content from a template.

    Mutates template.content in place (all style blocks removed) and
    returns the extracted <style> blocks joined by newlines. Warns if
    any template fields occur inside the stripped styles.
    """

    pattern = r'<style.*?>(.+?)</style>'
    stripped_styles = ''

    search = re.compile(pattern, re.DOTALL)

    # find all style matches and extract embedded styles
    for style_match in re.finditer(pattern, template.content, re.DOTALL):
        # note that we strip the entire style- not the inner content
        style = style_match.group(0).strip()
        # separating each style block for good measure
        stripped_styles = stripped_styles + '\n' + style if len(stripped_styles) > 0 else style

    # finally remove all style matches
    # note that this removes the <style></style> tags too
    template.content = re.sub(search, '', template.content).strip()

    # make sure we keep it clean- no unnecessary newlines or excess whitespace
    stripped_styles = stripped_styles.strip()

    template_field_names = list((field.name for field in fields(stripped_styles)))

    if len(template_field_names) > 0:
        context = template.path

        # if there's any fields in the styles, display a warning about it
        WarningDisplay.fields_in_styles(
            WarningContext(context), template_field_names)

    return stripped_styles
Example 44
Project: AshsSDK   Author: thehappydinoa   File: tarfile.py    MIT License 5 votes vote down vote up
def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.
        """
        offsets = []
        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
            offsets.append(int(match.group(1)))
        numbytes = []
        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
            numbytes.append(int(match.group(1)))
        next.sparse = list(zip(offsets, numbytes)) 
Example 45
Project: python-aiocqhttp   Author: richardchien   File: message.py    MIT License 5 votes vote down vote up
def _split_iter(msg_str: str) -> Iterable[MessageSegment]:
        """Split a CQ-coded message string into MessageSegment objects.

        Plain text between [CQ:...] codes is unescaped and yielded as
        'text' segments; each CQ code becomes a segment of its own type
        with its comma-separated key=value params parsed into a dict.
        """
        def iter_function_name_and_extra() -> Iterable[Tuple[str, str]]:
            text_begin = 0
            for cqcode in re.finditer(r'\[CQ:(?P<type>[a-zA-Z0-9-_.]+)'
                                      r'(?P<params>'
                                      r'(?:,[a-zA-Z0-9-_.]+=?[^,\]]*)*'
                                      r'),?\]',
                                      msg_str):
                # NOTE(review): cqcode.pos is the search start position
                # (0 for a full-string finditer), so adding it looks like
                # a no-op — confirm before simplifying.
                yield 'text', unescape(
                    msg_str[text_begin:cqcode.pos + cqcode.start()])
                text_begin = cqcode.pos + cqcode.end()
                yield cqcode.group('type'), cqcode.group('params').lstrip(',')
            yield 'text', unescape(msg_str[text_begin:])

        for function_name, extra in iter_function_name_and_extra():
            if function_name == 'text':
                if extra:
                    # only yield non-empty text segment
                    yield MessageSegment(type_=function_name,
                                         data={'text': extra})
            else:
                # "k=v,k2=v2" -> dict; empty entries are filtered out
                data = {k: v for k, v in map(
                    lambda x: x.split('=', maxsplit=1),
                    filter(lambda x: x, (x.lstrip() for x in extra.split(',')))
                )}
                yield MessageSegment(type_=function_name, data=data)
Example 46
Project: mx   Author: graalvm   File: mx_benchmark.py    GNU General Public License v2.0 5 votes vote down vote up
def parseResults(self, text):
        """Lazily yield one dict of named-group captures per match.

        self.pattern is applied with re.MULTILINE over the whole text.
        """
        matches = re.finditer(self.pattern, text, re.MULTILINE)
        return (found.groupdict() for found in matches)
Example 47
Project: mx   Author: graalvm   File: mx_benchmark.py    GNU General Public License v2.0 5 votes vote down vote up
def getCSVFiles(self, text):
        """Lazily yield the named group self.match_name of every match.

        self.pattern is applied with re.MULTILINE over the whole text.
        """
        matches = re.finditer(self.pattern, text, re.MULTILINE)
        return (found.groupdict()[self.match_name] for found in matches)
Example 48
Project: ngo-addons-backport   Author: camptocamp   File: interface.py    GNU Affero General Public License v3.0 5 votes vote down vote up
def post_process_xml_data(self, cr, uid, xml, context=None):
        """Splice the corporate header into a rendered report XML string.

        The body of 'corporate_defaults.xml' (everything after its
        <?xml ...?> declaration) is inserted right after the report's
        declaration and root tag.
        NOTE(review): Python 2 only — relies on iterator .next(); also
        shadows the builtin iter().
        """

        if not context:
            context={}
        # find the position of the 3rd tag
        # (skip the <?xml ...?> and the "root" tag)
        iter = re.finditer('<[^>]*>', xml)
        i = iter.next()
        i = iter.next()
        pos_xml = i.end()

        doc = print_xml.document(cr, uid, {}, {})
        tmpl_path = openerp.modules.get_module_resource('base', 'report', 'corporate_defaults.xml')
        doc.parse(tmpl_path, [uid], 'res.users', context)
        corporate_header = doc.xml_get()
        doc.close()

        # find the position of the tag after the <?xml ...?> tag
        iter = re.finditer('<[^>]*>', corporate_header)
        i = iter.next()
        pos_header = i.end()

        return xml[:pos_xml] + corporate_header[pos_header:] + xml[pos_xml:]

    #
    # TODO: The translation doesn't work for "<tag t="1">textext<tag> tex</tag>text</tag>"
    # 
Example 49
Project: ngo-addons-backport   Author: camptocamp   File: mail.py    GNU Affero General Public License v3.0 5 votes vote down vote up
def plaintext2html(text, container_tag=False):
    """ Convert plaintext into html. Content of the text is escaped to manage
        html entities, using cgi.escape().
        - all \n,\r are replaced by <br />
        - enclose content into <p>
        - 2 or more consecutive <br /> are considered as paragraph breaks

        :param string container_tag: container of the html; by default the
            content is embedded into a <div>
    """
    # NOTE(review): cgi.escape does not escape quotes by default and the
    # cgi module was removed in Python 3.13 (html.escape is the modern
    # equivalent); ustr is presumably the project's unicode coercion.
    text = cgi.escape(ustr(text))

    # 1. replace \n and \r
    text = text.replace('\n', '<br/>')
    text = text.replace('\r', '<br/>')

    # 2-3: form paragraphs
    # runs of 2+ <br/> tags (any spacing/case) mark paragraph boundaries
    idx = 0
    final = '<p>'
    br_tags = re.compile(r'(([<]\s*[bB][rR]\s*\/?[>]\s*){2,})')
    for item in re.finditer(br_tags, text):
        final += text[idx:item.start()] + '</p><p>'
        idx = item.end()
    final += text[idx:] + '</p>'

    # 4. container
    if container_tag:
        final = '<%s>%s</%s>' % (container_tag, final, container_tag)
    return ustr(final)
Example 50
Project: openhatch   Author: campbe13   File: _postgres_builtins.py    GNU Affero General Public License v3.0 5 votes vote down vote up
def parse_keywords(f):
    """Extract keyword tokens from a DocBook keyword table.

    Reads the whole stream, captures the <token> cell of every
    <entry><token>...</token></entry><entry>...</entry> row pair, and
    returns the keywords in alphabetical order.

    :raises ValueError: if the stream contains no keyword rows.
    """
    table_row = (r'\s*<entry><token>([^<]+)</token></entry>\s*'
                 r'<entry>([^<]+)</entry>')
    keywords = [row.group(1) for row in re.finditer(table_row, f.read())]

    if not keywords:
        raise ValueError('no keyword found')

    return sorted(keywords)
Example 51
Project: rop-chainer   Author: wizh   File: strings.py    GNU General Public License v3.0 5 votes vote down vote up
def _locate_useful(self, sections):
        for section in sections:
            for string in self._useful:
                matches = [m.start() for m in re.finditer(string, section["data"])]
                for index in matches:
                    self._strings +=\
                        [{"text" : section["data"][index:index+len(string)],
                          "vaddr" : section["vaddr"] + index}] 
Example 52
Project: multiprot   Author: strubelab   File: builder.py    Apache License 2.0 5 votes vote down vote up
def extract_fixed(self, dom, full):
        """
        Extracts one model from another
        Finds the position of 'dom' inside 'full' comparing the sequence and atom
        coordinates for each chain in dom, gets the chain index and takes all the
        chains but the ones selected.
        
        :param dom: model of a single or multiple chain domain
        :type dom: PDBModel
        :param full: model of a multiple chain domain that contains 'dom'
        :type full: PDBModel

        :return: model 'full' without dom
        :type return: PDBModel
        """

        # atom index range covered by the first residue of 'dom'
        first_res_dom = dom.res2atomIndices([0])
        lowdom = first_res_dom[0]
        highdom = first_res_dom[-1]
        i=0
        # NOTE(review): dom.sequence() is used as a regex pattern — safe
        # only while sequences contain plain letters.
        for match in re.finditer(dom.sequence(), full.sequence()):
            start,end = match.span()
            first_res_full = full.res2atomIndices([start])
            lowfull = first_res_full[0]
            highfull = first_res_full[-1]
            i += 1  # match counter (currently unused)
            # Compare only the atoms of the first residue
            if N.all(dom.xyz[lowdom:highdom+1] == full.xyz[lowfull:highfull+1]):
                # coordinates agree: remove dom's atom span from 'full'
                atom_start = full.resIndex()[start]
                atom_end = full.res2atomIndices([end-1])[-1] + 1
                full.remove(list(range(atom_start,atom_end)))
                break

        return full
Example 53
Project: findevil   Author: tylerha97   File: findevilinfo.py    MIT License 5 votes vote down vote up
def carve(input_file):
    """Carve PE files from segments adapted from Alexander Hanel's blog
    https://hooked-on-mnemonics.blogspot.com/2013/01/pe-carvpy.html

    Each carved file is written next to the input as
    "<input>_<n>.<ext>" where ext reflects the PE type.
    """
    # NOTE(review): Python 2 only — print statement, and the str pattern
    # '\x4d\x5a' ("MZ") is matched against bytes from the dump.
    with open(input_file, "rb") as mem_dump:
        c = 1
        # For each address that contains MZ
        for y in [tmp.start() for tmp in re.finditer('\x4d\x5a',mem_dump.read())]:
            mem_dump.seek(y)
            try:
                pe = pefile.PE(data=mem_dump.read())
            except:
                # not a parsable PE at this MZ offset; move on
                continue 
            # Determine file ext
            if pe.is_dll() == True:
                ext = 'dll'
            elif pe.is_driver() == True:
                ext = 'sys'
            elif pe.is_exe() == True:
                ext = 'exe'
            else:
                ext = 'bin'

            print "Carving {} at {}".format(ext, hex(y))

            with open(input_file + "_" + str(c) + '.' + ext, 'wb') as out:
                out.write(pe.trim())

            c += 1
            ext = ''
            # rewind so the next candidate offset reads the full dump
            mem_dump.seek(0)
            pe.close()
Example 54
Project: alpyca   Author: alpyca   File: launch.py    MIT License 5 votes vote down vote up
def find_eval_action(self, text):
        """Find $(eval ...) substitution expressions in *text*.

        Returns a list of Match records. As a limitation the eval
        expression must span the entire attribute string; otherwise
        ParsingException is raised.
        """
        expr = r'\$\(eval (.*?)\)'

        matches = []
        for re_match in re.finditer(expr, text):
            start = re_match.start()
            end = re_match.end()

            words = text[start+2:end-1].split(' ')
            keyword = words[0]
            value = ' '.join(words[1:])

            # As a limitation, $(eval) expressions need to span the whole attribute string.
            # A mixture of other substitution args with eval within a single string is not possible.
            if text.startswith('$(eval') and text.endswith(')'):
                # NOTE(review): end/words/value are recomputed here over
                # the whole string, discarding the per-match values above.
                end = len(text)
                words = text[start+2:end-1].split(' ')
                keyword = words[0]
                value = ' '.join(words[1:])
                replacement = self.eval_text(value)
            else:
                raise ParsingException('$(eval) expressions need to span the whole attribute string!')

            match = Match(start, end, keyword, value, replacement)
            matches.append(match)
        
        return matches
Example 55
Project: alpyca   Author: alpyca   File: launch.py    MIT License 5 votes vote down vote up
def find_unknown_action(self, text):
        """Fail on any unresolved $(...) substitution left in *text*.

        Currently a stub: the first substitution expression encountered
        raises ParsingException. Returns None when none are present.
        """
        expr = r'\$\((.*?)\)'

        pending = []
        for _ in re.finditer(expr, text):
            # any surviving substitution is unsupported for now
            raise ParsingException('TODO')
Example 56
Project: jawfish   Author: war-and-code   File: tarfile.py    MIT License 5 votes vote down vote up
def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.
        """
        offsets = []
        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
            offsets.append(int(match.group(1)))
        numbytes = []
        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
            numbytes.append(int(match.group(1)))
        next.sparse = list(zip(offsets, numbytes)) 
Example 57
Project: jawfish   Author: war-and-code   File: test_re.py    MIT License 5 votes vote down vote up
def test_bug_581080(self):
        # finditer over "a b" must yield exactly one whitespace match
        # and then exhaust.
        iter = re.finditer(r"\s", "a b")
        self.assertEqual(next(iter).span(), (1,2))
        self.assertRaises(StopIteration, next, iter)

        # the (undocumented) pattern.scanner() API should agree with
        # finditer and return None once exhausted
        scanner = re.compile(r"\s").scanner("a b")
        self.assertEqual(scanner.search().span(), (1, 2))
        self.assertEqual(scanner.search(), None)
Example 58
Project: jawfish   Author: war-and-code   File: test_re.py    MIT License 5 votes vote down vote up
def test_bug_817234(self):
        # ".*" must yield the full match and then one final empty match
        # anchored at the end of the string — not loop forever.
        iter = re.finditer(r".*", "asdf")
        self.assertEqual(next(iter).span(), (0, 4))
        self.assertEqual(next(iter).span(), (4, 4))
        self.assertRaises(StopIteration, next, iter)
Example 59
Project: study   Author: program-in-chinese   File: 提取词条.py    BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def camel_case_split(identifier):
    """Split a camelCase/PascalCase identifier into its word parts.

    Boundaries fall between a lowercase and an uppercase letter, and
    between an acronym and the capitalized word that follows it
    (e.g. 'XMLParser' -> 'XML', 'Parser').
    """
    boundary = r'.+?(?:(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])|$)'
    return [part.group(0) for part in re.finditer(boundary, identifier)]
Example 60
Project: Repobot   Author: Desgard   File: _postgres_builtins.py    MIT License 5 votes vote down vote up
def parse_keywords(f):
        """Extract keyword tokens from a DocBook keyword table.

        Reads the stream, captures the <token> cell of each
        <entry><token>...</token></entry><entry>...</entry> row pair,
        and returns the keywords in alphabetical order.

        :raises ValueError: if no keyword rows are found.
        """
        table_row = (r'\s*<entry><token>([^<]+)</token></entry>\s*'
                     r'<entry>([^<]+)</entry>')
        keywords = [row.group(1) for row in re.finditer(table_row, f.read())]

        if not keywords:
            raise ValueError('no keyword found')

        return sorted(keywords)
Example 61
Project: Repobot   Author: Desgard   File: tarfile.py    MIT License 5 votes vote down vote up
def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.
        """
        offsets = []
        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
            offsets.append(int(match.group(1)))
        numbytes = []
        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
            numbytes.append(int(match.group(1)))
        next.sparse = list(zip(offsets, numbytes)) 
Example 62
Project: Repobot   Author: Desgard   File: document.py    MIT License 5 votes vote down vote up
def find(self, sub, in_current_line=False, include_current_position=False,
             ignore_case=False, count=1):
        """
        Find `text` after the cursor, return position relative to the cursor
        position. Return `None` if nothing was found.

        :param count: Find the n-th occurance.
        """
        assert isinstance(ignore_case, bool)

        if in_current_line:
            text = self.current_line_after_cursor
        else:
            text = self.text_after_cursor

        if not include_current_position:
            if len(text) == 0:
                return  # (Otherwise, we always get a match for the empty string.)
            else:
                # skip the character under the cursor so it can't match
                text = text[1:]

        flags = re.IGNORECASE if ignore_case else 0
        # sub is matched literally, not as a regex
        iterator = re.finditer(re.escape(sub), text, flags)

        try:
            for i, match in enumerate(iterator):
                if i + 1 == count:
                    if include_current_position:
                        return match.start(0)
                    else:
                        # +1 compensates for the text[1:] slice above
                        return match.start(0) + 1
        except StopIteration:
            # NOTE(review): finditer does not raise StopIteration out of
            # a for loop — this handler appears to be defensive/dead.
            pass
Example 63
Project: Repobot   Author: Desgard   File: document.py    MIT License 5 votes vote down vote up
def find_all(self, sub, ignore_case=False):
        """
        Find all occurances of the substring. Return a list of absolute
        positions in the document.
        """
        flags = re.IGNORECASE if ignore_case else 0
        positions = []
        # sub is matched literally, not as a regex
        for occurrence in re.finditer(re.escape(sub), self.text, flags):
            positions.append(occurrence.start())
        return positions
Example 64
Project: Repobot   Author: Desgard   File: document.py    MIT License 5 votes vote down vote up
def find_next_word_ending(self, include_current_position=False, count=1, WORD=False):
        """
        Return an index relative to the cursor position pointing to the end
        of the next word. Return `None` if nothing was found.
        """
        # negative count means: search backwards instead
        if count < 0:
            return self.find_previous_word_ending(count=-count, WORD=WORD)

        if include_current_position:
            text = self.text_after_cursor
        else:
            # skip the character under the cursor
            text = self.text_after_cursor[1:]

        # WORD=True uses the vi-style whitespace-delimited word regex
        regex = _FIND_BIG_WORD_RE if WORD else _FIND_WORD_RE
        iterable = regex.finditer(text)

        try:
            for i, match in enumerate(iterable):
                if i + 1 == count:
                    value = match.end(1)

                    if include_current_position:
                        return value
                    else:
                        # +1 compensates for the text[1:] slice above
                        return value + 1

        except StopIteration:
            # NOTE(review): finditer does not raise StopIteration out of
            # a for loop — this handler appears to be defensive/dead.
            pass
Example 65
Project: Repobot   Author: Desgard   File: processors.py    MIT License 5 votes vote down vote up
def apply_transformation(self, cli, document, lineno, source_to_display, tokens):
        """Highlight every occurrence of the current search text on a line.

        Tokens covered by a match get Token.SearchMatch appended to their
        token type — or Token.SearchMatch.Current when the cursor sits
        inside that match.
        """
        search_text = self._get_search_text(cli)
        searchmatch_current_token = (':', ) + Token.SearchMatch.Current
        searchmatch_token = (':', ) + Token.SearchMatch

        if search_text and not cli.is_returning:
            # For each search match, replace the Token.
            line_text = token_list_to_text(tokens)
            tokens = explode_tokens(tokens)

            flags = re.IGNORECASE if cli.is_ignoring_case else 0

            # Get cursor column.
            if document.cursor_position_row == lineno:
                cursor_column = source_to_display(document.cursor_position_col)
            else:
                cursor_column = None

            # search text is matched literally, not as a regex
            for match in re.finditer(re.escape(search_text), line_text, flags=flags):
                if cursor_column is not None:
                    on_cursor = match.start() <= cursor_column < match.end()
                else:
                    on_cursor = False

                for i in range(match.start(), match.end()):
                    old_token, text = tokens[i]
                    if on_cursor:
                        tokens[i] = (old_token + searchmatch_current_token, tokens[i][1])
                    else:
                        tokens[i] = (old_token + searchmatch_token, tokens[i][1])

        return Transformation(tokens)
Example 66
Project: razzy-spinner   Author: rafasashi   File: regexp.py    GNU General Public License v3.0 5 votes vote down vote up
def span_tokenize(self, text):
        """Yield (start, end) spans of tokens in *text*.

        In gap mode the regexp matches separators and the spans between
        them are yielded (optionally discarding empty spans); otherwise
        the regexp matches the tokens themselves.
        """
        self._check_regexp()

        if self._gaps:
            for left, right in regexp_span_tokenize(text, self._regexp):
                if not (self._discard_empty and left == right):
                    yield left, right
        else:
            for m in re.finditer(self._regexp, text):
                yield m.span()
Example 67
Project: razzy-spinner   Author: rafasashi   File: texttiling.py    GNU General Public License v3.0 5 votes vote down vote up
def _divide_to_tokensequences(self, text):
        """Divides the text into pseudosentences of fixed size.

        Each word match contributes a (word, start_offset) pair; the
        pairs are chunked into groups of self.w.
        NOTE(review): "\\w+" is not a raw string, and i/w is true (float)
        division under Python 3 — this code predates Python 3 and the
        TokenSequence index is presumably meant to be integral; confirm.
        """
        w = self.w
        wrdindex_list = []
        matches = re.finditer("\w+", text)
        for match in matches:
            wrdindex_list.append((match.group(), match.start()))
        return [TokenSequence(i/w, wrdindex_list[i:i+w])
                for i in range(0, len(wrdindex_list), w)]
Example 68
Project: razzy-spinner   Author: rafasashi   File: util.py    GNU General Public License v3.0 5 votes vote down vote up
def _ieer_read_text(s, root_label):
    """Parse IEER-style SGML text into a Tree rooted at *root_label*.

    <b_TYPE>/<e_TYPE> tags open and close entity chunks; every other
    token becomes a leaf of the currently open chunk.

    :raises ValueError: on unbalanced or malformed IEER markup
    """
    stack = [Tree(root_label, [])]
    # s will be None if there is no headline in the text
    # return the empty list in place of a Tree
    if s is None:
        return []
    # tokens are either a full <...> tag or a run of non-tag characters
    for piece_m in re.finditer('<[^>]+>|[^\s<]+', s):
        piece = piece_m.group()
        try:
            if piece.startswith('<b_'):
                # open a new entity chunk and make it current
                m = _IEER_TYPE_RE.match(piece)
                if m is None: print('XXXX', piece)
                chunk = Tree(m.group('type'), [])
                stack[-1].append(chunk)
                stack.append(chunk)
            elif piece.startswith('<e_'):
                # close the current entity chunk
                stack.pop()
#           elif piece.startswith('<'):
#               print "ERROR:", piece
#               raise ValueError # Unexpected HTML
            else:
                stack[-1].append(piece)
        except (IndexError, ValueError):
            raise ValueError('Bad IEER string (error at character %d)' %
                             piece_m.start())
    if len(stack) != 1:
        raise ValueError('Bad IEER string')
    return stack[0]
Example 69
Project: vscode-mayapy   Author: FXTD-ODYSSEY   File: debugger_unittest.py    MIT License 5 votes vote down vote up
def wait_for_get_next_statement_targets(self):
        """Block until a statement-targets message arrives, then parse it.

        Polls get_next_message() until a payload containing '<xml><line>'
        shows up, then collects every numeric <line> value into a set.
        """
        message = ''
        while '<xml><line>' not in message:
            message = self.get_next_message('wait_for_get_next_statement_targets')

        targets = set()
        for hit in re.finditer(r"(<line>([0-9]*)<\/line>)", message, re.IGNORECASE):
            # <line></line> captures an empty string; int() rejects it.
            try:
                targets.add(int(hit.group(2)))
            except ValueError:
                pass
        return targets
Example 70
Project: vscode-mayapy   Author: FXTD-ODYSSEY   File: _bytecode_overflow_example.py    MIT License 5 votes vote down vote up
def fun(text):
        """Collect runs of words that are not recognized as English.

        A word counts as English if it (or a crude stem with a trailing
        's'/'ed'/'ing' removed) appears in en_words_basic or en_words.
        Up to Dummy.non_en_words_limit following words are swept into the
        same non-English run.
        NOTE(review): non_en/non_en_pass are built but never returned —
        upstream this is a bytecode-overflow stress example.
        """
        words = tuple(w[0].lower() for w in re.finditer(r'[a-zA-Z]+', text))
        non_en_pass = []
        for i, word in enumerate(words):
            non_en = []
            if not (word in en_words_basic
                    or (word.endswith('s') and word[:-1] in en_words_basic)
                    or (word.endswith('ed') and word[:-2] in en_words_basic)
                    or (word.endswith('ing') and word[:-3] in en_words_basic)
                    or word in en_words
                    or (word.endswith('s') and word[:-1] in en_words)
                    or (word.endswith('ed') and word[:-2] in en_words)
                    or (word.endswith('ing') and word[:-3] in en_words)
                    ):

                non_en.append(word)
                non_en_pass.append(word)
                # extend the run with following non-English words
                for j in range(1, Dummy.non_en_words_limit):
                    if i + j >= len(words):
                        break
                    word = words[i + j]

                    if (word in en_words_basic
                        or (word.endswith('s') and word[:-1] in en_words_basic)
                        or (word.endswith('ed') and word[:-2] in en_words_basic)
                        or (word.endswith('ing') and word[:-3] in en_words_basic)
                        or word in en_words
                        or (word.endswith('s') and word[:-1] in en_words)
                        or (word.endswith('ed') and word[:-2] in en_words)
                        or (word.endswith('ing') and word[:-3] in en_words)
                    ):
                        break
                    else:
                        non_en.append(word)
                        non_en_pass.append(word)
Example 71
Project: vscode-mayapy   Author: FXTD-ODYSSEY   File: _bytecode_overflow_example.py    MIT License 5 votes vote down vote up
def fun(text):
        """Collect runs of words that are not recognized as English.

        Variant of the same stress example with a tracing() hook after
        tokenization. A word counts as English if it (or a crude stem
        with a trailing 's'/'ed'/'ing' removed) appears in
        en_words_basic or en_words.
        NOTE(review): non_en/non_en_pass are built but never returned —
        upstream this is a bytecode-overflow stress example.
        """
        words = tuple(w[0].lower() for w in re.finditer(r'[a-zA-Z]+', text))
        tracing()
        non_en_pass = []
        for i, word in enumerate(words):
            non_en = []
            if not (word in en_words_basic
                    or (word.endswith('s') and word[:-1] in en_words_basic)
                    or (word.endswith('ed') and word[:-2] in en_words_basic)
                    or (word.endswith('ing') and word[:-3] in en_words_basic)
                    or word in en_words
                    or (word.endswith('s') and word[:-1] in en_words)
                    or (word.endswith('ed') and word[:-2] in en_words)
                    or (word.endswith('ing') and word[:-3] in en_words)
                    ):

                non_en.append(word)
                non_en_pass.append(word)
                # extend the run with following non-English words
                for j in range(1, Dummy.non_en_words_limit):
                    if i + j >= len(words):
                        break
                    word = words[i + j]
                    if (word in en_words_basic
                        or (word.endswith('s') and word[:-1] in en_words_basic)
                        or (word.endswith('ed') and word[:-2] in en_words_basic)
                        or (word.endswith('ing') and word[:-3] in en_words_basic)
                        or word in en_words
                        or (word.endswith('s') and word[:-1] in en_words)
                        or (word.endswith('ed') and word[:-2] in en_words)
                        or (word.endswith('ing') and word[:-3] in en_words)
                        ):
                        break
                    else:
                        non_en.append(word)
                        non_en_pass.append(word)
Example 72
Project: Dumb-Cogs   Author: irdumbs   File: noflippedtables.py    MIT License 4 votes vote down vote up
def scrutinize_messages(self, message):
		"""Watch a channel for table-flip emoticons and politely unflip them.

		Flipped tables (┻━┻) are tallied per channel; matching upright
		tables (┬─┬) cancel them out. After a short random delay the bot
		posts an unflipped table (plus ノ( ゜-゜ノ)) for each outstanding flip.

		NOTE(review): `await` appears inside a plain `def` (upstream this is
		`async def`), and the indentation mixes tabs and spaces — the code
		is preserved byte-for-byte here apart from added comments.
		"""
		channel = message.channel
		user = message.author
		if hasattr(user, 'bot') and user.bot is True:
                    return
		if channel.id not in self.flippedTables:
			 self.flippedTables[channel.id] = {}
		#┬─┬ ┬┬ ┻┻ ┻━┻ ┬───┬ ┻━┻ will leave 3 tables left flipped
		#count flipped tables
		for m in re.finditer('┻━*┻|┬─*┬', message.content):
			t = m.group()
			if '┻' in t and not (message.author.id == self.bot.user.id and self.settings["BOT_EXEMPT"]):
				if t in self.flippedTables[channel.id]:
					self.flippedTables[channel.id][t] += 1
				else:
					self.flippedTables[channel.id][t] = 1
					if not self.settings["ALL_TABLES"]:
						break
			else:
				# an upright table cancels one pending flip of same width
				f = t.replace('┬','┻').replace('─','━')
				if f in self.flippedTables[channel.id]:
					if self.flippedTables[channel.id][f] <= 0:
						del self.flippedTables[channel.id][f]
					else:
						self.flippedTables[channel.id][f] -= 1
		#wait random time. some tables may be unflipped by now.
		await asyncio.sleep(randfloat(0,1.5))
		tables = ""

		deleteTables = []
		#unflip tables in self.flippedTables[channel.id]
		for t, n in self.flippedTables[channel.id].items():
			unflipped = t.replace('┻','┬').replace('━','─') + " ノ( ゜-゜ノ)" + "\n"
			for i in range(0,n):
				tables += unflipped
				#in case being processed in parallel
				self.flippedTables[channel.id][t] -= 1
			deleteTables.append(t)
		for t in deleteTables:
			del self.flippedTables[channel.id][t]
		if tables != "":
			await self.bot.send_message(channel, tables)
Example 73
Project: zabbix   Author: xiaomatech   File: hadoop-collector.py    MIT License 4 votes vote down vote up
def collect_tasktracker(self):
        """Scrape task-tracker stats for this host from the JobTracker UI.

        Finds the header row and this host's data row in machines.jsp,
        zips the <td> cells into a name->value map, and reports the task
        counters via send_result().
        NOTE(review): Python 2 only — uses iterator .next().
        """

        content = self.request('http://%s:%d/machines.jsp?type=active' % (
            self.args.jobtracker_host, self.args.jobtracker_port))

        # accumulate the header row until its closing </tr>
        lines = iter(content.split('\n'))
        jthead = None
        for line in lines:
            if line.startswith('<tr><td><b>Name'):
                jthead = line
            elif jthead is not None:
                jthead += line
                if '</tr>' in line:
                    break

        # accumulate this host's data row until its closing </tr>
        jtbody = None
        for line in lines:
            if line.startswith('<tr>') \
                    and self.args.host in line:
                jtbody = line
            elif jtbody is not None:
                jtbody += line
                if '</tr>' in line:
                    break

        # pair header cells with body cells, stripping nested tags
        iter_head = re.finditer('<td[^>]*>(.*?)</td>', jthead)
        iter_body = re.finditer('<td[^>]*>(.*?)</td>', jtbody)
        jtmap = {}
        for mo_head in iter_head:
            mo_body = iter_body.next()
            jtmap[PTRN_TAG.sub('', mo_head.group(1)).strip()] = \
                    PTRN_TAG.sub('', mo_body.group(1)).strip()

        result = {}
        result['task_running'] = jtmap['# running tasks']
        result['task_capacity'] = int(jtmap['Max Map Tasks']) + int(jtmap[
            'Max Reduce Tasks'])
        result['task_failed'] = jtmap['Task Failures']
        result['task_total'] = jtmap['Total Tasks Since Start']
        result['task_succeeded'] = jtmap['Succeeded Tasks Since Start']

        self.send_result(result)
Example 74
Project: zabbix   Author: xiaomatech   File: hadoop-collector.py    MIT License 4 votes vote down vote up
def collect_datanode(self):
        """Scrape the NameNode live-nodes page and report DFS usage stats.

        Fetches the LIVE datanode HTML table, pairs the header cells with
        the row for ``self.args.host``, and sends the parsed capacity and
        usage figures via ``self.send_result``.
        """
        content = self.request('http://%s:%d/dfsnodelist.jsp?whatNodes=LIVE' %
                               (self.args.namenode_host,
                                self.args.namenode_port))

        lines = iter(content.split('\n'))
        # Advance to the header row; relies on the for-loop variable
        # leaking out of the loop.
        # NOTE(review): if no headerRow exists, `line` ends up being the
        # last page line -- confirm the page always contains the table.
        for line in lines:
            if line.startswith('<tr class="headerRow">'):
                break
        jthead = line

        # Advance to the row describing this host.
        for line in lines:
            if line.startswith('<tr') \
                    and self.args.host in line:
                break
        # Strip any nested tables before extracting the cells.
        jtbody = re.sub('<table[^>]*>.*?</table>', '', line)

        # Header cells are <th>, body cells are <td>; walk them in lockstep.
        iter_head = re.finditer('<th[^>]*>(.*?)(?=<th|$)', jthead)
        iter_body = re.finditer('<td[^>]*>(.*?)(?=<td|$)', jtbody)
        jtmap = {}
        ptrn_quote = re.compile(r'\((.*?)\)')
        for mo_head in iter_head:
            # Fix: next() builtin instead of the Python-2-only .next()
            # method, so this also runs on Python 3.
            mo_body = next(iter_body)

            k = PTRN_TAG.sub('', mo_head.group(1))
            # Percentage columns duplicate absolute ones; skip them.
            if '(%)' in k:
                continue

            # Pull a parenthesised unit out of the header, e.g.
            # "Capacity (GB)" -> key "Capacity", value "123 GB".
            mo = ptrn_quote.search(k)
            k = ptrn_quote.sub('', k).strip()
            v = PTRN_TAG.sub('', mo_body.group(1)).strip()

            if mo is not None:
                jtmap[k] = '%s %s' % (v, mo.group(1))
            else:
                jtmap[k] = v

        result = {}
        result['dfs_capacity'] = self.regulate_size(jtmap[
            'Configured Capacity'])
        result['dfs_used'] = self.regulate_size(jtmap['Used'])
        result['dfs_used_other'] = self.regulate_size(jtmap['Non DFS Used'])
        result['dfs_remaining'] = self.regulate_size(jtmap['Remaining'])
        result['block_count'] = jtmap['Blocks']

        self.send_result(result)
Example 75
Project: SublimeKSP   Author: nojanath   File: preprocessor_plugins.py    GNU General Public License v3.0 4 votes vote down vote up
def substituteValue(self, command, listOfOtherDefines, line=None):
		""" Replace all occurrences of the define constant in the given command with its value.

		command -- source line text in which this define is expanded.
		listOfOtherDefines -- other define objects; used to resolve defines
			that appear nested inside this macro's argument list.
		line -- optional line object for error reporting (defaults to self.line).
		Returns the command text with every occurrence of this define expanded. """
		newCommand = command
		if self.name in command:
			if not self.args:
				# Parameterless define: plain whole-word substitution.
				newCommand = re.sub(r"\b%s\b" % self.name, self.value, command)
			else:
				lineObj = line or self.line
				matchIt = re.finditer(r"\b%s\b" % self.name, command)
				for match in matchIt:
					# Parse the match
					matchPos = match.start()
					# Scan forward from the name, counting parentheses, to
					# capture the full macro call "NAME(arg, ...)" even when
					# the arguments themselves contain nested parentheses.
					parenthCount = 0
					preBracketFlag = True # Flag to show when the first bracket is found.
					foundString = []
					for char in command[matchPos:]:
						if char == "(":
							parenthCount += 1
							preBracketFlag = False
						elif char == ")":
							parenthCount -= 1
						foundString.append(char)
						if parenthCount == 0 and preBracketFlag == False:
							break
					foundString = "".join(foundString)

					# Check whether the args are valid
					openBracketPos = foundString.find("(")
					if openBracketPos == -1:
						raise ksp_compiler.ParseException(lineObj, "No arguments found for define macro: %s" % foundString)
					argsString = foundString[openBracketPos + 1 : len(foundString) - 1]
					foundArgs = ksp_compiler.split_args(argsString, lineObj)
					if len(foundArgs) != len(self.args):
						# The number of args could be incorrect because there are other defines in the arg list, therefore first evaluate
						# all other defines in the args. If still incorrect, raise an exception.
						for defineObj in listOfOtherDefines:
							argsString = defineObj.substituteValue(argsString, listOfOtherDefines)
						foundArgs = ksp_compiler.split_args(argsString, lineObj)
						if len(foundArgs) != len(self.args):
							raise ksp_compiler.ParseException(lineObj, "Incorrect number of arguments in define macro: %s. Expected %d, got %d.\n" % (foundString, len(self.args), len(foundArgs)))

					# Build the new value using the given args
					newVal = self.value
					for argIdx, arg in enumerate(self.args):
						if arg.startswith("#") and arg.endswith("#"):
							# "#name#" placeholders are substituted verbatim,
							# without word-boundary anchoring.
							# NOTE(review): `arg` is used as a regex pattern;
							# metacharacters in a placeholder name would
							# misbehave -- confirm names are plain words.
							newVal = re.sub(arg, foundArgs[argIdx], newVal)
						else:
							newVal = re.sub(r"\b%s\b" % arg, foundArgs[argIdx], newVal)
					newCommand = newCommand.replace(foundString, newVal)
		return(newCommand)
Example 76
Project: SLiPy   Author: glentner   File: mpfitexpr.py    GNU General Public License v2.0 4 votes vote down vote up
def mpfitexpr(func, x, y, err , start_params, check=True, full_output=False,
						imports=None, **kw):
	"""Fit the user-defined expression to the data.

	Input:
	- func: string with the function definition
	- x: x vector
	- y: y vector
	- err: vector with the errors of y
	- start_params: the starting parameters for the fit
	Output:
	- The tuple (params, yfit) with best-fit params and the values of func evaluated at x
	Keywords:
	- check: boolean parameter. If True (default) the function will be checked for sanity
	- full_output: boolean parameter. If True (default is False) then instead of best-fit parameters the mpfit object is returned
	- imports: list of strings, of optional modules to be imported, required to evaluate the function
	Example:
	params,yfit=mpfitexpr('p[0]+p[2]*(x-p[1])',x,y,err,[0,10,1])
	
	If you need to use numpy and scipy functions in your function, then
		you must use the full names of these functions, e.g.:
		numpy.sin, numpy.cos etc.
	
	This function is motivated by mpfitexpr() from the wonderful MPFIT IDL package
		written by Craig Markwardt
	
	"""

	# Globals namespace handed to eval() below, pre-seeded with numpy/scipy.
	# NOTE: the name `hash` shadows the builtin of the same name; kept as-is
	# in this documentation-only pass.
	hash={}
	hash['numpy']=numpy
	hash['scipy']=scipy
	
	if imports is not None:
		for i in imports:
			#exec '%s=__import__("%s")'%(a,b) in globals(),locals()
			# Make each requested module visible inside the eval namespace.
			hash[i]= __import__(i)
	# Residuals callback in the shape mpfit expects: [status, residuals].
	def myfunc(p,fjac=None,x=None, y=None, err=None):
		return [0, eval('(y-(%s))/err'%func,hash,locals())]

	# Find the highest parameter index p[i] referenced by the expression,
	# to validate the length of start_params.
	myre = "(?:[^a-zA-Z_]|^)p\[(\d+)\]"
	r = re.compile(myre)
	maxp = -1
	for m in re.finditer(r,func):
		curp = int(m.group(1))
		maxp = curp if curp > maxp else maxp	
	if check:
		if maxp == -1: 
			raise Exception("wrong function format")
		if maxp + 1 != len(start_params):
			raise Exception("the length of the start_params != the length of the parameter verctor of the function")
	fa={'x' : x, 'y' : y,'err' : err}
	res = mpfit.mpfit(myfunc,start_params,functkw=fa,**kw)
	# Evaluate the best-fit model at x for the caller.
	yfit = eval(func, hash, {'x':x, 'p': res.params})
	if full_output:
		return (res, yfit)
	else:
		return (res.params, yfit)
Example 77
Project: regex4ocr   Author: juntossomosmais   File: extraction.py    MIT License 4 votes vote down vote up
def get_table_rows(all_rows, drm):
    """
    Extract rows from the table data substring of the OCR result string
    by using the DRM key "line_start" which denotes the regexp that
    matches the beginning of EACH new line of the tabular data.

    Args:
        all_rows (str): substring containing all rows from the OCR string;
        drm (dict): DRM dict object for parsing the OCR all rows string.

    Returns:
        (list): List of all the matched rows of the all_rows substring
                of the original OCR result.
    """
    if not all_rows:
        return []

    # table data is guaranteed here
    row_start_re = drm["table"]["line_start"]

    # Every match start marks the beginning of a row.
    line_start_indexes = [
        m.span()[0] for m in re.finditer(row_start_re, all_rows)
    ]

    # Each row ends where the next one begins; the last row runs to the
    # end of the string.  Fix: use len(all_rows) (not len - 1) so the
    # final character of the last row is not silently dropped when the
    # string does not end with a newline.
    line_ends_indexes = line_start_indexes[1:] + [len(all_rows)]

    # Slice out each row and strip embedded newlines.
    rows = [
        all_rows[start:end].replace("\n", "")
        for start, end in zip(line_start_indexes, line_ends_indexes)
    ]

    logger.debug("Returning rows: %s", rows)

    return rows
Example 78
Project: multiprot   Author: strubelab   File: ranch.py    Apache License 2.0 4 votes vote down vote up
def extract_symmetric(full, symseq, embedded):
    """
    Extract one or more embedded chains from a PDBModel with a symmetric
    structure.
    
    :param full:   PDBModel with symmetric structure, that contains embedded 
                        chains
    :type full: PDBModel

    :param symseq: sequence of the symmetric unit, i.e. the sequence that is
                        multiplied in the symmetric structure
    :type symseq: string
    :param embedded: dictionary with embedded domains and its position (index)
                            in the sequence of 'full'
    :type embedded: dictionary
    :return: 'full' with embedded domains concatenated at the end for each
                symmetric unit
    :type full: PDBModel
    :raise MatchError: if symseq cannot be found in full.sequence()
    """
    
    symunits = []
    modeled_doms = []

    # NOTE(review): symseq is used as a regex pattern.  Plain residue
    # letters are safe, but regex metacharacters in the sequence would
    # misbehave -- confirm inputs are alphabetic sequences only.
    if re.search(symseq, full.sequence()):

        matches = re.finditer(symseq, full.sequence())

        # Cut out each symmetric unit and un-embed its domains.
        for match in matches:
            istart, iend = match.span()
            symunit = full.takeResidues(list(range(istart, iend)))
            # Extract embedded domains one symunit at a time
            extracted = extract_embedded(symunit, embedded)
            symunits.append(extracted[0])
            modeled_doms.append(extracted[1][0])

        # Sequence of a single (extracted) unit, returned to the caller.
        r = symunits[0]
        out_symseq = r.sequence()

        # Concatenate the remaining units onto the first one.
        for i in range(1,len(symunits)):
            r = r.concat(symunits[i])

        r.addChainId()
        # N is presumably the numpy module imported elsewhere in this
        # file -- TODO confirm.
        r['serial_number'] = N.arange(1,len(r)+1)

    else:
        raise MatchError("Symseq could not be found inside the full domain")

    # out_symseq is always bound here: the else-branch above raises.
    return r, modeled_doms, out_symseq
Example 79
Project: razzy-spinner   Author: rafasashi   File: tree.py    GNU General Public License v3.0 4 votes vote down vote up
def chunk(s, chunk_node="NP", top_node="S"):
    """
    Divide a string of chunked tagged text into
    chunks and unchunked tokens, and produce a C{Tree}.
    Chunks are marked by square brackets (C{[...]}).  Words are
    delineated by whitespace, and each word should have the form
    C{I{text}/I{tag}}.  Words that do not contain a slash are
    assigned a C{tag} of C{None}.

    @return: A tree corresponding to the string representation.
    @rtype: C{tree}
    @param s: The string to be converted
    @type s: C{string}
    @param chunk_node: The label to use for chunk nodes
    @type chunk_node: C{string}
    @param top_node: The label to use for the root of the tree
    @type top_node: C{string}
    """

    # Tokens are '[', ']', or a maximal run of non-bracket, non-space chars.
    WORD_OR_BRACKET = re.compile(r'\[|\]|[^\[\]\s]+')
    VALID = re.compile(r'^([^\[\]]+|\[[^\[\]]*\])*$')

    if not VALID.match(s):
        # Fix: Python 2-only "raise E, msg" statement syntax replaced with
        # the call form, which is valid on both Python 2 and Python 3.
        raise ValueError('Invalid token string (bad brackets)')

    stack = [Tree(top_node, [])]
    for match in WORD_OR_BRACKET.finditer(s):
        text = match.group()
        if text[0] == '[':
            # Open a new chunk; following words attach to it.
            chunk = Tree(chunk_node, [])
            stack[-1].append(chunk)
            stack.append(chunk)
        elif text[0] == ']':
            stack.pop()
        else:
            # Split "text/tag" on the LAST slash; slashless words get None.
            slash = text.rfind('/')
            if slash >= 0:
                tok = (text[:slash], text[slash+1:])
            else:
                tok = (text, None)
            stack[-1].append(tok)

    return stack[0]

### CONLL 
Example 80
Project: razzy-spinner   Author: rafasashi   File: util.py    GNU General Public License v3.0 4 votes vote down vote up
def tagstr2tree(s, chunk_label="NP", root_label="S", sep='/',
                source_tagset=None, target_tagset=None):
    """
    Parse a bracketted, tagged string into a two-level Tree.

    Chunks are delimited by square brackets (``[...]``), words by
    whitespace, and each word should have the form ``text/tag``; a word
    with no separator gets a tag of None.

    :param s: The string to be converted
    :type s: str
    :param chunk_label: The label to use for chunk nodes
    :type chunk_label: str
    :param root_label: The label to use for the root of the tree
    :type root_label: str
    :rtype: Tree
    """

    # A token is '[', ']', or a maximal run of non-bracket, non-space chars.
    token_re = re.compile(r'\[|\]|[^\[\]\s]+')

    root = Tree(root_label, [])
    stack = [root]
    for match in token_re.finditer(s):
        token = match.group()
        if token == '[':
            # Chunks may not nest: '[' is only legal at the top level.
            if len(stack) != 1:
                raise ValueError('Unexpected [ at char %d' % match.start())
            subtree = Tree(chunk_label, [])
            stack[-1].append(subtree)
            stack.append(subtree)
        elif token == ']':
            # ']' is only legal while exactly one chunk is open.
            if len(stack) != 2:
                raise ValueError('Unexpected ] at char %d' % match.start())
            stack.pop()
        elif sep is None:
            # No separator configured: store the raw token.
            stack[-1].append(token)
        else:
            word, tag = str2tuple(token, sep)
            if source_tagset and target_tagset:
                tag = map_tag(source_tagset, target_tagset, tag)
            stack[-1].append((word, tag))

    # A chunk left open at end-of-string is an error.
    if len(stack) != 1:
        raise ValueError('Expected ] at char %d' % len(s))
    return root

### CONLL