Python re.Scanner() Examples

The following are code examples showing how to use re.Scanner(). They are extracted from open source Python projects. You can vote up the examples you like or vote down the examples you don't like. You can also save this page to your account.

Example 1
Project: ndk-python   Author: gittor   File: test_re.py    (license) View Source Project 7 votes vote down vote up
def test_scanner(self):
        def s_ident(scanner, token): return token
        def s_operator(scanner, token): return "op%s" % token
        def s_float(scanner, token): return float(token)
        def s_int(scanner, token): return int(token)

        scanner = Scanner([
            (r"[a-zA-Z_]\w*", s_ident),
            (r"\d+\.\d*", s_float),
            (r"\d+", s_int),
            (r"=|\+|-|\*|/", s_operator),
            (r"\s+", None),
            ])

        self.assertNotEqual(scanner.scanner.scanner("").pattern, None)

        self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
                         (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
                           'op+', 'bar'], '')) 
Example 2
Project: cassandra-migrate   Author: Cobliteam   File: cql.py    (license) View Source Project 6 votes vote down vote up
def scanner(cls):
        """Return the class-level ``re.Scanner``, building it on first use.

        The scanner is stored on ``cls._scanner`` and reused by later calls.
        Every lexicon rule wraps the matched text as
        ``cls.Token(<token type>, <matched text>)``.

        :return: The ``re.Scanner`` instance cached on ``cls._scanner``.
        """
        if not getattr(cls, '_scanner', None):
            def h(tpe):
                # Bind the token type now; the scanner supplies itself and
                # the matched text when the rule fires.
                return lambda sc, tk: cls.Token(tpe, tk)

            cls._scanner = re.Scanner([
                # Non-capturing group.
                # NOTE(review): re.Scanner appears to select the rule from
                # the match's group index, so rules should not introduce
                # capture groups of their own -- confirm against the re
                # module source.
                (r"(?:--|//).*?$",             h(cls.LINE_COMMENT)),
                # ``.*?`` rather than ``.+?``: an empty ``/**/`` comment must
                # end at its own terminator instead of running on (DOTALL)
                # to the next ``*/`` further down the input.
                (r"\/\*.*?\*\/",               h(cls.BLOCK_COMMENT)),
                (r'"(?:[^"\\]|\\.)*"',         h(cls.STRING)),
                (r"'(?:[^'\\]|\\.)*'",         h(cls.STRING)),
                (r"\$\$(?:[^\$\\]|\\.)*\$\$",  h(cls.STRING)),
                (r";",                         h(cls.SEMICOLON)),
                (r"\s+",                       h(cls.WHITESPACE)),
                # Catch-all so that every remaining character is tokenized.
                (r".",                         h(cls.OTHER))
            ], re.MULTILINE | re.DOTALL)
        return cls._scanner
Example 3
Project: zippy   Author: securesystemslab   File: test_re.py    (license) View Source Project 6 votes vote down vote up
def test_scanner(self):
        def s_ident(scanner, token): return token
        def s_operator(scanner, token): return "op%s" % token
        def s_float(scanner, token): return float(token)
        def s_int(scanner, token): return int(token)

        scanner = Scanner([
            (r"[a-zA-Z_]\w*", s_ident),
            (r"\d+\.\d*", s_float),
            (r"\d+", s_int),
            (r"=|\+|-|\*|/", s_operator),
            (r"\s+", None),
            ])

        self.assertNotEqual(scanner.scanner.scanner("").pattern, None)

        self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
                         (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
                           'op+', 'bar'], '')) 
Example 4
Project: oil   Author: oilshell   File: test_re.py    (license) View Source Project 6 votes vote down vote up
def test_scanner(self):
        def s_ident(scanner, token): return token
        def s_operator(scanner, token): return "op%s" % token
        def s_float(scanner, token): return float(token)
        def s_int(scanner, token): return int(token)

        scanner = Scanner([
            (r"[a-zA-Z_]\w*", s_ident),
            (r"\d+\.\d*", s_float),
            (r"\d+", s_int),
            (r"=|\+|-|\*|/", s_operator),
            (r"\s+", None),
            ])

        self.assertTrue(scanner.scanner.scanner("").pattern)

        self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
                         (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
                           'op+', 'bar'], '')) 
Example 5
Project: python2-tracer   Author: extremecoders-re   File: test_re.py    (license) View Source Project 6 votes vote down vote up
def test_scanner(self):
        def s_ident(scanner, token): return token
        def s_operator(scanner, token): return "op%s" % token
        def s_float(scanner, token): return float(token)
        def s_int(scanner, token): return int(token)

        scanner = Scanner([
            (r"[a-zA-Z_]\w*", s_ident),
            (r"\d+\.\d*", s_float),
            (r"\d+", s_int),
            (r"=|\+|-|\*|/", s_operator),
            (r"\s+", None),
            ])

        self.assertTrue(scanner.scanner.scanner("").pattern)

        self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
                         (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
                           'op+', 'bar'], '')) 
Example 6
Project: web_ctp   Author: molebot   File: test_re.py    (license) View Source Project 6 votes vote down vote up
def test_scanner(self):
        def s_ident(scanner, token): return token
        def s_operator(scanner, token): return "op%s" % token
        def s_float(scanner, token): return float(token)
        def s_int(scanner, token): return int(token)

        scanner = Scanner([
            (r"[a-zA-Z_]\w*", s_ident),
            (r"\d+\.\d*", s_float),
            (r"\d+", s_int),
            (r"=|\+|-|\*|/", s_operator),
            (r"\s+", None),
            ])

        self.assertNotEqual(scanner.scanner.scanner("").pattern, None)

        self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
                         (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
                           'op+', 'bar'], '')) 
Example 7
Project: whitepy   Author: yasn77   File: ws_token.py    (license) View Source Project 6 votes vote down vote up
def _scan_int(self, string, const):
        """Scan ``string`` as an integer token and store the result on ``self``.

        Sets ``self.type`` to ``'INT'`` and ``self.value`` to the joined text
        of every scanned token. When ``const`` is ``'SIGNED_INT'`` a sign
        rule is placed ahead of the value rule.

        :param string: Text to scan.
        :param const: Token kind; only ``'SIGNED_INT'`` is treated specially.
        """
        # TODO: Add better invalid integer handling
        #       Check for integer sign, possibly treat unsigned integer
        #       as POSITIVE
        space_char = CHAR_MAP['space']
        tab_char = CHAR_MAP['tab']

        # Each rule tags the matched text with its kind.
        sign_rule = (r"^[{}{}]".format(space_char, tab_char),
                     lambda scanner, token: ("INT_SIGN", token))
        value_rule = (r".[{}{}]*".format(space_char, tab_char),
                      lambda scanner, token: ("INT_VAL", token))

        if const == 'SIGNED_INT':
            rules = [sign_rule, value_rule]
        else:
            rules = [value_rule]

        found, _unscanned = Scanner(rules).scan(string)
        self.type = 'INT'
        try:
            # Keep only the matched text (second item) of every tagged pair.
            self.value = ''.join(pair[1] for pair in found)
        except IndexError:
            print("Hit IndexError, string trying to check is: {}".
                  format(dbg(string)))
Example 8
Project: pefile.pypy   Author: cloudtracer   File: test_re.py    (license) View Source Project 6 votes vote down vote up
def test_scanner(self):
        def s_ident(scanner, token): return token
        def s_operator(scanner, token): return "op%s" % token
        def s_float(scanner, token): return float(token)
        def s_int(scanner, token): return int(token)

        scanner = Scanner([
            (r"[a-zA-Z_]\w*", s_ident),
            (r"\d+\.\d*", s_float),
            (r"\d+", s_int),
            (r"=|\+|-|\*|/", s_operator),
            (r"\s+", None),
            ])

        self.assertTrue(scanner.scanner.scanner("").pattern)

        self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
                         (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
                           'op+', 'bar'], '')) 
Example 9
Project: neon-py   Author: paveldedik   File: decoder.py    (license) View Source Project 6 votes vote down vote up
def parse(input_string):
    """Parse a NEON-formatted string.

    The input is first split into tokens with ``tokenize`` and the tokens
    are then handed to a fresh ``Indent`` instance for parsing.

    :param input_string: String to parse.
    :type input_string: string
    :return: Parsed string.
    :rtype: :class:`OrderedDict`
    """
    token_stream = tokenize(input_string)
    indent_parser = Indent()
    return indent_parser.parse(token_stream)


#: The Scanner is instantiated with a list of re's and associated
#: functions. It is used to scan a string, returning a list of parts
#: which match the given re's.
#:
#: See: http://stackoverflow.com/a/17214398/2874089 
Example 10
Project: ouroboros   Author: pybee   File: test_re.py    (license) View Source Project 6 votes vote down vote up
def test_scanner(self):
        def s_ident(scanner, token): return token
        def s_operator(scanner, token): return "op%s" % token
        def s_float(scanner, token): return float(token)
        def s_int(scanner, token): return int(token)

        scanner = Scanner([
            (r"[a-zA-Z_]\w*", s_ident),
            (r"\d+\.\d*", s_float),
            (r"\d+", s_int),
            (r"=|\+|-|\*|/", s_operator),
            (r"\s+", None),
            ])

        self.assertTrue(scanner.scanner.scanner("").pattern)

        self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
                         (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
                           'op+', 'bar'], '')) 
Example 11
Project: kbe_server   Author: xiaohaoppy   File: test_re.py    (license) View Source Project 6 votes vote down vote up
def test_scanner(self):
        def s_ident(scanner, token): return token
        def s_operator(scanner, token): return "op%s" % token
        def s_float(scanner, token): return float(token)
        def s_int(scanner, token): return int(token)

        scanner = Scanner([
            (r"[a-zA-Z_]\w*", s_ident),
            (r"\d+\.\d*", s_float),
            (r"\d+", s_int),
            (r"=|\+|-|\*|/", s_operator),
            (r"\s+", None),
            ])

        self.assertTrue(scanner.scanner.scanner("").pattern)

        self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
                         (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
                           'op+', 'bar'], '')) 
Example 12
Project: whitepy   Author: yasn77   File: ws_token.py    (license) View Source Project 5 votes vote down vote up
def _scan_command(self, line, pos, const):
        patterns = [(r"^{}".format(i[0]), i[1]) for i in const]
        scanner = Scanner(patterns)
        found, remainder = scanner.scan(line[pos:])
        self.type = found[0]
        self.value = [i[0] for i in const if i[1] == self.type][0] 
Example 13
Project: mpfshell   Author: wendlers   File: tokenizer.py    (license) View Source Project 5 votes vote down vote up
def __init__(self):

        valid_fnchars = "A-Za-z0-9_%#[email protected]/\$!\*\.\+\-"

        tokens = [
            (r'[%s]+' % valid_fnchars, lambda scanner, token: Token(Token.STR, token)),
            (r'"[%s ]+"' % valid_fnchars, lambda scanner, token: Token(Token.QSTR, token[1:-1])),
            (r'[ ]', lambda scanner, token: None)
        ]

        self.scanner = re.Scanner(tokens) 
Example 14
Project: cuda-profiler   Author: NVIDIA   File: wrap.py    (license) View Source Project 5 votes vote down vote up
def __init__(self, lexicon):
        self.line_no = -1
        self.scanner = re.Scanner(lexicon) 
Example 15
Project: xiaodi   Author: shenaishiren   File: safeeval.py    (license) View Source Project 4 votes vote down vote up
def safe_eval(eval_str, **kw):
    '''
    Evaluate ``eval_str`` with ``eval`` inside a restricted namespace.

    The string is first run through a small ``re.Scanner``; its only
    checking rule rejects a bare word that spells "true" in any casing
    other than ``True``. Scanning stops at the first text no rule matches
    and the unscanned remainder is ignored. The string is then evaluated
    with ``__builtins__`` set to ``None`` and a local namespace holding the
    names in ``safe_list`` plus ``True`` and ``False``.

    SECURITY: ``eval`` with ``__builtins__`` removed is not a sandbox and
    must not be relied on for untrusted input.

    NOTE(review): ``**kw`` is accepted but never read. The original
    docstring was mis-encoded, so its intended meaning could not be
    recovered -- confirm whether these were meant to be exposed to the
    evaluated expression.

    :param eval_str: Source text of the expression to evaluate.
    :param kw: Currently unused.
    :return: Whatever ``eval`` returns for ``eval_str``.
    :raises ValueError: If the scanner meets a bare word such as ``true``
        or ``TRUE``.
    '''
    import math

    # callback functions
    def start_structure(scanner, token):
        return "start structure", token

    def key(scanner, token):
        return "key", token

    def value(scanner, token):
        # Reject "true" in any casing other than the Python literal.
        if token.lower() == 'true' and token != 'True':
            # A plain string cannot be raised; use a real exception type.
            raise ValueError('value Error "%s"' % token)

    def str_value(scanner, token):
        return "string value", token

    def end_structure(scanner, token):
        return "end start structure", token

    scanner = re.Scanner([
        (r"[{\[(]", start_structure),
        (r"[\w]+\s*:", key),
        (r"['\"][^'\"]+['\"]", str_value),
        (r"[\w]+", value),
        (r"\s*,\s*", None),
        (r"[})\]]", end_structure),
    ])

    # Run the scan only for the check performed in ``value``; the tokens
    # and the remainder are not needed afterwards.
    scanner.scan(eval_str)

    # make a list of safe functions
    safe_list = ['math', 'acos', 'asin', 'atan', 'atan2', 'ceil', 'cos', 'cosh', 'degrees', 'e', 'exp', 'fabs',
                 'floor', 'fmod', 'frexp', 'hypot', 'ldexp', 'log', 'log10', 'modf', 'pi', 'pow', 'radians', 'sin',
                 'sinh', 'sqrt', 'tan', 'tanh']
    # Resolve each name on the math module. The function's local namespace
    # never contained these names, so looking them up there mapped every
    # one to None. 'math' has no attribute of that name and stays None.
    safe_dict = {name: getattr(math, name, None) for name in safe_list}
    # add any needed builtins back in.
    safe_dict['True'] = True
    safe_dict['False'] = False
    return eval(eval_str, {'__builtins__': None}, safe_dict)