Python re.Scanner() Examples

The following are 18 code examples of re.Scanner(), collected from open-source projects. The source file, originating project, and license are listed above each example. You may also want to check out the other available functions and classes of the re module.
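re.Scanner is an undocumented but long-standing helper in the standard library's re module. It is constructed with a lexicon, a list of (pattern, action) pairs; scanner.scan(text) tries the patterns in order at each position, calls the matching action with the scanner and the matched text, collects the non-None return values, and returns a (results, remainder) tuple, where remainder is whatever could not be matched. The snippet below is a minimal illustrative sketch (the token names are made up for this illustration, not taken from any of the projects listed):

import re

# Each lexicon entry is (pattern, action). A callable action receives
# (scanner, token) and its return value is appended to the result list;
# a None action (or a None return value) discards the match.
scanner = re.Scanner([
    (r"[A-Za-z_]\w*", lambda s, tok: ("WORD", tok)),       # identifiers
    (r"\d+",          lambda s, tok: ("NUM", int(tok))),   # integers
    (r"\s+",          None),                               # skip whitespace
])

tokens, remainder = scanner.scan("foo 42 bar")
print(tokens)     # [('WORD', 'foo'), ('NUM', 42), ('WORD', 'bar')]
print(remainder)  # '' -- empty because everything was matched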
Example #1
Source File: test_re.py    From ironpython3 with Apache License 2.0
def test_scanner(self):
        def s_ident(scanner, token): return token
        def s_operator(scanner, token): return "op%s" % token
        def s_float(scanner, token): return float(token)
        def s_int(scanner, token): return int(token)

        scanner = Scanner([
            (r"[a-zA-Z_]\w*", s_ident),
            (r"\d+\.\d*", s_float),
            (r"\d+", s_int),
            (r"=|\+|-|\*|/", s_operator),
            (r"\s+", None),
            ])

        self.assertTrue(scanner.scanner.scanner("").pattern)

        self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
                         (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
                           'op+', 'bar'], '')) 
Example #2
Source File: test_re.py    From CTFCrackTools with GNU General Public License v3.0
def test_scanner(self):
        def s_ident(scanner, token): return token
        def s_operator(scanner, token): return "op%s" % token
        def s_float(scanner, token): return float(token)
        def s_int(scanner, token): return int(token)

        scanner = Scanner([
            (r"[a-zA-Z_]\w*", s_ident),
            (r"\d+\.\d*", s_float),
            (r"\d+", s_int),
            (r"=|\+|-|\*|/", s_operator),
            (r"\s+", None),
            ])

        self.assertTrue(scanner.scanner.scanner("").pattern)

        self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
                         (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
                           'op+', 'bar'], '')) 
Example #3
Source File: test_re.py    From CTFCrackTools-V2 with GNU General Public License v3.0
def test_scanner(self):
        def s_ident(scanner, token): return token
        def s_operator(scanner, token): return "op%s" % token
        def s_float(scanner, token): return float(token)
        def s_int(scanner, token): return int(token)

        scanner = Scanner([
            (r"[a-zA-Z_]\w*", s_ident),
            (r"\d+\.\d*", s_float),
            (r"\d+", s_int),
            (r"=|\+|-|\*|/", s_operator),
            (r"\s+", None),
            ])

        self.assertTrue(scanner.scanner.scanner("").pattern)

        self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
                         (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
                           'op+', 'bar'], '')) 
Example #4
Source File: test_re.py    From medicare-demo with Apache License 2.0
def test_scanner(self):
        def s_ident(scanner, token): return token
        def s_operator(scanner, token): return "op%s" % token
        def s_float(scanner, token): return float(token)
        def s_int(scanner, token): return int(token)

        scanner = Scanner([
            (r"[a-zA-Z_]\w*", s_ident),
            (r"\d+\.\d*", s_float),
            (r"\d+", s_int),
            (r"=|\+|-|\*|/", s_operator),
            (r"\s+", None),
            ])

        self.assertNotEqual(scanner.scanner.scanner("").pattern, None)

        self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
                         (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
                           'op+', 'bar'], '')) 
Example #5
Source File: test_re.py    From Project-New-Reign---Nemesis-Main with GNU General Public License v3.0
def test_scanner(self):
        def s_ident(scanner, token): return token
        def s_operator(scanner, token): return "op%s" % token
        def s_float(scanner, token): return float(token)
        def s_int(scanner, token): return int(token)

        scanner = Scanner([
            (r"[a-zA-Z_]\w*", s_ident),
            (r"\d+\.\d*", s_float),
            (r"\d+", s_int),
            (r"=|\+|-|\*|/", s_operator),
            (r"\s+", None),
            ])

        self.assertTrue(scanner.scanner.scanner("").pattern)

        self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
                         (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
                           'op+', 'bar'], '')) 
Example #6
Source File: test_re.py    From gcblue with BSD 3-Clause "New" or "Revised" License
def test_scanner(self):
        def s_ident(scanner, token): return token
        def s_operator(scanner, token): return "op%s" % token
        def s_float(scanner, token): return float(token)
        def s_int(scanner, token): return int(token)

        scanner = Scanner([
            (r"[a-zA-Z_]\w*", s_ident),
            (r"\d+\.\d*", s_float),
            (r"\d+", s_int),
            (r"=|\+|-|\*|/", s_operator),
            (r"\s+", None),
            ])

        self.assertNotEqual(scanner.scanner.scanner("").pattern, None)

        self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
                         (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
                           'op+', 'bar'], '')) 
Example #7
Source File: test_re.py    From Fluid-Designer with GNU General Public License v3.0
def test_scanner(self):
        def s_ident(scanner, token): return token
        def s_operator(scanner, token): return "op%s" % token
        def s_float(scanner, token): return float(token)
        def s_int(scanner, token): return int(token)

        scanner = Scanner([
            (r"[a-zA-Z_]\w*", s_ident),
            (r"\d+\.\d*", s_float),
            (r"\d+", s_int),
            (r"=|\+|-|\*|/", s_operator),
            (r"\s+", None),
            ])

        self.assertTrue(scanner.scanner.scanner("").pattern)

        self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
                         (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
                           'op+', 'bar'], '')) 
Example #8
Source File: test_re.py    From oss-ftp with MIT License
def test_scanner(self):
        def s_ident(scanner, token): return token
        def s_operator(scanner, token): return "op%s" % token
        def s_float(scanner, token): return float(token)
        def s_int(scanner, token): return int(token)

        scanner = Scanner([
            (r"[a-zA-Z_]\w*", s_ident),
            (r"\d+\.\d*", s_float),
            (r"\d+", s_int),
            (r"=|\+|-|\*|/", s_operator),
            (r"\s+", None),
            ])

        self.assertTrue(scanner.scanner.scanner("").pattern)

        self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
                         (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
                           'op+', 'bar'], '')) 
Example #9
Source File: test_re.py    From BinderFilter with MIT License
def test_scanner(self):
        def s_ident(scanner, token): return token
        def s_operator(scanner, token): return "op%s" % token
        def s_float(scanner, token): return float(token)
        def s_int(scanner, token): return int(token)

        scanner = Scanner([
            (r"[a-zA-Z_]\w*", s_ident),
            (r"\d+\.\d*", s_float),
            (r"\d+", s_int),
            (r"=|\+|-|\*|/", s_operator),
            (r"\s+", None),
            ])

        self.assertNotEqual(scanner.scanner.scanner("").pattern, None)

        self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
                         (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
                           'op+', 'bar'], '')) 
Example #10
Source File: test_re.py    From ironpython2 with Apache License 2.0
def test_scanner(self):
        def s_ident(scanner, token): return token
        def s_operator(scanner, token): return "op%s" % token
        def s_float(scanner, token): return float(token)
        def s_int(scanner, token): return int(token)

        scanner = Scanner([
            (r"[a-zA-Z_]\w*", s_ident),
            (r"\d+\.\d*", s_float),
            (r"\d+", s_int),
            (r"=|\+|-|\*|/", s_operator),
            (r"\s+", None),
            ])

        self.assertTrue(scanner.scanner.scanner("").pattern)

        self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
                         (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
                           'op+', 'bar'], '')) 
Example #11
Source File: cql.py    From cassandra-migrate with MIT License
def scanner(cls):
        if not getattr(cls, '_scanner', None):
            def h(tpe):
                return lambda sc, tk: cls.Token(tpe, tk)

            cls._scanner = re.Scanner([
                (r"(--|//).*?$",               h(cls.LINE_COMMENT)),
                (r"\/\*.+?\*\/",               h(cls.BLOCK_COMMENT)),
                (r'"(?:[^"\\]|\\.)*"',         h(cls.STRING)),
                (r"'(?:[^'\\]|\\.)*'",         h(cls.STRING)),
                (r"\$\$(?:[^\$\\]|\\.)*\$\$",  h(cls.STRING)),
                (r";",                         h(cls.SEMICOLON)),
                (r"\s+",                       h(cls.WHITESPACE)),
                (r".",                         h(cls.OTHER))
            ], re.MULTILINE | re.DOTALL)
        return cls._scanner 
Example #12
Source File: test_re.py    From jawfish with MIT License
def test_unlimited_zero_width_repeat(self):
        # Issue #9669
        self.assertIsNone(re.match(r'(?:a?)*y', 'z'))
        self.assertIsNone(re.match(r'(?:a?)+y', 'z'))
        self.assertIsNone(re.match(r'(?:a?){2,}y', 'z'))
        self.assertIsNone(re.match(r'(?:a?)*?y', 'z'))
        self.assertIsNone(re.match(r'(?:a?)+?y', 'z'))
        self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z'))

#    def test_scanner(self):
#        def s_ident(scanner, token): return token
#        def s_operator(scanner, token): return "op%s" % token
#        def s_float(scanner, token): return float(token)
#        def s_int(scanner, token): return int(token)
#
#        scanner = Scanner([
#            (r"[a-zA-Z_]\w*", s_ident),
#            (r"\d+\.\d*", s_float),
#            (r"\d+", s_int),
#            (r"=|\+|-|\*|/", s_operator),
#            (r"\s+", None),
#            ])
#
#        self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
#
#        self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
#                         (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
#                           'op+', 'bar'], '')) 
Example #13
Source File: query.py    From walrus with MIT License
def __init__(self, query, default_conjunction='AND'):
        self.query = query
        self.default_conjunction = default_conjunction

        def yield_symbol(symbol_type):
            def callback(scanner, token):
                return (symbol_type, token)
            return callback

        def yield_string(scanner, token):
            return ('STRING', token[1:-1].lower())

        def yield_simple_string(scanner, token):
            return ('STRING', token.lower())

        self.scanner = re.Scanner([
            (r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"', yield_string),
            (r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'", yield_string),
            (r'\bAND\b', yield_symbol('AND')),
            (r'\bOR\b', yield_symbol('OR')),
            (r'[@_\-\w]+', yield_simple_string),
            (r'&', yield_symbol('AND')),
            (r'\|', yield_symbol('OR')),
            (r'\(', yield_symbol('LPAREN')),
            (r'\)', yield_symbol('RPAREN')),
            (r'\s+', None),
        ], re.U) 
Example #14
Source File: tokenizer.py    From mpfshell with MIT License
def __init__(self):

        valid_fnchars = r"A-Za-z0-9_%#~@/\$!\*\.\+\-\:"

        tokens = [
            (r"[%s]+" % valid_fnchars, lambda scanner, token: Token(Token.STR, token)),
            (
                r'"[%s ]+"' % valid_fnchars,
                lambda scanner, token: Token(Token.QSTR, token[1:-1]),
            ),
            (r"[ ]", lambda scanner, token: None),
        ]

        self.scanner = re.Scanner(tokens) 
Example #15
Source File: constraint.py    From vnpy_crypto with MIT License
def _tokenize_constraint(string, variable_names):
    lparen_re = r"\("
    rparen_re = r"\)"
    op_re = "|".join([re.escape(op.token_type) for op in _ops])
    num_re = r"[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?"
    whitespace_re = r"\s+"

    # Prefer long matches:
    variable_names = sorted(variable_names, key=len, reverse=True)
    variable_re = "|".join([re.escape(n) for n in variable_names])

    lexicon = [
        (lparen_re, _token_maker(Token.LPAREN, string)),
        (rparen_re, _token_maker(Token.RPAREN, string)),
        (op_re, _token_maker("__OP__", string)),
        (variable_re, _token_maker("VARIABLE", string)),
        (num_re, _token_maker("NUMBER", string)),
        (whitespace_re, None),
        ]

    scanner = re.Scanner(lexicon)
    tokens, leftover = scanner.scan(string)
    if leftover:
        offset = len(string) - len(leftover)
        raise PatsyError("unrecognized token in constraint",
                            Origin(string, offset, offset + 1))

    return tokens 
Example #16
Source File: _parser.py    From clifford with BSD 3-Clause "New" or "Revised" License
def _tokenize(layout: Layout, mv_string: str):
    # Get the names of the canonical blades
    blade_name_index_map = {name: index for index, name in enumerate(layout.names)}

    tokenizer = re.Scanner([(
        r'\s+',
        lambda s, t: ('space', s.match, None)
    ), (
        r'\(',
        lambda s, t: ('(', s.match, None)
    ),  (
        r'\)',
        lambda s, t: (')', s.match, None)
    ), (
        r'[+-]',
        lambda s, t: ('sign', s.match, 1 if t == '+' else -1)
    ), (
        _unsigned_float_pattern,
        lambda s, t: ('coeff', s.match, float(t))
    ), (
        r'\^',
        lambda s, t: ('wedge', s.match, None)
    ), (
        r'\b(?:{})\b'.format('|'.join(
            re.escape(name)
            for name in layout.names
            if name
        )),
        lambda s, t: ('blade', s.match, blade_name_index_map[t])
    ), (
        r'.',
        lambda s, t: ('unrecognized', s.match, None)
    )])

    tokens, rest = tokenizer.scan(mv_string)
    assert not rest  # our unrecognized match should handle this
    return tokens + [
        ('end', re.compile(r'$').match(mv_string, len(mv_string)), None)
    ] 
Example #17
Source File: parsers.py    From PyMO with MIT License
def __init__(self):

        def identifier(scanner, token):
            return 'IDENT', token

        def operator(scanner, token):
            return 'OPERATOR', token

        def digit(scanner, token):
            return 'DIGIT', token

        def open_brace(scanner, token):
            return 'OPEN_BRACE', token

        def close_brace(scanner, token):
            return 'CLOSE_BRACE', token

        self.scanner = re.Scanner([
            (r'[a-zA-Z_]\w*', identifier),
            #(r'-*[0-9]+(\.[0-9]+)?', digit), # won't work for .34
            #(r'[-+]?[0-9]*\.?[0-9]+', digit), # won't work for 4.56e-2
            #(r'[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?', digit),
            (r'-*[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?', digit),
            (r'}', close_brace),
            (r'}', close_brace),
            (r'{', open_brace),
            (r':', None),
            (r'\s+', None)
        ]) 
Example #18
Source File: usage_lexer.py    From cmake_format with GNU General Public License v3.0
def tokenize_inner(contents):
  """
  Scan a string and return a list of Token objects representing the contents
  of the cmake listfile.
  """

  # Regexes are in priority order. Changing the order may alter the
  # behavior of the lexer
  scanner = re.Scanner([
      # Optional group brackets
      (r"\[", lambda s, t: (TokenType.LSQ_BRACKET, t)),
      (r"\]", lambda s, t: (TokenType.RSQ_BRACKET, t)),
      # Mandatory group brackets
      ("<", lambda s, t: (TokenType.LA_BRACKET, t)),
      (">", lambda s, t: (TokenType.RA_BRACKET, t)),
      # Parenthesis
      (r"\(", lambda s, t: (TokenType.LPAREN, t)),
      (r"\)", lambda s, t: (TokenType.RPAREN, t)),
      # Pipe character
      (r"\|", lambda s, t: (TokenType.PIPE, t)),
      # uppercase name
      (r"[A-Z0-9_]+", lambda s, t: (TokenType.BIGNAME, t)),
      # lowercase name
      (r"[a-z0-9_\-]+", lambda s, t: (TokenType.SMALLNAME, t)),
      # ellipsis
      (r"\.\.\.", lambda s, t: (TokenType.ELLIPSIS, t)),
      # whitespace
      (r"\s+", lambda s, t: (TokenType.WHITESPACE, t)),
  ], re.DOTALL)

  tokens, remainder = scanner.scan(contents)
  if remainder:
    raise ValueError("Unparsed tokens: {}".format(remainder))
  return tokens