Python _sre.compile() Examples

The following are 30 code examples of _sre.compile(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module _sre , or try the search function .
Example #1
Source File: test_re.py    From ironpython2 with Apache License 2.0 6 votes vote down vote up
def test_re_groupref_exists(self):
        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
                         ('(', 'a'))
        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
                         (None, 'a'))
        self.assertIsNone(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'))
        self.assertIsNone(re.match('^(\()?([^()]+)(?(1)\))$', '(a'))
        self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
                         ('a', 'b'))
        self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
                         (None, 'd'))
        self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
                         (None, 'd'))
        self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
                         ('a', ''))

        # Tests for bug #1177831: exercise groups other than the first group
        p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
        self.assertEqual(p.match('abc').groups(),
                         ('a', 'b', 'c'))
        self.assertEqual(p.match('ad').groups(),
                         ('a', None, 'd'))
        self.assertIsNone(p.match('abd'))
        self.assertIsNone(p.match('ac')) 
Example #2
Source File: test_re.py    From BinderFilter with MIT License 6 votes vote down vote up
def test_symbolic_groups(self):
        re.compile('(?P<a>x)(?P=a)(?(a)y)')
        re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
        self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
        self.assertRaises(re.error, re.compile, '(?Px)')
        self.assertRaises(re.error, re.compile, '(?P=)')
        self.assertRaises(re.error, re.compile, '(?P=1)')
        self.assertRaises(re.error, re.compile, '(?P=a)')
        self.assertRaises(re.error, re.compile, '(?P=a1)')
        self.assertRaises(re.error, re.compile, '(?P=a.)')
        self.assertRaises(re.error, re.compile, '(?P<)')
        self.assertRaises(re.error, re.compile, '(?P<>)')
        self.assertRaises(re.error, re.compile, '(?P<1>)')
        self.assertRaises(re.error, re.compile, '(?P<a.>)')
        self.assertRaises(re.error, re.compile, '(?())')
        self.assertRaises(re.error, re.compile, '(?(a))')
        self.assertRaises(re.error, re.compile, '(?(1a))')
        self.assertRaises(re.error, re.compile, '(?(a.))') 
Example #3
Source File: test_re.py    From ironpython2 with Apache License 2.0 6 votes vote down vote up
def test_debug_flag(self):
        pat = r'(\.)(?:[ch]|py)(?(1)$|: )'
        with captured_stdout() as out:
            re.compile(pat, re.DEBUG)
        dump = '''\
subpattern 1
  literal 46
subpattern None
  branch
    in
      literal 99
      literal 104
  or
    literal 112
    literal 121
subpattern None
  groupref_exists 1
    at at_end
  else
    literal 58
    literal 32
'''
        self.assertEqual(out.getvalue(), dump)
        # Debug output is output again even a second time (bypassing
        # the cache -- issue #20426).
        with captured_stdout() as out:
            re.compile(pat, re.DEBUG)
        self.assertEqual(out.getvalue(), dump) 
Example #4
Source File: test_re.py    From BinderFilter with MIT License 6 votes vote down vote up
def test_re_groupref_exists(self):
        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
                         ('(', 'a'))
        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
                         (None, 'a'))
        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
        self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
                         ('a', 'b'))
        self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
                         (None, 'd'))
        self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
                         (None, 'd'))
        self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
                         ('a', ''))

        # Tests for bug #1177831: exercise groups other than the first group
        p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
        self.assertEqual(p.match('abc').groups(),
                         ('a', 'b', 'c'))
        self.assertEqual(p.match('ad').groups(),
                         ('a', None, 'd'))
        self.assertEqual(p.match('abd'), None)
        self.assertEqual(p.match('ac'), None) 
Example #5
Source File: test_re.py    From jawfish with MIT License 6 votes vote down vote up
def test_bug_6509(self):
        # Replacement strings of both types must parse properly.
        # all strings
        pat = re.compile('a(\w)')
        self.assertEqual(pat.sub('b\\1', 'ac'), 'bc')
        pat = re.compile('a(.)')
        self.assertEqual(pat.sub('b\\1', 'a\u1234'), 'b\u1234')
        pat = re.compile('..')
        self.assertEqual(pat.sub(lambda m: 'str', 'a5'), 'str')

        # all bytes
        pat = re.compile(b'a(\w)')
        self.assertEqual(pat.sub(b'b\\1', b'ac'), b'bc')
        pat = re.compile(b'a(.)')
        self.assertEqual(pat.sub(b'b\\1', b'a\xCD'), b'b\xCD')
        pat = re.compile(b'..')
        self.assertEqual(pat.sub(lambda m: b'bytes', b'a5'), b'bytes') 
Example #6
Source File: test_re.py    From jawfish with MIT License 6 votes vote down vote up
def test_finditer(self):
        iter = re.finditer(r":+", "a:b::c:::d")
        self.assertEqual([item.group(0) for item in iter],
                         [":", "::", ":::"])

        pat = re.compile(r":+")
        iter = pat.finditer("a:b::c:::d", 1, 10)
        self.assertEqual([item.group(0) for item in iter],
                         [":", "::", ":::"])

        pat = re.compile(r":+")
        iter = pat.finditer("a:b::c:::d", pos=1, endpos=10)
        self.assertEqual([item.group(0) for item in iter],
                         [":", "::", ":::"])

        pat = re.compile(r":+")
        iter = pat.finditer("a:b::c:::d", endpos=10, pos=1)
        self.assertEqual([item.group(0) for item in iter],
                         [":", "::", ":::"])

        pat = re.compile(r":+")
        iter = pat.finditer("a:b::c:::d", pos=3, endpos=8)
        self.assertEqual([item.group(0) for item in iter],
                         ["::", "::"]) 
Example #7
Source File: test_re.py    From ironpython2 with Apache License 2.0 6 votes vote down vote up
def test_symbolic_groups(self):
        re.compile('(?P<a>x)(?P=a)(?(a)y)')
        re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
        self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
        self.assertRaises(re.error, re.compile, '(?Px)')
        self.assertRaises(re.error, re.compile, '(?P=)')
        self.assertRaises(re.error, re.compile, '(?P=1)')
        self.assertRaises(re.error, re.compile, '(?P=a)')
        self.assertRaises(re.error, re.compile, '(?P=a1)')
        self.assertRaises(re.error, re.compile, '(?P=a.)')
        self.assertRaises(re.error, re.compile, '(?P<)')
        self.assertRaises(re.error, re.compile, '(?P<>)')
        self.assertRaises(re.error, re.compile, '(?P<1>)')
        self.assertRaises(re.error, re.compile, '(?P<a.>)')
        self.assertRaises(re.error, re.compile, '(?())')
        self.assertRaises(re.error, re.compile, '(?(a))')
        self.assertRaises(re.error, re.compile, '(?(1a))')
        self.assertRaises(re.error, re.compile, '(?(a.))') 
Example #8
Source File: sre_compile.py    From kobo-predict with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def compile(p, flags=0):
    # internal: convert pattern list to internal format

    if isstring(p):
        pattern = p
        p = sre_parse.parse(p, flags)
    else:
        pattern = None

    code = _code(p, flags)

    # print(code)

    # map in either direction
    groupindex = p.pattern.groupdict
    indexgroup = [None] * p.pattern.groups
    for k, i in groupindex.items():
        indexgroup[i] = k

    return _sre.compile(
        pattern, flags | p.pattern.flags, code,
        p.pattern.groups-1,
        groupindex, indexgroup
        ) 
Example #9
Source File: test_re.py    From ironpython2 with Apache License 2.0 6 votes vote down vote up
def test_other_escapes(self):
        self.assertRaises(re.error, re.compile, "\\")
        self.assertEqual(re.match(r"\(", '(').group(), '(')
        self.assertIsNone(re.match(r"\(", ')'))
        self.assertEqual(re.match(r"\\", '\\').group(), '\\')
        self.assertEqual(re.match(r"[\]]", ']').group(), ']')
        self.assertIsNone(re.match(r"[\]]", '['))
        self.assertEqual(re.match(r"[a\-c]", '-').group(), '-')
        self.assertIsNone(re.match(r"[a\-c]", 'b'))
        self.assertEqual(re.match(r"[\^a]+", 'a^').group(), 'a^')
        self.assertIsNone(re.match(r"[\^a]+", 'b'))
        re.purge()  # for warnings
        for c in 'ceghijklmopquyzCEFGHIJKLMNOPQRTUVXY':
            warn = FutureWarning if c in 'Uu' else DeprecationWarning
            with check_py3k_warnings(('', warn)):
                self.assertEqual(re.match('\\%c$' % c, c).group(), c)
                self.assertIsNone(re.match('\\%c' % c, 'a'))
        for c in 'ceghijklmopquyzABCEFGHIJKLMNOPQRTUVXYZ':
            warn = FutureWarning if c in 'Uu' else DeprecationWarning
            with check_py3k_warnings(('', warn)):
                self.assertEqual(re.match('[\\%c]$' % c, c).group(), c)
                self.assertIsNone(re.match('[\\%c]' % c, 'a')) 
Example #10
Source File: test_re.py    From jawfish with MIT License 6 votes vote down vote up
def test_re_groupref_exists(self):
        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
                         ('(', 'a'))
        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
                         (None, 'a'))
        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
        self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
                         ('a', 'b'))
        self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
                         (None, 'd'))
        self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
                         (None, 'd'))
        self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
                         ('a', ''))

        # Tests for bug #1177831: exercise groups other than the first group
        p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
        self.assertEqual(p.match('abc').groups(),
                         ('a', 'b', 'c'))
        self.assertEqual(p.match('ad').groups(),
                         ('a', None, 'd'))
        self.assertEqual(p.match('abd'), None)
        self.assertEqual(p.match('ac'), None) 
Example #11
Source File: test_re.py    From ironpython2 with Apache License 2.0 5 votes vote down vote up
def test_bug_2537(self):
        # issue 2537: empty submatches
        for outer_op in ('{0,}', '*', '+', '{1,187}'):
            for inner_op in ('{0,}', '*', '?'):
                r = re.compile("^((x|y)%s)%s" % (inner_op, outer_op))
                m = r.match("xyyzy")
                self.assertEqual(m.group(0), "xyy")
                self.assertEqual(m.group(1), "")
                self.assertEqual(m.group(2), "y") 
Example #12
Source File: test_re.py    From BinderFilter with MIT License 5 votes vote down vote up
def test_inline_flags(self):
        # Bug #1700
        upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
        lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow

        p = re.compile(upper_char, re.I | re.U)
        q = p.match(lower_char)
        self.assertNotEqual(q, None)

        p = re.compile(lower_char, re.I | re.U)
        q = p.match(upper_char)
        self.assertNotEqual(q, None)

        p = re.compile('(?i)' + upper_char, re.U)
        q = p.match(lower_char)
        self.assertNotEqual(q, None)

        p = re.compile('(?i)' + lower_char, re.U)
        q = p.match(upper_char)
        self.assertNotEqual(q, None)

        p = re.compile('(?iu)' + upper_char)
        q = p.match(lower_char)
        self.assertNotEqual(q, None)

        p = re.compile('(?iu)' + lower_char)
        q = p.match(upper_char)
        self.assertNotEqual(q, None) 
Example #13
Source File: sre_compile.py    From kobo-predict with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def _code(p, flags):

    flags = p.pattern.flags | flags
    code = []

    # compile info block
    _compile_info(code, p, flags)

    # compile the pattern
    _compile(code, p.data, flags)

    code.append(SUCCESS)

    return code 
Example #14
Source File: test_re.py    From BinderFilter with MIT License 5 votes vote down vote up
def test_bug_764548(self):
        # bug 764548, re.compile() barfs on str/unicode subclasses
        try:
            unicode
        except NameError:
            return  # no problem if we have no unicode
        class my_unicode(unicode): pass
        pat = re.compile(my_unicode("abc"))
        self.assertEqual(pat.match("xyz"), None) 
Example #15
Source File: sre_compile.py    From kobo-predict with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def _compile_charset(charset, flags, code, fixup=None, fixes=None):
    # compile charset subprogram
    emit = code.append
    for op, av in _optimize_charset(charset, fixup, fixes):
        emit(op)
        if op is NEGATE:
            pass
        elif op is LITERAL:
            emit(av)
        elif op is RANGE or op is RANGE_IGNORE:
            emit(av[0])
            emit(av[1])
        elif op is CHARSET:
            code.extend(av)
        elif op is BIGCHARSET:
            code.extend(av)
        elif op is CATEGORY:
            if flags & SRE_FLAG_LOCALE:
                emit(CH_LOCALE[av])
            elif flags & SRE_FLAG_UNICODE:
                emit(CH_UNICODE[av])
            else:
                emit(av)
        else:
            raise error("internal: unsupported set operator %r" % (op,))
    emit(FAILURE) 
Example #16
Source File: test_re.py    From BinderFilter with MIT License 5 votes vote down vote up
def test_bug_926075(self):
        try:
            unicode
        except NameError:
            return # no problem if we have no unicode
        self.assertTrue(re.compile('bug_926075') is not
                     re.compile(eval("u'bug_926075'"))) 
Example #17
Source File: test_re.py    From BinderFilter with MIT License 5 votes vote down vote up
def test_bug_931848(self):
        try:
            unicode
        except NameError:
            pass
        pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
        self.assertEqual(re.compile(pattern).split("a.b.c"),
                         ['a','b','c']) 
Example #18
Source File: test_re.py    From BinderFilter with MIT License 5 votes vote down vote up
def test_empty_array(self):
        # SF buf 1647541
        import array
        for typecode in 'cbBuhHiIlLfd':
            a = array.array(typecode)
            self.assertEqual(re.compile("bla").match(a), None)
            self.assertEqual(re.compile("").match(a).groups(), ()) 
Example #19
Source File: test_re.py    From BinderFilter with MIT License 5 votes vote down vote up
def test_dealloc(self):
        # issue 3299: check for segfault in debug build
        import _sre
        # the overflow limit is different on wide and narrow builds and it
        # depends on the definition of SRE_CODE (see sre.h).
        # 2**128 should be big enough to overflow on both. For smaller values
        # a RuntimeError is raised instead of OverflowError.
        long_overflow = 2**128
        self.assertRaises(TypeError, re.finditer, "a", {})
        self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow]) 
Example #20
Source File: test_re.py    From ironpython2 with Apache License 2.0 5 votes vote down vote up
def test_keyword_parameters(self):
        # Issue #20283: Accepting the string keyword parameter.
        pat = re.compile(r'(ab)')
        self.assertEqual(
            pat.match(string='abracadabra', pos=7, endpos=10).span(), (7, 9))
        self.assertEqual(
            pat.search(string='abracadabra', pos=3, endpos=10).span(), (7, 9))
        self.assertEqual(
            pat.findall(string='abracadabra', pos=3, endpos=10), ['ab'])
        self.assertEqual(
            pat.split(string='abracadabra', maxsplit=1),
            ['', 'ab', 'racadabra']) 
Example #21
Source File: test_re.py    From BinderFilter with MIT License 5 votes vote down vote up
def test_weakref(self):
        s = 'QabbbcR'
        x = re.compile('ab+c')
        y = proxy(x)
        self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR')) 
Example #22
Source File: test_re.py    From ironpython2 with Apache License 2.0 5 votes vote down vote up
def test_group_name_in_exception(self):
        # Issue 17341: Poor error message when compiling invalid regex
        with self.assertRaisesRegexp(sre_constants.error, '\?foo'):
            re.compile('(?P<?foo>)') 
Example #23
Source File: test_re.py    From ironpython2 with Apache License 2.0 5 votes vote down vote up
def test_backref_group_name_in_exception(self):
        # Issue 17341: Poor error message when compiling invalid regex
        with self.assertRaisesRegexp(sre_constants.error, '<foo>'):
            re.compile('(?P=<foo>)') 
Example #24
Source File: test_re.py    From ironpython2 with Apache License 2.0 5 votes vote down vote up
def test_repeat_minmax_overflow_maxrepeat(self):
        try:
            from _sre import MAXREPEAT
        except ImportError:
            self.skipTest('requires _sre.MAXREPEAT constant')
        string = "x" * 100000
        self.assertIsNone(re.match(r".{%d}" % (MAXREPEAT - 1), string))
        self.assertEqual(re.match(r".{,%d}" % (MAXREPEAT - 1), string).span(),
                         (0, 100000))
        self.assertIsNone(re.match(r".{%d,}?" % (MAXREPEAT - 1), string))
        self.assertRaises(OverflowError, re.compile, r".{%d}" % MAXREPEAT)
        self.assertRaises(OverflowError, re.compile, r".{,%d}" % MAXREPEAT)
        self.assertRaises(OverflowError, re.compile, r".{%d,}?" % MAXREPEAT) 
Example #25
Source File: test_re.py    From ironpython2 with Apache License 2.0 5 votes vote down vote up
def test_repeat_minmax_overflow(self):
        # Issue #13169
        string = "x" * 100000
        self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
        self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
        self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
        self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
        self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
        self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
        # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
        self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
        self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
        self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
        self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128)) 
Example #26
Source File: test_re.py    From ironpython2 with Apache License 2.0 5 votes vote down vote up
def test_compile(self):
        # Test return value when given string and pattern as parameter
        pattern = re.compile('random pattern')
        self.assertIsInstance(pattern, re._pattern_type)
        same_pattern = re.compile(pattern)
        self.assertIsInstance(same_pattern, re._pattern_type)
        self.assertIs(same_pattern, pattern)
        # Test behaviour when not given a string or pattern as parameter
        self.assertRaises(TypeError, re.compile, 0) 
Example #27
Source File: test_re.py    From ironpython2 with Apache License 2.0 5 votes vote down vote up
def test_dollar_matches_twice(self):
        "$ matches the end of string, and just before the terminating \n"
        pattern = re.compile('$')
        self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
        self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
        self.assertEqual(pattern.sub('#', '\n'), '#\n#')

        pattern = re.compile('$', re.MULTILINE)
        self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
        self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
        self.assertEqual(pattern.sub('#', '\n'), '#\n#') 
Example #28
Source File: test_re.py    From ironpython2 with Apache License 2.0 5 votes vote down vote up
def test_empty_array(self):
        # SF buf 1647541
        import array
        typecodes = 'cbBhHiIlLfd'
        if have_unicode:
            typecodes += 'u'
        for typecode in typecodes:
            a = array.array(typecode)
            self.assertIsNone(re.compile("bla").match(a))
            self.assertEqual(re.compile("").match(a).groups(), ()) 
Example #29
Source File: test_re.py    From ironpython2 with Apache License 2.0 5 votes vote down vote up
def test_bug_581080(self):
        iter = re.finditer(r"\s", "a b")
        self.assertEqual(iter.next().span(), (1,2))
        self.assertRaises(StopIteration, iter.next)

        scanner = re.compile(r"\s").scanner("a b")
        self.assertEqual(scanner.search().span(), (1, 2))
        self.assertIsNone(scanner.search()) 
Example #30
Source File: test_re.py    From ironpython2 with Apache License 2.0 5 votes vote down vote up
def test_bug_931848(self):
        pattern = u(r"[\u002E\u3002\uFF0E\uFF61]")
        self.assertEqual(re.compile(pattern).split("a.b.c"),
                         ['a','b','c'])