Python re.L() Examples

The following are code examples showing how to use re.L(). They are extracted from open source Python projects. You can vote up the examples you like or vote down the examples you don't like. You can also save this page to your account.

Example 1
Project: LDA_RecEngine   Author: easonchan1213   File: utils.py    (license) View Source Project 6 votes vote down vote up
def preprocessing(content):
    """Strip digits, ASCII punctuation, newlines and extra symbols from text.

    The cleaning steps run in a fixed order:

    1. remove every run of digits;
    2. remove every character listed in ``string.punctuation``;
    3. remove newlines and each symbol listed in ``remove_punc``;
    4. best effort: pass the text through
       ``parsing.strip_multiple_whitespaces`` (defined outside this block;
       judging by its name it collapses repeated whitespace).

    :param content: text to clean.
    :return: the cleaned text.  When step 4 fails a warning is printed and
        the text is returned as it was after step 3.
    """
    # NOTE(review): the repeated '?' entries look like non-ASCII punctuation
    # that was lost to an encoding problem -- confirm against the upstream file.
    remove_punc = ('? ? ? ? ? ? ? ? ? —').split(' ')

    # Step 1: digits only.  '\d+' replaces '\d*', which also matched the empty
    # string at every position.  re.L is dropped: it does not influence '\d'
    # and Python 3.6+ rejects it for str patterns.
    content = re.sub(r'\d+', "", content)

    # Step 2: ASCII punctuation.
    content = re.sub('[%s]' % re.escape(string.punctuation), "", content)

    # Step 3: newlines and the extra symbols.
    content = content.replace('\n', '')
    for punc in remove_punc:
        content = content.replace(punc, '')

    # Step 4: whitespace normalisation stays best-effort, but no longer
    # swallows KeyboardInterrupt/SystemExit the way a bare ``except:`` did.
    try:
        content = parsing.strip_multiple_whitespaces(content)
    except Exception:
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print('Warning : failed to strip whitespaces @ ')

    return content
Example 2
Project: txt2evernote   Author: Xunius   File: textparse.py    (GNU General Public License v3.0) View Source Project 5 votes vote down vote up
def defSyntax(self):
    '''Define re patterns according to syntax.

    Reads ``self.syntax`` and stores three attributes on ``self``:

    * ``_img_re``     -- compiled pattern for an image reference,
    * ``_h_re_base``  -- verbose header pattern template with one ``%s``
      placeholder for the repetition count of the header marker,
    * ``_all_h_re``   -- ``_h_re_base`` compiled for 1 to 6 markers.

    The image patterns used to be compiled with ``re.L``.  That flag does
    not change how these patterns match (they contain no ``\\w``, ``\\b`` or
    case-insensitive parts) and Python 3.6+ raises ValueError when it is
    combined with a str pattern, so it is no longer passed.

    Raises:
        ValueError: ``self.syntax`` is neither 'markdown' nor 'zim'
            (ValueError is still caught by ``except Exception``).
    '''

    #------------------REGEX patterns------------------

    if self.syntax == 'markdown':

        self._img_re = re.compile('^(.*)!\\[(.+?)\\]\\((.+?)\\)', re.M)
        # Two alternatives: an underlined (=== / ---) header, or a header
        # introduced by a run of '#'.
        self._h_re_base = r'''
            (^(.+)[ \t]*\n(=+|-+)[ \t]*\n+)
            |
            (^(\#{%s})  # \1 = string of #'s
            [ \t]*
            (.+?)       # \2 = Header text
            [ \t]*
            (?<!\\)     # ensure not an escaped trailing '#'
            \#*         # optional closing #'s (not counted)
            \n+
            )
            '''
        self._all_h_re = re.compile(self._h_re_base % '1,6', re.X | re.M)

    elif self.syntax == 'zim':

        self._img_re = re.compile('^(.*)\\{\\{(.+?)\\}\\}(.*)$', re.M)
        # Header text enclosed by the same number of '=' on both sides.
        self._h_re_base = r'''
                ^(\={%s})  # \1 = string of ='s
                [ \t]*
                (.+?)       # \2 = Header text
                [ \t]*
                \1
                \n+
                '''
        self._all_h_re = re.compile(self._h_re_base % '1,6', re.X | re.M)

    else:
        raise ValueError("Unknown syntax %s" % self.syntax)

    return
Example 3
Project: abusehelper   Author: Exploit-install   File: test_atoms.py    (license) View Source Project 5 votes vote down vote up
def test_from_re(self):
    """Check how ``RegExp.from_re`` treats the flags of a compiled pattern."""
    # re.U and re.S are implicitly set, so they convert to a plain RegExp.
    for implied in (re.U, re.S):
        converted = RegExp.from_re(re.compile("a", implied))
        self.assertEqual(converted, RegExp("a"))

    # re.I is the one flag that can be carried over explicitly.
    case_insensitive = RegExp.from_re(re.compile("a", re.I))
    self.assertEqual(case_insensitive, RegExp("a", ignore_case=True))

    # re.M, re.L and re.X must be rejected with ValueError.
    for forbidden in (re.M, re.L, re.X):
        compiled = re.compile("a", forbidden)
        self.assertRaises(ValueError, RegExp.from_re, compiled)
Example 4
Project: backrefs   Author: facelessuser   File: bre.py    (license) View Source Project 5 votes vote down vote up
def iternext(self):
    """
    Return the next logical character of the string and advance the cursor.

    A backslash followed by a reference accepted by ``_re_search_ref`` is
    returned together with that reference as a single item, and a bracket
    sequence accepted by ``_re_posix`` is returned whole.  Anything else is
    returned one character at a time.

    Raises StopIteration once ``index`` has passed ``max_index`` and
    SyntaxError for a reference that is missing its ``{...}`` argument.
    """

    position = self.index
    if position > self.max_index:
        raise StopIteration

    text = self.string
    token = text[position:position + 1]

    if token == self._b_slash:
        found = self._re_search_ref.match(text[position + 1:])
        if found:
            ref = found.group(0)
            if len(ref) == 1 and ref in self._long_search_refs:
                # A lone "long" reference lacks its argument; report the
                # form that was expected.
                complaints = (
                    (self._unicode_name, 'Format for Unicode name is \\N{name}!'),
                    (self._uni_prop, 'Format for Unicode property is \\p{property}!'),
                    (self._inverse_uni_prop, 'Format for inverse Unicode property is \\P{property}!'),
                )
                for bare, message in complaints:
                    if ref == bare:
                        raise SyntaxError(message)
            # Keep the backslash and its reference together.
            token += found.group(1) or found.group(2)
    elif token == self._ls_bracket:
        found = self._re_posix.match(text[position:])
        if found:
            token = found.group(0)

    self.index += len(token)
    self.current = token
    return self.current


# Templates 
Example 5
Project: zippy   Author: securesystemslab   File: test_re.py    (license) View Source Project 5 votes vote down vote up
def test_constants(self):
        self.assertEqual(re.I, re.IGNORECASE)
        self.assertEqual(re.L, re.LOCALE)
        self.assertEqual(re.M, re.MULTILINE)
        self.assertEqual(re.S, re.DOTALL)
        self.assertEqual(re.X, re.VERBOSE) 
Example 6
Project: zippy   Author: securesystemslab   File: test_re.py    (license) View Source Project 5 votes vote down vote up
def test_flags(self):
        for flag in [re.I, re.M, re.X, re.S, re.L]:
            self.assertNotEqual(re.compile('^pattern$', flag), None) 
Example 7
Project: oil   Author: oilshell   File: test_re.py    (license) View Source Project 5 votes vote down vote up
def test_constants(self):
        self.assertEqual(re.I, re.IGNORECASE)
        self.assertEqual(re.L, re.LOCALE)
        self.assertEqual(re.M, re.MULTILINE)
        self.assertEqual(re.S, re.DOTALL)
        self.assertEqual(re.X, re.VERBOSE) 
Example 8
Project: oil   Author: oilshell   File: test_re.py    (license) View Source Project 5 votes vote down vote up
def test_flags(self):
        for flag in [re.I, re.M, re.X, re.S, re.L]:
            self.assertTrue(re.compile('^pattern$', flag)) 
Example 9
Project: oil   Author: oilshell   File: test_re.py    (license) View Source Project 5 votes vote down vote up
def check_en_US_iso88591(self):
    """Check locale-aware case folding under the ``en_US.iso88591`` locale.

    Sets LC_CTYPE (it is not restored here) and asserts that bytes 0xC5 and
    0xE5 match each other case-insensitively, both with ``re.L | re.I``
    passed as flags and with the inline ``(?Li)`` prefix.
    """
    locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')
    cases = (
        # (pattern, subject, flags)
        (b'\xc5\xe5', b'\xc5\xe5', re.L | re.I),
        (b'\xc5', b'\xe5', re.L | re.I),
        (b'\xe5', b'\xc5', re.L | re.I),
        (b'(?Li)\xc5\xe5', b'\xc5\xe5', 0),
        (b'(?Li)\xc5', b'\xe5', 0),
        (b'(?Li)\xe5', b'\xc5', 0),
    )
    for pattern, subject, flags in cases:
        self.assertTrue(re.match(pattern, subject, flags))
Example 10
Project: oil   Author: oilshell   File: test_re.py    (license) View Source Project 5 votes vote down vote up
def check_en_US_utf8(self):
    """Check locale-aware case folding under the ``en_US.utf8`` locale.

    Sets LC_CTYPE (it is not restored here).  The identical two-byte
    sequence must still match, while the single bytes 0xC5 and 0xE5 must
    not match each other, both with ``re.L | re.I`` passed as flags and
    with the inline ``(?Li)`` prefix.
    """
    locale.setlocale(locale.LC_CTYPE, 'en_US.utf8')
    cases = (
        # (pattern, subject, flags, should_match)
        (b'\xc5\xe5', b'\xc5\xe5', re.L | re.I, True),
        (b'\xc5', b'\xe5', re.L | re.I, False),
        (b'\xe5', b'\xc5', re.L | re.I, False),
        (b'(?Li)\xc5\xe5', b'\xc5\xe5', 0, True),
        (b'(?Li)\xc5', b'\xe5', 0, False),
        (b'(?Li)\xe5', b'\xc5', 0, False),
    )
    for pattern, subject, flags, should_match in cases:
        found = re.match(pattern, subject, flags)
        if should_match:
            self.assertTrue(found)
        else:
            self.assertIsNone(found)
Example 11
Project: python2-tracer   Author: extremecoders-re   File: test_re.py    (license) View Source Project 5 votes vote down vote up
def test_constants(self):
        self.assertEqual(re.I, re.IGNORECASE)
        self.assertEqual(re.L, re.LOCALE)
        self.assertEqual(re.M, re.MULTILINE)
        self.assertEqual(re.S, re.DOTALL)
        self.assertEqual(re.X, re.VERBOSE) 
Example 12
Project: python2-tracer   Author: extremecoders-re   File: test_re.py    (license) View Source Project 5 votes vote down vote up
def test_flags(self):
        for flag in [re.I, re.M, re.X, re.S, re.L]:
            self.assertTrue(re.compile('^pattern$', flag)) 
Example 13
Project: python2-tracer   Author: extremecoders-re   File: test_re.py    (license) View Source Project 5 votes vote down vote up
def check_en_US_iso88591(self):
    """Check locale-aware case folding under the ``en_US.iso88591`` locale.

    Sets LC_CTYPE (it is not restored here) and asserts that bytes 0xC5 and
    0xE5 match each other case-insensitively, both with ``re.L | re.I``
    passed as flags and with the inline ``(?Li)`` prefix.
    """
    locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')
    cases = (
        # (pattern, subject, flags)
        (b'\xc5\xe5', b'\xc5\xe5', re.L | re.I),
        (b'\xc5', b'\xe5', re.L | re.I),
        (b'\xe5', b'\xc5', re.L | re.I),
        (b'(?Li)\xc5\xe5', b'\xc5\xe5', 0),
        (b'(?Li)\xc5', b'\xe5', 0),
        (b'(?Li)\xe5', b'\xc5', 0),
    )
    for pattern, subject, flags in cases:
        self.assertTrue(re.match(pattern, subject, flags))
Example 14
Project: python2-tracer   Author: extremecoders-re   File: test_re.py    (license) View Source Project 5 votes vote down vote up
def check_en_US_utf8(self):
    """Check locale-aware case folding under the ``en_US.utf8`` locale.

    Sets LC_CTYPE (it is not restored here).  The identical two-byte
    sequence must still match, while the single bytes 0xC5 and 0xE5 must
    not match each other, both with ``re.L | re.I`` passed as flags and
    with the inline ``(?Li)`` prefix.
    """
    locale.setlocale(locale.LC_CTYPE, 'en_US.utf8')
    cases = (
        # (pattern, subject, flags, should_match)
        (b'\xc5\xe5', b'\xc5\xe5', re.L | re.I, True),
        (b'\xc5', b'\xe5', re.L | re.I, False),
        (b'\xe5', b'\xc5', re.L | re.I, False),
        (b'(?Li)\xc5\xe5', b'\xc5\xe5', 0, True),
        (b'(?Li)\xc5', b'\xe5', 0, False),
        (b'(?Li)\xe5', b'\xc5', 0, False),
    )
    for pattern, subject, flags, should_match in cases:
        found = re.match(pattern, subject, flags)
        if should_match:
            self.assertTrue(found)
        else:
            self.assertIsNone(found)
Example 15
Project: isard   Author: isard-vdi   File: api.py    (license) View Source Project 5 votes vote down vote up
def parse_string(self, txt):
    """Validate ``txt`` and return an accent-free, underscore-joined copy.

    Accepted characters are ASCII letters and digits, space, ``.``, ``-``,
    ``_`` and the accented letters listed in the pattern below.

    :param txt: text to check; anything that is not ``str`` is decoded
        from UTF-8 first.
    :return: ``False`` when ``txt`` is empty or contains any other
        character; otherwise ``txt`` with combining accents stripped and
        every space replaced by ``_``.
    """
    import re
    import unicodedata

    if not isinstance(txt, str):
        txt = txt.decode('utf-8')

    # No re.L: the class lists every accepted character explicitly, and
    # Python 3.6+ raises ValueError for LOCALE with a str pattern.
    # '\Z' instead of '$': '$' also matches just before a trailing newline,
    # which let "name\n" pass validation.
    prog = re.compile(r"[-_àèìòùáéíóúñçÀÈÌÒÙÁÉÍÓÚÑÇ .a-zA-Z0-9]+\Z")
    if not prog.match(txt):
        return False

    # Replace accents: decompose each letter (NFD) and drop the combining
    # marks (category 'Mn').
    decomposed = unicodedata.normalize('NFD', txt)
    txt = ''.join(c for c in decomposed if unicodedata.category(c) != 'Mn')
    return txt.replace(" ", "_")
Example 16
Project: isard   Author: isard-vdi   File: DesktopViews.py    (license) View Source Project 5 votes vote down vote up
def validCharacters(txt):
    """Check that ``txt`` only contains the accepted name characters.

    Accepted characters are ASCII letters and digits, space, ``.``, ``-``,
    ``_`` and the accented letters listed in the pattern below.

    :param txt: text to check; a byte string is decoded from UTF-8 first,
        text that is already decoded is used as is.
    :return: ``False`` when ``txt`` is empty or contains any other
        character, otherwise the (decoded) text itself.
    """
    import re

    # Decode only real byte strings; decoding unconditionally failed for
    # text that was already decoded.
    if isinstance(txt, bytes):
        txt = txt.decode('utf-8')

    # The character class is spelled out, so neither the process locale nor
    # re.L is needed.  The former locale.setlocale(LC_ALL, 'ca_ES') call
    # changed the locale of the whole process on every call and raised
    # locale.Error where that locale is not installed.
    # '\Z' instead of '$': '$' also matches just before a trailing newline.
    prog = re.compile(u"[-_àèìòùáéíóúñçÀÈÌÒÙÁÉÍÓÚÑÇ .a-zA-Z0-9]+\\Z")
    if not prog.match(txt):
        return False
    return txt
Example 17
Project: isard   Author: isard-vdi   File: populate.py    (license) View Source Project 5 votes vote down vote up
def _parseString(self, txt):
    """Validate ``txt`` and return an accent-free, underscore-joined copy.

    Accepted characters are ASCII letters and digits, space, ``.``, ``-``,
    ``_`` and the accented letters listed in the pattern below.

    :param txt: text to check; anything that is not ``str`` is decoded
        from UTF-8 first.
    :return: ``False`` when ``txt`` is empty or contains any other
        character; otherwise ``txt`` with combining accents stripped and
        every space replaced by ``_``.
    """
    import re
    import unicodedata

    if not isinstance(txt, str):
        txt = txt.decode('utf-8')

    # The character class is spelled out, so neither the process locale nor
    # re.L is needed: the former locale.setlocale(LC_ALL, 'ca_ES') call
    # changed the locale of the whole process and raised locale.Error where
    # that locale is not installed, and Python 3.6+ raises ValueError for
    # LOCALE with a str pattern.
    # '\Z' instead of '$': '$' also matches just before a trailing newline,
    # which let "name\n" pass validation.
    prog = re.compile(r"[-_àèìòùáéíóúñçÀÈÌÒÙÁÉÍÓÚÑÇ .a-zA-Z0-9]+\Z")
    if not prog.match(txt):
        return False

    # Replace accents: decompose each letter (NFD) and drop the combining
    # marks (category 'Mn').
    decomposed = unicodedata.normalize('NFD', txt)
    txt = ''.join(c for c in decomposed if unicodedata.category(c) != 'Mn')
    return txt.replace(" ", "_")
Example 18
Project: web_ctp   Author: molebot   File: test_re.py    (license) View Source Project 5 votes vote down vote up
def test_constants(self):
        self.assertEqual(re.I, re.IGNORECASE)
        self.assertEqual(re.L, re.LOCALE)
        self.assertEqual(re.M, re.MULTILINE)
        self.assertEqual(re.S, re.DOTALL)
        self.assertEqual(re.X, re.VERBOSE) 
Example 19
Project: web_ctp   Author: molebot   File: test_re.py    (license) View Source Project 5 votes vote down vote up
def test_flags(self):
        for flag in [re.I, re.M, re.X, re.S, re.L]:
            self.assertNotEqual(re.compile('^pattern$', flag), None) 
Example 20
Project: pefile.pypy   Author: cloudtracer   File: test_re.py    (license) View Source Project 5 votes vote down vote up
def test_constants(self):
        self.assertEqual(re.I, re.IGNORECASE)
        self.assertEqual(re.L, re.LOCALE)
        self.assertEqual(re.M, re.MULTILINE)
        self.assertEqual(re.S, re.DOTALL)
        self.assertEqual(re.X, re.VERBOSE) 
Example 21
Project: pefile.pypy   Author: cloudtracer   File: test_re.py    (license) View Source Project 5 votes vote down vote up
def test_flags(self):
        for flag in [re.I, re.M, re.X, re.S, re.L]:
            self.assertTrue(re.compile('^pattern$', flag)) 
Example 22
Project: pefile.pypy   Author: cloudtracer   File: test_re.py    (license) View Source Project 5 votes vote down vote up
def check_en_US_iso88591(self):
    """Check locale-aware case folding under the ``en_US.iso88591`` locale.

    Sets LC_CTYPE (it is not restored here) and asserts that bytes 0xC5 and
    0xE5 match each other case-insensitively, both with ``re.L | re.I``
    passed as flags and with the inline ``(?Li)`` prefix.
    """
    locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')
    cases = (
        # (pattern, subject, flags)
        (b'\xc5\xe5', b'\xc5\xe5', re.L | re.I),
        (b'\xc5', b'\xe5', re.L | re.I),
        (b'\xe5', b'\xc5', re.L | re.I),
        (b'(?Li)\xc5\xe5', b'\xc5\xe5', 0),
        (b'(?Li)\xc5', b'\xe5', 0),
        (b'(?Li)\xe5', b'\xc5', 0),
    )
    for pattern, subject, flags in cases:
        self.assertTrue(re.match(pattern, subject, flags))
Example 23
Project: pefile.pypy   Author: cloudtracer   File: test_re.py    (license) View Source Project 5 votes vote down vote up
def check_en_US_utf8(self):
    """Check locale-aware case folding under the ``en_US.utf8`` locale.

    Sets LC_CTYPE (it is not restored here).  The identical two-byte
    sequence must still match, while the single bytes 0xC5 and 0xE5 must
    not match each other, both with ``re.L | re.I`` passed as flags and
    with the inline ``(?Li)`` prefix.
    """
    locale.setlocale(locale.LC_CTYPE, 'en_US.utf8')
    cases = (
        # (pattern, subject, flags, should_match)
        (b'\xc5\xe5', b'\xc5\xe5', re.L | re.I, True),
        (b'\xc5', b'\xe5', re.L | re.I, False),
        (b'\xe5', b'\xc5', re.L | re.I, False),
        (b'(?Li)\xc5\xe5', b'\xc5\xe5', 0, True),
        (b'(?Li)\xc5', b'\xe5', 0, False),
        (b'(?Li)\xe5', b'\xc5', 0, False),
    )
    for pattern, subject, flags, should_match in cases:
        found = re.match(pattern, subject, flags)
        if should_match:
            self.assertTrue(found)
        else:
            self.assertIsNone(found)
Example 24
Project: ouroboros   Author: pybee   File: test_re.py    (license) View Source Project 5 votes vote down vote up
def test_constants(self):
        self.assertEqual(re.I, re.IGNORECASE)
        self.assertEqual(re.L, re.LOCALE)
        self.assertEqual(re.M, re.MULTILINE)
        self.assertEqual(re.S, re.DOTALL)
        self.assertEqual(re.X, re.VERBOSE) 
Example 25
Project: ouroboros   Author: pybee   File: test_re.py    (license) View Source Project 5 votes vote down vote up
def test_flags(self):
        for flag in [re.I, re.M, re.X, re.S, re.L]:
            self.assertTrue(re.compile('^pattern$', flag)) 
Example 26
Project: ouroboros   Author: pybee   File: test_re.py    (license) View Source Project 5 votes vote down vote up
def check_en_US_iso88591(self):
    """Check locale-aware case folding under the ``en_US.iso88591`` locale.

    Sets LC_CTYPE (it is not restored here) and asserts that bytes 0xC5 and
    0xE5 match each other case-insensitively, both with ``re.L | re.I``
    passed as flags and with the inline ``(?Li)`` prefix.
    """
    locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')
    cases = (
        # (pattern, subject, flags)
        (b'\xc5\xe5', b'\xc5\xe5', re.L | re.I),
        (b'\xc5', b'\xe5', re.L | re.I),
        (b'\xe5', b'\xc5', re.L | re.I),
        (b'(?Li)\xc5\xe5', b'\xc5\xe5', 0),
        (b'(?Li)\xc5', b'\xe5', 0),
        (b'(?Li)\xe5', b'\xc5', 0),
    )
    for pattern, subject, flags in cases:
        self.assertTrue(re.match(pattern, subject, flags))
Example 27
Project: ouroboros   Author: pybee   File: test_re.py    (license) View Source Project 5 votes vote down vote up
def check_en_US_utf8(self):
    """Check locale-aware case folding under the ``en_US.utf8`` locale.

    Sets LC_CTYPE (it is not restored here).  The identical two-byte
    sequence must still match, while the single bytes 0xC5 and 0xE5 must
    not match each other, both with ``re.L | re.I`` passed as flags and
    with the inline ``(?Li)`` prefix.
    """
    locale.setlocale(locale.LC_CTYPE, 'en_US.utf8')
    cases = (
        # (pattern, subject, flags, should_match)
        (b'\xc5\xe5', b'\xc5\xe5', re.L | re.I, True),
        (b'\xc5', b'\xe5', re.L | re.I, False),
        (b'\xe5', b'\xc5', re.L | re.I, False),
        (b'(?Li)\xc5\xe5', b'\xc5\xe5', 0, True),
        (b'(?Li)\xc5', b'\xe5', 0, False),
        (b'(?Li)\xe5', b'\xc5', 0, False),
    )
    for pattern, subject, flags, should_match in cases:
        found = re.match(pattern, subject, flags)
        if should_match:
            self.assertTrue(found)
        else:
            self.assertIsNone(found)
Example 28
Project: ndk-python   Author: gittor   File: test_re.py    (license) View Source Project 5 votes vote down vote up
def test_constants(self):
        self.assertEqual(re.I, re.IGNORECASE)
        self.assertEqual(re.L, re.LOCALE)
        self.assertEqual(re.M, re.MULTILINE)
        self.assertEqual(re.S, re.DOTALL)
        self.assertEqual(re.X, re.VERBOSE) 
Example 29
Project: ndk-python   Author: gittor   File: test_re.py    (license) View Source Project 5 votes vote down vote up
def test_flags(self):
        for flag in [re.I, re.M, re.X, re.S, re.L]:
            self.assertNotEqual(re.compile('^pattern$', flag), None) 
Example 30
Project: kbe_server   Author: xiaohaoppy   File: test_re.py    (license) View Source Project 5 votes vote down vote up
def test_constants(self):
        self.assertEqual(re.I, re.IGNORECASE)
        self.assertEqual(re.L, re.LOCALE)
        self.assertEqual(re.M, re.MULTILINE)
        self.assertEqual(re.S, re.DOTALL)
        self.assertEqual(re.X, re.VERBOSE) 
Example 31
Project: kbe_server   Author: xiaohaoppy   File: test_re.py    (license) View Source Project 5 votes vote down vote up
def test_flags(self):
        for flag in [re.I, re.M, re.X, re.S, re.L]:
            self.assertTrue(re.compile('^pattern$', flag)) 
Example 32
Project: ScanSql   Author: fiht   File: ??Mongo?????.py    (license) View Source Project 5 votes vote down vote up
def get_info(host):
    """Fetch ``host`` and return the text of its first ``<title>`` element.

    :param host: URL handed to ``requests.get`` (10 second timeout).
    :return: the text between the first ``<title>`` and ``</title>`` found
        in the response body, or ``None`` when the request fails (the error
        is printed) or the body contains no such pair.
    """
    try:
        req = requests.get(host, timeout=10)
        # Decode the body with the encoding detected from its content.
        req.encoding = req.apparent_encoding
        # No re.L: Python 3.6+ raises ValueError for LOCALE with a str
        # pattern, which the handler below swallowed, so the function
        # returned None for every page.
        titles = re.findall('<title>(.*?)</title>', req.text)
    except Exception as e:
        # Deliberately broad: any failure for one host is reported and
        # turned into "no information".
        print(e)
        return None

    # A page without a title is an expected outcome, not an error.
    return titles[0] if titles else None
#---------------------------------------------------------------------- 
Example 33
Project: backrefs   Author: facelessuser   File: bre.py    (license) View Source Project 4 votes vote down vote up
def iternext(self):
        """
        Iterate through characters of the string.

        Count escaped l, L, c, C, E and backslash as a single char.

        Returns the next item (also stored in ``self.current``) and advances
        ``self.index`` past it.

        Raises StopIteration when ``self.index`` is past ``self.max_index``,
        SyntaxError for a reference that is missing its argument, and
        ValueError for an unmatched curly bracket in format mode.
        """

        # Nothing left to read once the cursor is past the last valid index.
        if self.index > self.max_index:
            raise StopIteration

        # Take the single character at the cursor.
        char = self.string[self.index:self.index + 1]
        if char == self._b_slash:
            # Try to recognise a reference in the text right after the backslash.
            m = self._replace_ref.match(self.string[self.index + 1:])
            if m:
                ref = m.group(0)
                # A lone character from `_long_replace_refs` is a reference
                # without its argument; report the form that was expected.
                if len(ref) == 1 and ref in self._long_replace_refs:
                    if ref == self._hex:
                        raise SyntaxError('Format for byte is \\xXX!')
                    elif ref == self._group:
                        raise SyntaxError('Format for group is \\g<group_name_or_index>!')
                    elif ref == self._unicode_name:
                        raise SyntaxError('Format for Unicode name is \\N{name}!')
                    elif ref == self._unicode_narrow:  # pragma: no cover
                        raise SyntaxError('Format for Unicode is \\uXXXX!')
                    elif ref == self._unicode_wide:  # pragma: no cover
                        raise SyntaxError('Format for wide Unicode is \\UXXXXXXXX!')
                # Format mode only: when group 3 or 4 matched, an extra
                # backslash is added to the returned item.  The cursor is
                # moved back by one so that the final `self.index += len(char)`
                # does not count that extra character as consumed input.
                # NOTE(review): what groups 3 and 4 capture is defined by
                # `_replace_ref`, which is outside this block.
                if self.use_format and (m.group(3) or m.group(4)):
                    char += self._b_slash
                    self.index -= 1
                # Append the matched reference text, except in format mode
                # when group 4 matched.
                if not self.use_format or not m.group(4):
                    char += m.group(1) if m.group(1) else m.group(2)
        elif self.use_format and char in (self._lc_bracket, self._rc_bracket):
            # Format mode only: a curly bracket must start something that
            # `_format_replace_group` accepts.
            m = self._format_replace_group.match(self.string[self.index:])
            if m:
                if m.group(2):
                    # Group 2 replaces the bracket as the returned item.
                    char = m.group(2)
                else:
                    # No group 2: the single bracket is returned but one
                    # additional input character is skipped.
                    # NOTE(review): presumably a doubled-bracket escape --
                    # confirm against `_format_replace_group`.
                    self.index += 1
            else:
                raise ValueError("Single unmatched curly bracket!")

        # Advance past the returned item and remember it.
        self.index += len(char)
        self.current = char
        return self.current