Python re.L Examples
The following are 30 code examples that use the re.L (re.LOCALE) flag.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module re, or try the search function.
Example #1
Source File: Bayes.py From weiboanalysis with Apache License 2.0 | 7 votes |
def loadDataSet(path):
    """Load a labelled corpus and return the tokens and label of each post.

    Each line of the UTF-8 file at ``path`` is read as a numeric label in its
    first two characters followed by the post text.  The text is segmented
    with ``jieba.cut``; tokens made up only of ASCII word characters
    (letters, digits, underscore) are dropped, then the token list is
    de-duplicated and stripped of stop words and a few blank / zero-width
    characters.

    Args:
        path: Path of the labelled corpus file.

    Returns:
        A ``(line_cut, label)`` tuple.  ``line_cut[i]`` is the token list of
        the i-th accepted line and ``label[i]`` is its integer label; the two
        lists always have the same length.
    """
    # re.L may only be combined with bytes patterns on Python 3.6+, where
    # using it with a str pattern raises ValueError.  re.ASCII gives the
    # intended ASCII-only meaning of \w for str input.
    ascii_word = re.compile(r'\w', re.ASCII)

    line_cut = []
    label = []
    with open(path, encoding="utf-8") as fp:
        for line in fp:
            temp = line.strip()
            try:
                tag = int(temp[:2])  # leading label
                sentence = temp[2:].lstrip().replace('\u200b', '')  # post text
                word_list = []
                for word in jieba.cut(sentence.strip()):
                    result = ascii_word.sub("", word)
                    if not result or result == ' ':  # nothing left: skip token
                        continue
                    word_list.append(word)
                word_list = list(set(word_list) - set(stop) - set('\u200b') - set(' ') - set('\u3000') - set('️'))
            except Exception:
                # Best-effort loader: a line that cannot be parsed is skipped.
                continue
            # Append both results together so the lists stay aligned even
            # when a line is rejected part-way through.
            line_cut.append(word_list)
            label.append(tag)
    return line_cut, label
Example #2
Source File: Bayes.py From weiboanalysis with Apache License 2.0 | 7 votes |
def loadDataSet(path):
    """Load a labelled corpus and return the tokens and label of each post.

    Each line of the UTF-8 file at ``path`` is read as a numeric label in its
    first two characters followed by the post text.  The text is segmented
    with ``jieba.cut``; tokens consisting only of ASCII word characters are
    dropped, then the token list is de-duplicated and stripped of stop words
    and a few blank / zero-width characters.

    Args:
        path: Path of the labelled corpus file.

    Returns:
        A ``(line_cut, label)`` tuple of equal-length lists: the token list
        and the integer label of every accepted line.
    """
    # The original encoded every token to UTF-8 bytes so that a bytes
    # pattern with re.L could be used.  re.ASCII on the str token removes the
    # same ASCII word characters without the per-token encode/decode round
    # trip and without depending on the process locale.
    ascii_word = re.compile(r'\w', re.ASCII)

    line_cut = []
    label = []
    with open(path, encoding="utf-8") as fp:
        for line in fp:
            temp = line.strip()
            try:
                tag = int(temp[:2])  # leading label
                sentence = temp[2:].lstrip().replace('\u200b', '')  # post text
                kept = []
                for word in jieba.cut(sentence.strip()):
                    result = ascii_word.sub("", word)
                    if not result or result == ' ':  # nothing left: skip token
                        continue
                    kept.append(word)
                kept = list(set(kept) - set(stop) - set('\u200b') - set(' ') - set('\u3000') - set('️'))
            except Exception:
                # Best-effort loader: a line that cannot be parsed is skipped.
                continue
            # Record label and tokens together; appending the label before
            # tokenising could leave the two lists with different lengths.
            line_cut.append(kept)
            label.append(tag)
    return line_cut, label
Example #3
Source File: tool.py From weiboanalysis with Apache License 2.0 | 6 votes |
def pynlp_build_key_word(filename):
    """Return the most frequent segmented words of a text file.

    Every line of the UTF-8 file is stripped of URLs and segmented with
    ``pynlpir.segment``.  Tokens containing ``@`` (user mentions), tokens made
    up only of ASCII word characters, and single-character tokens are not
    counted.  The top 20% of the remaining words by frequency are returned,
    minus stop words and a few blank / zero-width characters.

    Args:
        filename: Path of the text file to analyse.

    Returns:
        A list of keywords in no particular order.
    """
    # "https?" so that both http and https URLs are removed; the previous
    # "http?" made the "p" optional and never matched "https://".
    url_pattern = re.compile(r'https?://.+$')
    # re.L cannot be combined with a str pattern on Python 3.6+.
    ascii_word = re.compile(r'\w', re.ASCII)

    d = {}
    with open(filename, encoding="utf-8") as fp:
        for line in fp:
            s = line
            for url in url_pattern.findall(line):
                s = s.replace(url, '')  # drop each URL in turn
            for i in pynlpir.segment(s, pos_tagging=False):
                # Skip mentions instead of removing them from the list being
                # iterated, which made the loop skip the following token.
                if '@' in i:
                    continue
                result = ascii_word.sub("", i)
                if not result or result == ' ':  # nothing left: skip token
                    continue
                if len(i) > 1:  # keep single characters out of the counts
                    d[i] = d.get(i, 0) + 1

    kw_list = sorted(d, key=lambda x: d[x], reverse=True)
    size = int(len(kw_list) * 0.2)  # keep the top 20%
    mood = set(kw_list[:size])
    return list(mood - set(stop) - set('\u200b') - set(' ') - set('\u3000'))
Example #4
Source File: tree.py From uproot with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _branch_flags(flags): flagsbyte = 0 for flag in flags: if flag == "i": flagsbyte += re.I elif flag == "L": flagsbyte += re.L elif flag == "m": flagsbyte += re.M elif flag == "s": flagsbyte += re.S elif flag == "u": flagsbyte += re.U elif flag == "x": flagsbyte += re.X return flagsbyte
Example #5
Source File: tree.py From uproot with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _branch_flags(flags): flagsbyte = 0 for flag in flags: if flag == "i": flagsbyte += re.I elif flag == "L": flagsbyte += re.L elif flag == "m": flagsbyte += re.M elif flag == "s": flagsbyte += re.S elif flag == "u": flagsbyte += re.U elif flag == "x": flagsbyte += re.X return flagsbyte
Example #6
Source File: tools.py From hadrian with Apache License 2.0 | 6 votes |
def getmatch(self, haystack):
    """Match ``haystack`` against this object's pattern.

    Flag letters found in ``self.flags`` (either case) are translated to the
    corresponding ``re`` flags.  Returns ``None`` when ``haystack`` is not a
    string or the pattern does not match at its start.  Otherwise returns a
    ``Match`` pairing the input with itself when ``self.to`` is ``None``, or
    with the result of substituting ``self.to`` for the pattern.
    """
    if not isinstance(haystack, basestring):
        return None

    flags = 0
    if self.flags is not None:
        letter_bits = (
            ("i", re.I),
            ("l", re.L),
            ("m", re.M),
            ("s", re.S),
            ("u", re.U),
            ("x", re.X),
        )
        for letter, bit in letter_bits:
            if letter in self.flags or letter.upper() in self.flags:
                flags |= bit

    if re.match(self.pattern, haystack, flags=flags) is None:
        return None
    if self.to is None:
        return Match(haystack, haystack)
    replaced = re.sub(self.pattern, self.to, haystack, flags=flags)
    return Match(haystack, replaced)
Example #7
Source File: utils.py From LDA_RecEngine with Apache License 2.0 | 6 votes |
def preprocessing(content):
    """Strip digits, punctuation and line breaks from ``content``.

    Steps, in order:
      1. remove ASCII digits (English letters are kept);
      2. remove every character in ``string.punctuation``;
      3. remove newlines and a fixed set of Chinese punctuation marks, then
         collapse repeated whitespace with
         ``parsing.strip_multiple_whitespaces``.

    If the whitespace step fails, a warning is printed and the text is
    returned as it stands after the removals.

    Args:
        content: Text to clean.

    Returns:
        The cleaned text.
    """
    remove_punc = ('。 ; 。 、 」 「 , ( ) —').split(' ')

    # Step 1: digits only.  re.L was dropped: it does not change what \d
    # matches and raises ValueError with a str pattern on Python 3.6+.
    # "[0-9]+" keeps the ASCII-only meaning and avoids the empty matches that
    # r'\d*' produced at every position.
    digits = re.compile(r'[0-9]+')
    content = digits.sub("", content)

    # Step 2: ASCII punctuation.
    punctuation = re.compile('[%s]' % re.escape(string.punctuation))
    content = punctuation.sub("", content)

    # Step 3: line breaks, Chinese punctuation and repeated whitespace.
    content = content.replace('\n', '')
    for punc in remove_punc:
        content = content.replace(punc, '')
    try:
        content = parsing.strip_multiple_whitespaces(content)
    except Exception:
        # Best effort: keep the text even if whitespace stripping fails.
        print('Warning : failed to strip whitespaces @ ')
    return content
Example #8
Source File: test_re.py From ironpython3 with Apache License 2.0 | 5 votes |
def check_en_US_utf8(self):
    """Check LOCALE|IGNORECASE byte matching under the en_US.utf8 locale.

    The two-byte sequence must match itself, while the single bytes 0xC5 and
    0xE5 must not match each other, both with explicit flags and with the
    inline ``(?Li)`` prefix.
    """
    locale.setlocale(locale.LC_CTYPE, 'en_US.utf8')
    first, second = b'\xc5', b'\xe5'
    pair = first + second
    inline = b'(?Li)'

    # Flags passed as an argument.
    self.assertTrue(re.match(pair, pair, re.L | re.I))
    self.assertIsNone(re.match(first, second, re.L | re.I))
    self.assertIsNone(re.match(second, first, re.L | re.I))

    # Same checks with the flags embedded in the pattern.
    self.assertTrue(re.match(inline + pair, pair))
    self.assertIsNone(re.match(inline + first, second))
    self.assertIsNone(re.match(inline + second, first))
Example #9
Source File: test_re.py From ironpython3 with Apache License 2.0 | 5 votes |
def check_en_US_iso88591(self):
    """Check LOCALE|IGNORECASE byte matching under the en_US.iso88591 locale.

    The bytes 0xC5 and 0xE5 must match each other in either direction, both
    with explicit flags and with the inline ``(?Li)`` prefix.
    """
    locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')
    cases = (
        (b'\xc5\xe5', b'\xc5\xe5'),
        (b'\xc5', b'\xe5'),
        (b'\xe5', b'\xc5'),
    )
    # Flags passed as an argument.
    for pattern, subject in cases:
        self.assertTrue(re.match(pattern, subject, re.L | re.I))
    # Flags embedded in the pattern.
    for pattern, subject in cases:
        self.assertTrue(re.match(b'(?Li)' + pattern, subject))
Example #10
Source File: test_sre_yield.py From sre_yield with Apache License 2.0 | 5 votes |
def testParseErrors(self):
    """Check that AllStrings raises ParseError for the re.I and re.U flags."""
    for flag in (re.I, re.U):
        self.assertRaises(sre_yield.ParseError, sre_yield.AllStrings, "a", flag)
    # re.L is not exercised here: the same call with re.L causes a failure
    # inside sre_parse under Python 3.6.
Example #11
Source File: test_re.py From ironpython3 with Apache License 2.0 | 5 votes |
def test_flags(self):
    """Check that ``re.compile`` accepts each of the basic flags.

    I, M, X and S are compiled with a str pattern.  LOCALE is compiled with a
    bytes pattern because Python 3.6+ raises ValueError when re.L is combined
    with a str pattern.
    """
    for flag in [re.I, re.M, re.X, re.S]:
        self.assertTrue(re.compile('^pattern$', flag))
    self.assertTrue(re.compile(b'^pattern$', re.L))
Example #12
Source File: test_re.py From gcblue with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_constants(self):
    """Check that each one-letter flag alias equals its long-form name."""
    aliases = (
        ("I", "IGNORECASE"),
        ("L", "LOCALE"),
        ("M", "MULTILINE"),
        ("S", "DOTALL"),
        ("X", "VERBOSE"),
    )
    for short_name, long_name in aliases:
        self.assertEqual(getattr(re, short_name), getattr(re, long_name))
Example #13
Source File: test_re.py From ironpython3 with Apache License 2.0 | 5 votes |
def test_constants(self):
    """Verify the short flag constants are equal to their descriptive names."""
    check = self.assertEqual
    check(re.I, re.IGNORECASE)
    check(re.L, re.LOCALE)
    check(re.M, re.MULTILINE)
    check(re.S, re.DOTALL)
    check(re.X, re.VERBOSE)
Example #14
Source File: test_re.py From gcblue with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_flags(self):
    """Check that ``re.compile`` returns a pattern object for each flag.

    LOCALE is compiled with a bytes pattern: on Python 2 a ``b''`` literal is
    an ordinary str, and Python 3.6+ rejects re.L combined with a str
    pattern.
    """
    for flag in [re.I, re.M, re.X, re.S]:
        self.assertNotEqual(re.compile('^pattern$', flag), None)
    self.assertNotEqual(re.compile(b'^pattern$', re.L), None)
Example #15
Source File: header.py From quantipy with MIT License | 5 votes |
def _getMultRespDef(self, mrDef): """Get 'normal' multiple response defintions. This is a helper function for the multRespDefs getter function. A multiple response definition <mrDef> in the string format returned by the IO module is converted into a multiple response definition of the form multRespSet = {<setName>: {"setType": <setType>, "label": <lbl>, "varNames": <list_of_varNames>}}. SetType may be either 'D' (multiple dichotomy sets) or 'C' (multiple category sets). If setType is 'D', the multiple response definition also includes '"countedValue": countedValue'""" regex = b"\$(?P<setName>\S+)=(?P<setType>[CD])\n?" m = re.search(regex + b".*", mrDef, re.I | re.L) if not m: return {} setType = m.group("setType") if setType == b"C": # multiple category sets regex += b" (?P<lblLen>\d+) (?P<lblVarNames>.+) ?\n?" matches = re.findall(regex, mrDef, re.I) setName, setType, lblLen, lblVarNames = matches[0] else: # multiple dichotomy sets # \w+ won't always work (e.g. thai) --> \S+ regex += (b"(?P<valueLen>\d+) (?P<countedValue>\S+)" + b" (?P<lblLen>\d+) (?P<lblVarNames>.+) ?\n?") matches = re.findall(regex, mrDef, re.I | re.L) setName, setType, valueLen = matches[0][:3] countedValue, lblLen, lblVarNames = matches[0][3:] lbl = lblVarNames[:int(lblLen)] varNames = lblVarNames[int(lblLen):].split() multRespSet = {setName: {b"setType": setType, b"label": lbl, b"varNames": varNames}} if setType == b"D": multRespSet[setName][b"countedValue"] = countedValue return multRespSet
Example #16
Source File: test_re.py From Project-New-Reign---Nemesis-Main with GNU General Public License v3.0 | 5 votes |
def check_en_US_iso88591(self):
    """Check that 0xC5 and 0xE5 match case-insensitively under en_US.iso88591.

    Each pairing is tested first with ``re.L | re.I`` passed as flags, then
    with the inline ``(?Li)`` prefix.
    """
    locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')
    first, second = b'\xc5', b'\xe5'
    pair = first + second
    inline = b'(?Li)'

    self.assertTrue(re.match(pair, pair, re.L | re.I))
    self.assertTrue(re.match(first, second, re.L | re.I))
    self.assertTrue(re.match(second, first, re.L | re.I))

    self.assertTrue(re.match(inline + pair, pair))
    self.assertTrue(re.match(inline + first, second))
    self.assertTrue(re.match(inline + second, first))
Example #17
Source File: test_re.py From Project-New-Reign---Nemesis-Main with GNU General Public License v3.0 | 5 votes |
def test_getlower(self):
    """Check ``_sre.getlower`` and case-insensitive matching of "abc"/"ABC".

    ``_sre.getlower`` must map ``'A'`` to ``'a'`` for flag values 0, LOCALE,
    UNICODE and ASCII, and IGNORECASE matching must succeed for str and bytes
    patterns, alone and combined with re.A (str) or re.L (bytes).
    """
    import _sre

    upper_a, lower_a = ord('A'), ord('a')
    for flag in (0, re.LOCALE, re.UNICODE, re.ASCII):
        self.assertEqual(_sre.getlower(upper_a, flag), lower_a)

    matched_str = re.match("abc", "ABC", re.I)
    self.assertEqual(matched_str.group(0), "ABC")
    matched_bytes = re.match(b"abc", b"ABC", re.I)
    self.assertEqual(matched_bytes.group(0), b"ABC")
    matched_ascii = re.match("abc", "ABC", re.I | re.A)
    self.assertEqual(matched_ascii.group(0), "ABC")
    matched_locale = re.match(b"abc", b"ABC", re.I | re.L)
    self.assertEqual(matched_locale.group(0), b"ABC")
Example #18
Source File: test_re.py From Project-New-Reign---Nemesis-Main with GNU General Public License v3.0 | 5 votes |
def test_constants(self):
    """Check that each one-letter flag alias equals its long-form name."""
    for short_name, long_name in (
        ("I", "IGNORECASE"),
        ("L", "LOCALE"),
        ("M", "MULTILINE"),
        ("S", "DOTALL"),
        ("X", "VERBOSE"),
    ):
        alias = getattr(re, short_name)
        full = getattr(re, long_name)
        self.assertEqual(alias, full)
Example #19
Source File: test_re.py From Project-New-Reign---Nemesis-Main with GNU General Public License v3.0 | 5 votes |
def test_flags(self):
    """Check that ``re.compile`` accepts each flag for str and bytes patterns.

    The str pattern is compiled with I, M, X, S, A and U; the bytes pattern
    with I, M, X, S, A and L.
    """
    shared = (re.I, re.M, re.X, re.S, re.A)
    for text_flag in shared + (re.U,):
        self.assertTrue(re.compile('^pattern$', text_flag))
    for bytes_flag in shared + (re.L,):
        self.assertTrue(re.compile(b'^pattern$', bytes_flag))
Example #20
Source File: test_re.py From Fluid-Designer with GNU General Public License v3.0 | 5 votes |
def check_en_US_iso88591(self):
    """Check LOCALE|IGNORECASE byte matching under the en_US.iso88591 locale.

    0xC5 and 0xE5 are expected to match each other in both directions, with
    the flags given as an argument and as an inline ``(?Li)`` prefix.
    """
    locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')
    flags = re.L | re.I
    samples = [
        (b'\xc5\xe5', b'\xc5\xe5'),
        (b'\xc5', b'\xe5'),
        (b'\xe5', b'\xc5'),
    ]
    for pattern, subject in samples:
        self.assertTrue(re.match(pattern, subject, flags))
    for pattern, subject in samples:
        self.assertTrue(re.match(b'(?Li)' + pattern, subject))
Example #21
Source File: test_re.py From Project-New-Reign---Nemesis-Main with GNU General Public License v3.0 | 5 votes |
def test_locale(self):
    """Run ``check_flags`` for a bytes pattern compiled with re.L."""
    pattern = b'bytes pattern'
    # Presumably the repr that check_flags expects for the compiled pattern;
    # the helper is defined outside this snippet.
    expected = "re.compile(b'bytes pattern', re.LOCALE)"
    self.check_flags(pattern, re.L, expected)
Example #22
Source File: test_re.py From medicare-demo with Apache License 2.0 | 5 votes |
def test_constants(self):
    """Verify the short flag constants are equal to their descriptive names."""
    same = self.assertEqual
    same(re.I, re.IGNORECASE)
    same(re.L, re.LOCALE)
    same(re.M, re.MULTILINE)
    same(re.S, re.DOTALL)
    same(re.X, re.VERBOSE)
Example #23
Source File: test_re.py From medicare-demo with Apache License 2.0 | 5 votes |
def test_flags(self):
    """Check that compiling with each basic flag yields a pattern object.

    I, M, X and S use a str pattern.  LOCALE uses a bytes pattern, which is
    the same as a str literal on Python 2 and is the only pattern type that
    Python 3.6+ accepts together with re.L.
    """
    str_flags = (re.I, re.M, re.X, re.S)
    for flag in str_flags:
        self.assertNotEqual(re.compile('^pattern$', flag), None)
    self.assertNotEqual(re.compile(b'^pattern$', re.L), None)
Example #24
Source File: test_re.py From CTFCrackTools-V2 with GNU General Public License v3.0 | 5 votes |
def test_constants(self):
    """Check that each one-letter flag alias equals its long-form name."""
    names = {
        "I": "IGNORECASE",
        "L": "LOCALE",
        "M": "MULTILINE",
        "S": "DOTALL",
        "X": "VERBOSE",
    }
    for short_name in ("I", "L", "M", "S", "X"):
        self.assertEqual(getattr(re, short_name), getattr(re, names[short_name]))
Example #25
Source File: test_re.py From CTFCrackTools-V2 with GNU General Public License v3.0 | 5 votes |
def test_flags(self):
    """Check that ``re.compile`` accepts each of the basic flags.

    LOCALE is paired with a bytes pattern: that is an ordinary str on
    Python 2, and Python 3.6+ raises ValueError for re.L with a str pattern.
    """
    for flag in (re.I, re.M, re.X, re.S):
        compiled = re.compile('^pattern$', flag)
        self.assertTrue(compiled)
    self.assertTrue(re.compile(b'^pattern$', re.L))
Example #26
Source File: test_re.py From CTFCrackTools-V2 with GNU General Public License v3.0 | 5 votes |
def check_en_US_iso88591(self):
    """Check that 0xC5 and 0xE5 match case-insensitively under en_US.iso88591.

    Covers the two-byte sequence against itself and each single byte against
    the other, with explicit flags and with the inline ``(?Li)`` prefix.
    """
    locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')
    byte_c5, byte_e5 = b'\xc5', b'\xe5'
    both = byte_c5 + byte_e5
    prefix = b'(?Li)'

    # Explicit flags.
    self.assertTrue(re.match(both, both, re.L | re.I))
    self.assertTrue(re.match(byte_c5, byte_e5, re.L | re.I))
    self.assertTrue(re.match(byte_e5, byte_c5, re.L | re.I))

    # Inline flags.
    self.assertTrue(re.match(prefix + both, both))
    self.assertTrue(re.match(prefix + byte_c5, byte_e5))
    self.assertTrue(re.match(prefix + byte_e5, byte_c5))
Example #27
Source File: test_re.py From CTFCrackTools-V2 with GNU General Public License v3.0 | 5 votes |
def check_en_US_utf8(self):
    """Check LOCALE|IGNORECASE byte matching under the en_US.utf8 locale.

    The sequence 0xC5 0xE5 must match itself; the single bytes must not match
    each other.  Both the flag-argument form and the inline ``(?Li)`` form
    are covered.
    """
    locale.setlocale(locale.LC_CTYPE, 'en_US.utf8')
    flags = re.L | re.I
    byte_c5, byte_e5 = b'\xc5', b'\xe5'
    both = byte_c5 + byte_e5
    prefix = b'(?Li)'

    self.assertTrue(re.match(both, both, flags))
    self.assertIsNone(re.match(byte_c5, byte_e5, flags))
    self.assertIsNone(re.match(byte_e5, byte_c5, flags))

    self.assertTrue(re.match(prefix + both, both))
    self.assertIsNone(re.match(prefix + byte_c5, byte_e5))
    self.assertIsNone(re.match(prefix + byte_e5, byte_c5))
Example #28
Source File: test_re.py From CTFCrackTools with GNU General Public License v3.0 | 5 votes |
def test_constants(self):
    """Verify the short flag constants are equal to their descriptive names."""
    expect_equal = self.assertEqual
    expect_equal(re.I, re.IGNORECASE)
    expect_equal(re.L, re.LOCALE)
    expect_equal(re.M, re.MULTILINE)
    expect_equal(re.S, re.DOTALL)
    expect_equal(re.X, re.VERBOSE)
Example #29
Source File: test_re.py From CTFCrackTools with GNU General Public License v3.0 | 5 votes |
def test_flags(self):
    """Check that ``re.compile`` accepts each of the basic flags.

    The str pattern is compiled with I, M, X and S.  re.L is checked against
    a bytes pattern because Python 3.6+ only allows LOCALE with bytes
    patterns; on Python 2 the ``b''`` literal is a plain str.
    """
    text_flags = [re.I, re.M, re.X, re.S]
    for flag in text_flags:
        self.assertTrue(re.compile('^pattern$', flag))
    locale_pattern = re.compile(b'^pattern$', re.L)
    self.assertTrue(locale_pattern)
Example #30
Source File: test_re.py From CTFCrackTools with GNU General Public License v3.0 | 5 votes |
def check_en_US_iso88591(self):
    """Check LOCALE|IGNORECASE byte matching under the en_US.iso88591 locale.

    Every pattern/subject pairing of 0xC5 and 0xE5 must match, first with
    ``re.L | re.I`` passed as flags and then with the inline ``(?Li)`` prefix.
    """
    locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')
    pairings = (
        (b'\xc5\xe5', b'\xc5\xe5'),
        (b'\xc5', b'\xe5'),
        (b'\xe5', b'\xc5'),
    )
    for pattern, subject in pairings:
        found = re.match(pattern, subject, re.L | re.I)
        self.assertTrue(found)
    for pattern, subject in pairings:
        found = re.match(b'(?Li)' + pattern, subject)
        self.assertTrue(found)