Python re.UNICODE Examples
The following code examples show how to use the re.UNICODE flag. They are extracted from open source Python projects.
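Before the project examples, a minimal sketch of what the flag does (illustrative, not taken from any project below): re.UNICODE (alias re.U) makes the character classes \w, \b, \d and \s match Unicode text. In Python 3 this behaviour is already the default for str patterns, so the flag matters mostly for Python 2 code and for compatibility layers.

import re

# Under Python 2, \w is ASCII-only unless re.UNICODE is passed;
# under Python 3 the flag is the default for str patterns and is a no-op.
words = re.findall(r"\w+", u"caf\u00e9 na\u00efve", re.UNICODE)
print(words)  # ['café', 'naïve']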
Example 1
Project: mongodb-monitoring | Author: jruaux | File: regex.py | License: Apache License 2.0

def str_flags_to_int(str_flags):
    flags = 0
    if "i" in str_flags:
        flags |= re.IGNORECASE
    if "l" in str_flags:
        flags |= re.LOCALE
    if "m" in str_flags:
        flags |= re.MULTILINE
    if "s" in str_flags:
        flags |= re.DOTALL
    if "u" in str_flags:
        flags |= re.UNICODE
    if "x" in str_flags:
        flags |= re.VERBOSE

    return flags
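For illustration only (assuming the helper above is in scope), a BSON-style flag string maps onto re flags like this:

import re

flags = str_flags_to_int("iu")   # "i" -> IGNORECASE, "u" -> UNICODE
assert flags == re.IGNORECASE | re.UNICODE
pattern = re.compile(r"\w+", flags)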
Example 2
Project: mongodb-monitoring | Author: jruaux | File: __init__.py | License: Apache License 2.0

def _encode_regex(name, value, dummy0, dummy1):
    """Encode a python regex or bson.regex.Regex."""
    flags = value.flags
    # Python 2 common case
    if flags == 0:
        return b"\x0B" + name + _make_c_string_check(value.pattern) + b"\x00"
    # Python 3 common case
    elif flags == re.UNICODE:
        return b"\x0B" + name + _make_c_string_check(value.pattern) + b"u\x00"
    else:
        sflags = b""
        if flags & re.IGNORECASE:
            sflags += b"i"
        if flags & re.LOCALE:
            sflags += b"l"
        if flags & re.MULTILINE:
            sflags += b"m"
        if flags & re.DOTALL:
            sflags += b"s"
        if flags & re.UNICODE:
            sflags += b"u"
        if flags & re.VERBOSE:
            sflags += b"x"
        sflags += b"\x00"
        return b"\x0B" + name + _make_c_string_check(value.pattern) + sflags
Example 3
Project: Flask_Blog | Author: sugarguo | File: base.py | License: GNU General Public License v3.0

def __init__(self, **kwargs):
    """Construct a TINYTEXT.

    :param charset: Optional, a column-level character set for this string value. Takes precedence to 'ascii' or 'unicode' short-hand.
    :param collation: Optional, a column-level collation for this string value. Takes precedence to 'binary' short-hand.
    :param ascii: Defaults to False: short-hand for the ``latin1`` character set, generates ASCII in schema.
    :param unicode: Defaults to False: short-hand for the ``ucs2`` character set, generates UNICODE in schema.
    :param national: Optional. If true, use the server's configured national character set.
    :param binary: Defaults to False: short-hand, pick the binary collation type that matches the column's character set. Generates BINARY in schema. This does not affect the type of data stored, only the collation of character data.
    """
    super(TINYTEXT, self).__init__(**kwargs)
Example 4
Project: Flask_Blog | Author: sugarguo | File: base.py | License: GNU General Public License v3.0

def __init__(self, **kwargs):
    """Construct a MEDIUMTEXT.

    :param charset: Optional, a column-level character set for this string value. Takes precedence to 'ascii' or 'unicode' short-hand.
    :param collation: Optional, a column-level collation for this string value. Takes precedence to 'binary' short-hand.
    :param ascii: Defaults to False: short-hand for the ``latin1`` character set, generates ASCII in schema.
    :param unicode: Defaults to False: short-hand for the ``ucs2`` character set, generates UNICODE in schema.
    :param national: Optional. If true, use the server's configured national character set.
    :param binary: Defaults to False: short-hand, pick the binary collation type that matches the column's character set. Generates BINARY in schema. This does not affect the type of data stored, only the collation of character data.
    """
    super(MEDIUMTEXT, self).__init__(**kwargs)
Example 5
Project: Flask_Blog | Author: sugarguo | File: base.py | License: GNU General Public License v3.0

def __init__(self, **kwargs):
    """Construct a LONGTEXT.

    :param charset: Optional, a column-level character set for this string value. Takes precedence to 'ascii' or 'unicode' short-hand.
    :param collation: Optional, a column-level collation for this string value. Takes precedence to 'binary' short-hand.
    :param ascii: Defaults to False: short-hand for the ``latin1`` character set, generates ASCII in schema.
    :param unicode: Defaults to False: short-hand for the ``ucs2`` character set, generates UNICODE in schema.
    :param national: Optional. If true, use the server's configured national character set.
    :param binary: Defaults to False: short-hand, pick the binary collation type that matches the column's character set. Generates BINARY in schema. This does not affect the type of data stored, only the collation of character data.
    """
    super(LONGTEXT, self).__init__(**kwargs)
Example 6
Project: Flask_Blog | Author: sugarguo | File: base.py | License: GNU General Public License v3.0

def __init__(self, length=None, **kwargs):
    """Construct a VARCHAR.

    :param charset: Optional, a column-level character set for this string value. Takes precedence to 'ascii' or 'unicode' short-hand.
    :param collation: Optional, a column-level collation for this string value. Takes precedence to 'binary' short-hand.
    :param ascii: Defaults to False: short-hand for the ``latin1`` character set, generates ASCII in schema.
    :param unicode: Defaults to False: short-hand for the ``ucs2`` character set, generates UNICODE in schema.
    :param national: Optional. If true, use the server's configured national character set.
    :param binary: Defaults to False: short-hand, pick the binary collation type that matches the column's character set. Generates BINARY in schema. This does not affect the type of data stored, only the collation of character data.
    """
    super(VARCHAR, self).__init__(length=length, **kwargs)
Example 7
Project: senf | Author: quodlibet | File: _stdlib.py | License: MIT License

def expandvars(path):
    """
    Args:
        path (pathlike): A path to expand
    Returns:
        `fsnative`

    Like :func:`python:os.path.expandvars` but supports unicode under
    Windows + Python 2 and always returns a `fsnative`.
    """

    path = path2fsn(path)

    def repl_func(match):
        return environ.get(match.group(1), match.group(0))

    path = re.compile(r"\$(\w+)", flags=re.UNICODE).sub(repl_func, path)
    if os.name == "nt":
        path = re.sub(r"%([^%]+)%", repl_func, path)
    return re.sub(r"\$\{([^\}]+)\}", repl_func, path)
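A small usage sketch (assuming senf is installed, exposes expandvars at the package level, and that the variable we set is visible through its environ):

import os
from senf import expandvars  # assumed import path

os.environ["PROJECT_DIR"] = "/tmp/demo"
print(expandvars("$PROJECT_DIR/docs"))    # /tmp/demo/docs
print(expandvars("${PROJECT_DIR}/docs"))  # /tmp/demo/docs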
Example 8
Project: quartz-browser | Author: ksharindam | File: utils.py | License: GNU General Public License v3.0

def safe_filename(text, max_length=200):
    """Sanitizes filenames for many operating systems.

    :params text: The unsanitized pending filename.
    """
    # Tidy up ugly formatted filenames.
    text = text.replace('_', ' ')
    text = text.replace(':', ' -')

    # NTFS forbids filenames containing characters in range 0-31 (0x00-0x1F)
    ntfs = [chr(i) for i in range(0, 31)]

    # Removing these SHOULD make most filename safe for a wide range of
    # operating systems.
    paranoid = ['\"', '\#', '\$', '\%', '\'', '\*', '\,', '\.', '\/', '\:',
                '\;', '\<', '\>', '\?', '\\', '\^', '\|', '\~', '\\\\']

    blacklist = re.compile('|'.join(ntfs + paranoid), re.UNICODE)
    filename = blacklist.sub('', text)
    return truncate(filename)
Example 9
Project: noc-orchestrator | Author: DirceuSilvaLabs | File: regex.py | License: MIT License

def str_flags_to_int(str_flags):
    flags = 0
    if "i" in str_flags:
        flags |= re.IGNORECASE
    if "l" in str_flags:
        flags |= re.LOCALE
    if "m" in str_flags:
        flags |= re.MULTILINE
    if "s" in str_flags:
        flags |= re.DOTALL
    if "u" in str_flags:
        flags |= re.UNICODE
    if "x" in str_flags:
        flags |= re.VERBOSE

    return flags
Example 10
Project: noc-orchestrator | Author: DirceuSilvaLabs | File: regex.py | License: MIT License

def str_flags_to_int(str_flags):
    flags = 0
    if "i" in str_flags:
        flags |= re.IGNORECASE
    if "l" in str_flags:
        flags |= re.LOCALE
    if "m" in str_flags:
        flags |= re.MULTILINE
    if "s" in str_flags:
        flags |= re.DOTALL
    if "u" in str_flags:
        flags |= re.UNICODE
    if "x" in str_flags:
        flags |= re.VERBOSE

    return flags
Example 11
Project: noc-orchestrator | Author: DirceuSilvaLabs | File: regex.py | License: MIT License

def str_flags_to_int(str_flags):
    flags = 0
    if "i" in str_flags:
        flags |= re.IGNORECASE
    if "l" in str_flags:
        flags |= re.LOCALE
    if "m" in str_flags:
        flags |= re.MULTILINE
    if "s" in str_flags:
        flags |= re.DOTALL
    if "u" in str_flags:
        flags |= re.UNICODE
    if "x" in str_flags:
        flags |= re.VERBOSE

    return flags
Example 12
Project: noc-orchestrator | Author: DirceuSilvaLabs | File: regex.py | License: MIT License

def str_flags_to_int(str_flags):
    flags = 0
    if "i" in str_flags:
        flags |= re.IGNORECASE
    if "l" in str_flags:
        flags |= re.LOCALE
    if "m" in str_flags:
        flags |= re.MULTILINE
    if "s" in str_flags:
        flags |= re.DOTALL
    if "u" in str_flags:
        flags |= re.UNICODE
    if "x" in str_flags:
        flags |= re.VERBOSE

    return flags
Example 13
Project: sublime-text-3-packages | Author: nickjj | File: inlinepatterns.py | License: MIT License

def __init__(self, pattern, markdown_instance=None):
    """
    Create an instance of an inline pattern.

    Keyword arguments:

    * pattern: A regular expression that matches a pattern

    """
    self.pattern = pattern
    self.compiled_re = re.compile("^(.*?)%s(.*)$" % pattern,
                                  re.DOTALL | re.UNICODE)

    # Api for Markdown to pass safe_mode into instance
    self.safe_mode = False
    if markdown_instance:
        self.markdown = markdown_instance
Example 14
Project: CodingDojo | Author: ComputerSocietyUNB | File: urlresolvers.py

def regex(self):
    """
    Returns a compiled regular expression, depending upon the activated
    language-code.
    """
    language_code = get_language()
    if language_code not in self._regex_dict:
        if isinstance(self._regex, six.string_types):
            regex = self._regex
        else:
            regex = force_text(self._regex)
        try:
            compiled_regex = re.compile(regex, re.UNICODE)
        except re.error as e:
            raise ImproperlyConfigured(
                '"%s" is not a valid regular expression: %s' %
                (regex, six.text_type(e)))

        self._regex_dict[language_code] = compiled_regex
    return self._regex_dict[language_code]
Example 15
Project: gui_tool | Author: UAVCAN | File: __init__.py

def _do_match(self, text):
    if self.use_regex:
        try:
            flags = re.UNICODE
            if not self.case_sensitive:
                flags |= re.IGNORECASE
            return bool(re.findall(self.pattern, text, flags=flags))
        except Exception as ex:
            logger.warning('Regular expression match failed', exc_info=True)
            raise self.BadPatternException(str(ex))
    else:
        if self.case_sensitive:
            pattern = self.pattern
        else:
            pattern = self.pattern.lower()
            text = text.lower()
        return pattern in text
Example 16
Project: thesaurus_query.vim | Author: Ron89 | File: woxikon_de_lookup.py

def obtainGroups(webcontent, groupNum):
    synonym_list = []
    for group in range(groupNum):
        while not re.search("synonyms-list-group", webcontent.readline(), re.UNICODE):
            continue
        meaning = re.search("Meaning: <b>([^<]+)</b>",
                            webcontent.readline(), re.UNICODE).group(1)
        webcontent.readline()  # </div> line
        webcontent.readline()  # synonyms-list_content line
        sublist = webcontent.readline().split(',')
        subSynList = []
        for wordContainer in sublist:
            potential_synonym = re.search("<a href=[^>]+>([^<]+)</a>",
                                          wordContainer, re.UNICODE)
            if potential_synonym:
                subSynList.append(potential_synonym.group(1))
        synonym_list.append([meaning, subSynList])
    return synonym_list
Example 17
Project: thesaurus_query.vim | Author: Ron89 | File: woxikon_de_lookup.py

def _parser(webcontent):
    end_tag_count = 4
    pointer = webcontent.tell()
    end = len(webcontent.getvalue())
    while pointer < end:
        line_curr = webcontent.readline()
        found = re.search("Found ([0-9]+) synonym[ a-z]+([0-9]+) group",
                          line_curr, re.UNICODE)
        notFound = re.search("<div class=\"no-results\">", line_curr, re.UNICODE)
        if found:
            groupNum = int(found.group(2))
            synonymNum = int(found.group(1))
            synonym_list = obtainGroups(webcontent, groupNum)
            webcontent.close()
            return synonym_list
        if notFound:
            webcontent.close()
            return []
    webcontent.close()
    return synonym_list
Example 18
Project: QXSConsolas | Author: qxsch | File: base.py

def __init__(self, **kwargs):
    """Construct a TINYTEXT.

    :param charset: Optional, a column-level character set for this string value. Takes precedence to 'ascii' or 'unicode' short-hand.
    :param collation: Optional, a column-level collation for this string value. Takes precedence to 'binary' short-hand.
    :param ascii: Defaults to False: short-hand for the ``latin1`` character set, generates ASCII in schema.
    :param unicode: Defaults to False: short-hand for the ``ucs2`` character set, generates UNICODE in schema.
    :param national: Optional. If true, use the server's configured national character set.
    :param binary: Defaults to False: short-hand, pick the binary collation type that matches the column's character set. Generates BINARY in schema. This does not affect the type of data stored, only the collation of character data.
    """
    super(TINYTEXT, self).__init__(**kwargs)
Example 19
Project: QXSConsolas | Author: qxsch | File: base.py

def __init__(self, **kwargs):
    """Construct a MEDIUMTEXT.

    :param charset: Optional, a column-level character set for this string value. Takes precedence to 'ascii' or 'unicode' short-hand.
    :param collation: Optional, a column-level collation for this string value. Takes precedence to 'binary' short-hand.
    :param ascii: Defaults to False: short-hand for the ``latin1`` character set, generates ASCII in schema.
    :param unicode: Defaults to False: short-hand for the ``ucs2`` character set, generates UNICODE in schema.
    :param national: Optional. If true, use the server's configured national character set.
    :param binary: Defaults to False: short-hand, pick the binary collation type that matches the column's character set. Generates BINARY in schema. This does not affect the type of data stored, only the collation of character data.
    """
    super(MEDIUMTEXT, self).__init__(**kwargs)
Example 20
Project: QXSConsolas | Author: qxsch | File: base.py

def __init__(self, **kwargs):
    """Construct a LONGTEXT.

    :param charset: Optional, a column-level character set for this string value. Takes precedence to 'ascii' or 'unicode' short-hand.
    :param collation: Optional, a column-level collation for this string value. Takes precedence to 'binary' short-hand.
    :param ascii: Defaults to False: short-hand for the ``latin1`` character set, generates ASCII in schema.
    :param unicode: Defaults to False: short-hand for the ``ucs2`` character set, generates UNICODE in schema.
    :param national: Optional. If true, use the server's configured national character set.
    :param binary: Defaults to False: short-hand, pick the binary collation type that matches the column's character set. Generates BINARY in schema. This does not affect the type of data stored, only the collation of character data.
    """
    super(LONGTEXT, self).__init__(**kwargs)
Example 21
Project: QXSConsolas | Author: qxsch | File: base.py

def __init__(self, length=None, **kwargs):
    """Construct a VARCHAR.

    :param charset: Optional, a column-level character set for this string value. Takes precedence to 'ascii' or 'unicode' short-hand.
    :param collation: Optional, a column-level collation for this string value. Takes precedence to 'binary' short-hand.
    :param ascii: Defaults to False: short-hand for the ``latin1`` character set, generates ASCII in schema.
    :param unicode: Defaults to False: short-hand for the ``ucs2`` character set, generates UNICODE in schema.
    :param national: Optional. If true, use the server's configured national character set.
    :param binary: Defaults to False: short-hand, pick the binary collation type that matches the column's character set. Generates BINARY in schema. This does not affect the type of data stored, only the collation of character data.
    """
    super(VARCHAR, self).__init__(length=length, **kwargs)
Example 22
Project: TuShare | Author: andyzsf | File: reference.py

def _fun_into(x):
    if ct.PY3:
        reg1 = re.compile(r'??(.*?)?', re.UNICODE)
        reg2 = re.compile(r'??(.*?)?', re.UNICODE)
        res1 = reg1.findall(x)
        res2 = reg2.findall(x)
        res1 = 0 if len(res1) < 1 else float(res1[0])
        res2 = 0 if len(res2) < 1 else float(res2[0])
        return res1 + res2
    else:
        if isinstance(x, unicode):
            s1 = unicode('??', 'utf-8')
            s2 = unicode('??', 'utf-8')
            s3 = unicode('?', 'utf-8')
            reg1 = re.compile(r'%s(.*?)%s' % (s1, s3), re.UNICODE)
            reg2 = re.compile(r'%s(.*?)%s' % (s2, s3), re.UNICODE)
            res1 = reg1.findall(x)
            res2 = reg2.findall(x)
            res1 = 0 if len(res1) < 1 else float(res1[0])
            res2 = 0 if len(res2) < 1 else float(res2[0])
            return res1 + res2
        else:
            return 0
Example 23
Project: touch-pay-client | Author: HackPucBemobi | File: validators.py

def __init__(self, expression, error_message='Invalid expression',
             strict=False, search=False, extract=False,
             is_unicode=False):
    if strict or not search:
        if not expression.startswith('^'):
            expression = '^(%s)' % expression
    if strict:
        if not expression.endswith('$'):
            expression = '(%s)$' % expression
    if is_unicode:
        if not isinstance(expression, unicodeT):
            expression = expression.decode('utf8')
        self.regex = re.compile(expression, re.UNICODE)
    else:
        self.regex = re.compile(expression)
    self.error_message = error_message
    self.extract = extract
    self.is_unicode = is_unicode or (not(PY2))
Example 24
Project: transpyler | Author: Transpyler | File: pygments.py

def transpyler_lexer_factory(transpyler):
    """
    Return a Pygments lexer class for the given transpyler.
    """

    def analyse_text(text):
        return shebang_matches(text, r'pythonw?3(\.\d)?')

    return type(
        transpyler.pygments_class_name, (Python3Lexer,), dict(
            analyse_text=analyse_text,
            name=transpyler.name,
            aliases=[transpyler.display_name],
            filenames=transpyler.file_extensions,
            mimetypes=transpyler.mimetypes,
            flags=re.MULTILINE | re.UNICODE,
            uni_name="[%s][%s]*" % (uni.xid_start, uni.xid_continue),
            tokens=make_transpyled_tokens(transpyler),
        )
    )
Example 25
Project: aws-cfn-plex | Author: lordmuffin | File: states.py

def build_regexp(definition, compile=True):
    """
    Build, compile and return a regular expression based on `definition`.

    :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
        where "parts" is a list of regular expressions and/or regular
        expression definitions to be joined into an or-group.
    """
    name, prefix, suffix, parts = definition
    part_strings = []
    for part in parts:
        if type(part) is tuple:
            part_strings.append(build_regexp(part, None))
        else:
            part_strings.append(part)
    or_group = '|'.join(part_strings)
    regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals()
    if compile:
        return re.compile(regexp, re.UNICODE)
    else:
        return regexp
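To make the 4-tuple format concrete, here is a hypothetical definition (the group name, prefix, suffix and parts are invented for illustration):

definition = ('quote', r'(?<=\s)', '', ['"', "'", '``'])
compiled = build_regexp(definition)   # compiled with re.UNICODE
print(compiled.pattern)               # (?<=\s)(?P<quote>"|'|``)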
Example 26
Project: filters | Author: eflglobal | File: string_test.py

def test_pass_precompiled_regex(self):
    """
    You can alternatively provide a precompiled regex to the Filter
    instead of a string pattern.
    """
    # Compile our own pattern so that we can specify the
    # ``IGNORECASE`` flag.
    # Note that you are responsible for adding the ``UNICODE`` flag
    # to your compiled regex!
    # noinspection SpellCheckingInspection
    pattern = re.compile(r'\btest\b', re.IGNORECASE | re.UNICODE)

    self.assertFilterPasses(
        self._filter('test march of the TEST penguins', pattern=pattern),
        ['test', 'TEST'],
    )
Example 27
Project: filters | Author: eflglobal | File: string_test.py

def test_pass_regex_library_support(self):
    """
    The Regex Filter also supports precompiled patterns using the
    ``regex`` library.
    """
    # Roughly, "Hi there!" in Burmese.
    word = '\u101f\u102d\u102f\u1004\u103a\u1038'

    # Note that :py:func:`regex.compile` automatically adds the
    # ``UNICODE`` flag for you when the pattern is a unicode.
    pattern = regex.compile(r'\w+')

    self.assertFilterPasses(
        self._filter(word, pattern=pattern),
        [word],
    )
Example 28
Project: filters | Author: eflglobal | File: string_test.py

def test_pass_precompiled_regex(self):
    """
    You can alternatively provide a precompiled regex to the Filter
    instead of a string pattern.
    """
    # Compile our own pattern so that we can specify the
    # ``IGNORECASE`` flag.
    # Note that you are responsible for adding the ``UNICODE`` flag
    # to your compiled regex!
    # noinspection SpellCheckingInspection
    pattern = re.compile(r'\btest\b', re.IGNORECASE | re.UNICODE)

    self.assertFilterPasses(
        self._filter('test march of the TEST penguins', pattern=pattern),
        ['', ' march of the ', ' penguins'],
    )
Example 29
Project: filters | Author: eflglobal | File: string_test.py

def test_pass_regex_library_support(self):
    """
    The Regex Filter also supports precompiled patterns using the
    ``regex`` library.
    """
    # Roughly, "Hi there!" in Burmese.
    word = '\u101f\u102d\u102f\u1004\u103a\u1038!'

    # Note that :py:func:`regex.compile` automatically adds the
    # ``UNICODE`` flag for you when the pattern is a unicode.
    pattern = regex.compile(r'\w+')

    self.assertFilterPasses(
        self._filter(word, pattern=pattern),
        ['', '!'],
    )
Example 30
Project: AshsSDK | Author: thehappydinoa | File: states.py

def build_regexp(definition, compile=True):
    """
    Build, compile and return a regular expression based on `definition`.

    :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
        where "parts" is a list of regular expressions and/or regular
        expression definitions to be joined into an or-group.
    """
    name, prefix, suffix, parts = definition
    part_strings = []
    for part in parts:
        if type(part) is tuple:
            part_strings.append(build_regexp(part, None))
        else:
            part_strings.append(part)
    or_group = '|'.join(part_strings)
    regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals()
    if compile:
        return re.compile(regexp, re.UNICODE)
    else:
        return regexp
Example 31
Project: flasky | Author: RoseOu | File: base.py

def __init__(self, **kwargs):
    """Construct a TINYTEXT.

    :param charset: Optional, a column-level character set for this string value. Takes precedence to 'ascii' or 'unicode' short-hand.
    :param collation: Optional, a column-level collation for this string value. Takes precedence to 'binary' short-hand.
    :param ascii: Defaults to False: short-hand for the ``latin1`` character set, generates ASCII in schema.
    :param unicode: Defaults to False: short-hand for the ``ucs2`` character set, generates UNICODE in schema.
    :param national: Optional. If true, use the server's configured national character set.
    :param binary: Defaults to False: short-hand, pick the binary collation type that matches the column's character set. Generates BINARY in schema. This does not affect the type of data stored, only the collation of character data.
    """
    super(TINYTEXT, self).__init__(**kwargs)
Example 32
Project: flasky | Author: RoseOu | File: base.py

def __init__(self, **kwargs):
    """Construct a MEDIUMTEXT.

    :param charset: Optional, a column-level character set for this string value. Takes precedence to 'ascii' or 'unicode' short-hand.
    :param collation: Optional, a column-level collation for this string value. Takes precedence to 'binary' short-hand.
    :param ascii: Defaults to False: short-hand for the ``latin1`` character set, generates ASCII in schema.
    :param unicode: Defaults to False: short-hand for the ``ucs2`` character set, generates UNICODE in schema.
    :param national: Optional. If true, use the server's configured national character set.
    :param binary: Defaults to False: short-hand, pick the binary collation type that matches the column's character set. Generates BINARY in schema. This does not affect the type of data stored, only the collation of character data.
    """
    super(MEDIUMTEXT, self).__init__(**kwargs)
Example 33
Project: flasky | Author: RoseOu | File: base.py

def __init__(self, **kwargs):
    """Construct a LONGTEXT.

    :param charset: Optional, a column-level character set for this string value. Takes precedence to 'ascii' or 'unicode' short-hand.
    :param collation: Optional, a column-level collation for this string value. Takes precedence to 'binary' short-hand.
    :param ascii: Defaults to False: short-hand for the ``latin1`` character set, generates ASCII in schema.
    :param unicode: Defaults to False: short-hand for the ``ucs2`` character set, generates UNICODE in schema.
    :param national: Optional. If true, use the server's configured national character set.
    :param binary: Defaults to False: short-hand, pick the binary collation type that matches the column's character set. Generates BINARY in schema. This does not affect the type of data stored, only the collation of character data.
    """
    super(LONGTEXT, self).__init__(**kwargs)
Example 34
Project: flasky | Author: RoseOu | File: base.py

def __init__(self, length=None, **kwargs):
    """Construct a VARCHAR.

    :param charset: Optional, a column-level character set for this string value. Takes precedence to 'ascii' or 'unicode' short-hand.
    :param collation: Optional, a column-level collation for this string value. Takes precedence to 'binary' short-hand.
    :param ascii: Defaults to False: short-hand for the ``latin1`` character set, generates ASCII in schema.
    :param unicode: Defaults to False: short-hand for the ``ucs2`` character set, generates UNICODE in schema.
    :param national: Optional. If true, use the server's configured national character set.
    :param binary: Defaults to False: short-hand, pick the binary collation type that matches the column's character set. Generates BINARY in schema. This does not affect the type of data stored, only the collation of character data.
    """
    super(VARCHAR, self).__init__(length=length, **kwargs)
Example 35
Project: macos-st-packages | Author: zce | File: inlinepatterns.py

def __init__(self, pattern, markdown_instance=None):
    """
    Create an instance of an inline pattern.

    Keyword arguments:

    * pattern: A regular expression that matches a pattern

    """
    self.pattern = pattern
    self.compiled_re = re.compile("^(.*?)%s(.*)$" % pattern,
                                  re.DOTALL | re.UNICODE)

    # Api for Markdown to pass safe_mode into instance
    self.safe_mode = False
    if markdown_instance:
        self.markdown = markdown_instance
Example 36
Project: true_review_web2py | Author: lucadealfaro | File: validators.py

def __init__(self, expression, error_message='Invalid expression',
             strict=False, search=False, extract=False,
             is_unicode=False):
    if strict or not search:
        if not expression.startswith('^'):
            expression = '^(%s)' % expression
    if strict:
        if not expression.endswith('$'):
            expression = '(%s)$' % expression
    if is_unicode:
        if not isinstance(expression, unicode):
            expression = expression.decode('utf8')
        self.regex = re.compile(expression, re.UNICODE)
    else:
        self.regex = re.compile(expression)
    self.error_message = error_message
    self.extract = extract
    self.is_unicode = is_unicode
Example 37
Project: oa_qian | Author: sunqb | File: base.py

def __init__(self, **kwargs):
    """Construct a TINYTEXT.

    :param charset: Optional, a column-level character set for this string value. Takes precedence to 'ascii' or 'unicode' short-hand.
    :param collation: Optional, a column-level collation for this string value. Takes precedence to 'binary' short-hand.
    :param ascii: Defaults to False: short-hand for the ``latin1`` character set, generates ASCII in schema.
    :param unicode: Defaults to False: short-hand for the ``ucs2`` character set, generates UNICODE in schema.
    :param national: Optional. If true, use the server's configured national character set.
    :param binary: Defaults to False: short-hand, pick the binary collation type that matches the column's character set. Generates BINARY in schema. This does not affect the type of data stored, only the collation of character data.
    """
    super(TINYTEXT, self).__init__(**kwargs)
Example 38
Project: oa_qian | Author: sunqb | File: base.py

def __init__(self, **kwargs):
    """Construct a MEDIUMTEXT.

    :param charset: Optional, a column-level character set for this string value. Takes precedence to 'ascii' or 'unicode' short-hand.
    :param collation: Optional, a column-level collation for this string value. Takes precedence to 'binary' short-hand.
    :param ascii: Defaults to False: short-hand for the ``latin1`` character set, generates ASCII in schema.
    :param unicode: Defaults to False: short-hand for the ``ucs2`` character set, generates UNICODE in schema.
    :param national: Optional. If true, use the server's configured national character set.
    :param binary: Defaults to False: short-hand, pick the binary collation type that matches the column's character set. Generates BINARY in schema. This does not affect the type of data stored, only the collation of character data.
    """
    super(MEDIUMTEXT, self).__init__(**kwargs)
Example 39
Project: oa_qian | Author: sunqb | File: base.py

def __init__(self, **kwargs):
    """Construct a LONGTEXT.

    :param charset: Optional, a column-level character set for this string value. Takes precedence to 'ascii' or 'unicode' short-hand.
    :param collation: Optional, a column-level collation for this string value. Takes precedence to 'binary' short-hand.
    :param ascii: Defaults to False: short-hand for the ``latin1`` character set, generates ASCII in schema.
    :param unicode: Defaults to False: short-hand for the ``ucs2`` character set, generates UNICODE in schema.
    :param national: Optional. If true, use the server's configured national character set.
    :param binary: Defaults to False: short-hand, pick the binary collation type that matches the column's character set. Generates BINARY in schema. This does not affect the type of data stored, only the collation of character data.
    """
    super(LONGTEXT, self).__init__(**kwargs)
Example 40
Project: oa_qian | Author: sunqb | File: base.py

def __init__(self, length=None, **kwargs):
    """Construct a VARCHAR.

    :param charset: Optional, a column-level character set for this string value. Takes precedence to 'ascii' or 'unicode' short-hand.
    :param collation: Optional, a column-level collation for this string value. Takes precedence to 'binary' short-hand.
    :param ascii: Defaults to False: short-hand for the ``latin1`` character set, generates ASCII in schema.
    :param unicode: Defaults to False: short-hand for the ``ucs2`` character set, generates UNICODE in schema.
    :param national: Optional. If true, use the server's configured national character set.
    :param binary: Defaults to False: short-hand, pick the binary collation type that matches the column's character set. Generates BINARY in schema. This does not affect the type of data stored, only the collation of character data.
    """
    super(VARCHAR, self).__init__(length=length, **kwargs)
Example 41
Project: AlphaPy | Author: ScottFreeLLC | File: utilities.py

def valid_name(name):
    r"""Determine whether or not the given string is a valid
    alphanumeric string.

    Parameters
    ----------
    name : str
        An alphanumeric identifier.

    Returns
    -------
    result : bool
        ``True`` if the name is valid, else ``False``.

    Examples
    --------

    >>> valid_name('alpha')   # True
    >>> valid_name('!alpha')  # False

    """
    identifier = re.compile(r"^[^\d\W]\w*\Z", re.UNICODE)
    result = re.match(identifier, name)
    return result is not None
Example 42
Project: spc | Author: whbrewer | File: validators.py

def __init__(self, expression, error_message='invalid expression',
             strict=False, search=False, extract=False,
             unicode=False):
    if strict or not search:
        if not expression.startswith('^'):
            expression = '^(%s)' % expression
    if strict:
        if not expression.endswith('$'):
            expression = '(%s)$' % expression
    if unicode:
        if not isinstance(expression, unicode):
            expression = expression.decode('utf8')
        self.regex = re.compile(expression, re.UNICODE)
    else:
        self.regex = re.compile(expression)
    self.error_message = error_message
    self.extract = extract
    self.unicode = unicode
Example 43
Project: bookmark-archiver | Author: pirate | File: parse.py

def parse_pocket_export(html_file):
    """Parse Pocket-format bookmarks export files (produced by getpocket.com/export/)"""

    html_file.seek(0)
    pattern = re.compile("^\\s*<li><a href=\"(.+)\" time_added=\"(\\d+)\" tags=\"(.*)\">(.+)</a></li>", re.UNICODE)
    for line in html_file:
        # example line
        # <li><a href="http://example.com/ time_added="1478739709" tags="tag1,tag2">example title</a></li>
        match = pattern.search(line)
        if match:
            # remove old readability prefixes to get original url
            fixed_url = match.group(1).replace('http://www.readability.com/read?url=', '')
            time = datetime.fromtimestamp(float(match.group(2)))
            info = {
                'url': fixed_url,
                'domain': domain(fixed_url),
                'base_url': base_url(fixed_url),
                'timestamp': str(time.timestamp()),
                'tags': match.group(3),
                'title': match.group(4).replace(' — Readability', '').replace('http://www.readability.com/read?url=', '') or base_url(fixed_url),
                'sources': [html_file.name],
            }
            info['type'] = get_link_type(info)
            yield info
Example 44
Project: backrefs | Author: facelessuser | File: bre.py

def _apply_search_backrefs(pattern, flags=0):
    """Apply the search backrefs to the search pattern."""

    if isinstance(pattern, (compat.string_type, compat.binary_type)):
        re_verbose = bool(VERBOSE & flags)
        re_unicode = None
        if compat.PY3 and bool(ASCII & flags):
            re_unicode = False
        elif bool(UNICODE & flags):
            re_unicode = True
        pattern = SearchTemplate(pattern, re_verbose, re_unicode).apply()
    elif isinstance(pattern, RE_TYPE):
        if flags:
            raise ValueError("Cannot process flags argument with a compiled pattern!")
    else:
        raise TypeError("Not a string or compiled pattern!")
    return pattern
Example 45
Project: hugo_similar_posts | Author: elbaulp | File: similar_posts.py

def preprocessor(text):
    # TODO: Remove punctuation
    # Remove frontmatter
    text = re.sub(r"u'", '', text)
    text = re.sub(r'^\s*---.*---\s*$', '', text,
                  flags=re.DOTALL | re.MULTILINE | re.UNICODE)
    text = re.sub(r'^\s*\+{3}.*\+{3}\s*$', '', text,
                  flags=re.DOTALL | re.MULTILINE | re.UNICODE)
    text = re.sub(r'^\s*```.*?```\s*$', '', text,
                  flags=re.DOTALL | re.MULTILINE)
    text = re.sub(r'`[^`]*`', '', text)
    text = re.sub(r'<[^>]*>', '', text,
                  flags=re.UNICODE | re.DOTALL | re.MULTILINE)
    text = text.replace('<!--more--><!--ad-->', '')
    text = re.sub(r'https?:\/\/.*[\r\n]*', '', text,
                  flags=re.MULTILINE | re.UNICODE)
    text = re.sub(r'[#|*|\[\]:.,]', '', text, flags=re.UNICODE)
    text = re.sub(r'[!"#$%&\'()*+,-./:;<=>?@\[\\\]^_`{|}~]', '', text)
    text = re.sub(r'\d*', '', text)
    text = text.lower()
    text = re.sub(r'[\W]+', ' ', text.lower(), flags=re.UNICODE)
    return text
Example 46
Project: python- | Author: secondtonone1 | File: tokenize.py

def _compile(expr):
    return re.compile(expr, re.UNICODE)

# Note that since _all_string_prefixes includes the empty string,
# StringPrefix can be the empty string (making it optional).
Example 47
Project: tashaphyne | Author: linuxscout | File: stemming.py | License: GNU General Public License v3.0

def __init__(self):
    # load affix information
    # pass
    self.prefix_letters = stem_const.DEFAULT_PREFIX_LETTERS
    self.suffix_letters = stem_const.DEFAULT_SUFFIX_LETTERS
    self.infix_letters = stem_const.DEFAULT_INFIX_LETTERS
    self.max_prefix_length = stem_const.DEFAULT_MAX_PREFIX
    self.max_suffix_length = stem_const.DEFAULT_MAX_SUFFIX
    self.min_stem_length = stem_const.DEFAULT_MIN_STEM
    self.joker = stem_const.DEFAULT_JOKER
    self.prefix_list = stem_const.DEFAULT_PREFIX_LIST
    self.suffix_list = stem_const.DEFAULT_SUFFIX_LIST
    self.word = u""
    self.unvocalized = u""
    self.normalized = u""
    self.starword = u""
    self.root = u""
    self.left = 0
    self.right = 0
    self.segment_list = []
    # token pattern
    # letters and harakat
    self.token_pat = re.compile(r"[^\w\u064b-\u0652']+", re.UNICODE)
    self.prefixes_tree = self._create_prefix_tree(self.prefix_list)
    self.suffixes_tree = self._create_suffix_tree(self.suffix_list)

######################################################################
#{ Attribut Functions
######################################################################
Example 48
Project: yargy | Author: natasha | File: tokenizer.py | License: MIT License

def compile(self, rules):
    mapping = {}
    patterns = []
    for rule in rules:
        name = 'rule_{id}'.format(id=id(rule))
        pattern = r'(?P<{name}>{pattern})'.format(
            name=name,
            pattern=rule.pattern
        )
        mapping[name] = rule
        patterns.append(pattern)
    pattern = '|'.join(patterns)
    regexp = re.compile(pattern, re.UNICODE | re.IGNORECASE)
    return regexp, mapping
Example 49
Project: mongodb-monitoring | Author: jruaux | File: regex.py | License: Apache License 2.0

def from_native(cls, regex):
    """Convert a Python regular expression into a ``Regex`` instance.

    Note that in Python 3, a regular expression compiled from a
    :class:`str` has the ``re.UNICODE`` flag set. If it is undesirable
    to store this flag in a BSON regular expression, unset it first::

      >>> pattern = re.compile('.*')
      >>> regex = Regex.from_native(pattern)
      >>> regex.flags ^= re.UNICODE
      >>> db.collection.insert({'pattern': regex})

    :Parameters:
      - `regex`: A regular expression object from ``re.compile()``.

    .. warning::
       Python regular expressions use a different syntax and different
       set of flags than MongoDB, which uses `PCRE`_. A regular
       expression retrieved from the server may not compile in
       Python, or may match a different set of strings in Python than
       when used in a MongoDB query.

    .. _PCRE: http://www.pcre.org/
    """
    if not isinstance(regex, RE_TYPE):
        raise TypeError(
            "regex must be a compiled regular expression, not %s"
            % type(regex))

    return Regex(regex.pattern, regex.flags)
Example 50
Project: Flask_Blog | Author: sugarguo | File: base.py | License: GNU General Public License v3.0

def __init__(self, length=None, **kw):
    """Construct a TEXT.

    :param length: Optional, if provided the server may optimize storage by substituting the smallest TEXT type sufficient to store ``length`` characters.
    :param charset: Optional, a column-level character set for this string value. Takes precedence to 'ascii' or 'unicode' short-hand.
    :param collation: Optional, a column-level collation for this string value. Takes precedence to 'binary' short-hand.
    :param ascii: Defaults to False: short-hand for the ``latin1`` character set, generates ASCII in schema.
    :param unicode: Defaults to False: short-hand for the ``ucs2`` character set, generates UNICODE in schema.
    :param national: Optional. If true, use the server's configured national character set.
    :param binary: Defaults to False: short-hand, pick the binary collation type that matches the column's character set. Generates BINARY in schema. This does not affect the type of data stored, only the collation of character data.
    """
    super(TEXT, self).__init__(length=length, **kw)