Python re.VERBOSE Examples

The following are 30 code examples showing how to use the re.VERBOSE flag. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module re, or try the search function.

Example 1
Project: recipes-py   Author: luci   File: magic_check_fn.py    License: Apache License 2.0 9 votes vote down vote up
def render_re(regex):
  """Return ``re.compile(...)`` source text describing a compiled pattern.

  The pattern is rendered with ``%r``.  Any of the six flags listed below
  that are set on ``regex.flags`` are appended by name, joined with ``|``;
  when none of them is set the flags argument is omitted entirely.
  """
  known_flags = (
    (re.IGNORECASE, 'IGNORECASE'),
    (re.LOCALE, 'LOCALE'),
    (re.UNICODE, 'UNICODE'),
    (re.MULTILINE, 'MULTILINE'),
    (re.DOTALL, 'DOTALL'),
    (re.VERBOSE, 'VERBOSE'),
  )
  names = []
  if regex.flags:
    names = [label for bit, label in known_flags if regex.flags & bit]
  if not names:
    return 're.compile(%r)' % regex.pattern
  return 're.compile(%r, %s)' % (regex.pattern, '|'.join(names))
Example 2
Project: pyscf   Author: pyscf   File: m_siesta_ion_xml.py    License: Apache License 2.0 7 votes vote down vote up
def str2int(string):
  """Collect every numeric token in *string* into a NumPy array of ints.

  Tokens are located with a verbose regex that recognises an optional sign,
  digits with an optional decimal point, and an optional exponent.  Each
  token is then passed to ``int()``, so a token containing a decimal point
  or an exponent raises ``ValueError``.
  """
  numeric_const_pattern = r"""
  [-+]? # optional sign
  (?:
    (?: \d* \. \d+ ) # .1 .12 .123 etc 9.1 etc 98.1 etc
    |
    (?: \d+ \.? ) # 1. 12. 123. etc 1 12 123 etc
  )
  # followed by optional exponent part if desired
  (?: [Ee] [+-]? \d+ ) ?
  """
  matcher = re.compile(numeric_const_pattern, re.VERBOSE)
  return np.array([int(token) for token in matcher.findall(string)])

#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!1 
Example 3
Project: jawfish   Author: war-and-code   File: support.py    License: MIT License 6 votes vote down vote up
def set_memlimit(limit):
    """Parse a memory-limit string and publish it via module globals.

    *limit* is a number (optionally with a fractional part) followed by a
    unit letter K, M, G or T in either case and an optional ``b``.  Because
    the pattern is compiled with ``re.VERBOSE`` the blank in the pattern is
    ignored, so the number and unit are written without a space ("2.5Gb").

    ``real_max_memuse`` receives the requested byte count as given;
    ``max_memuse`` receives that value capped at ``MAX_Py_ssize_t``.

    Raises ValueError when *limit* does not match, or when the capped value
    is below ``_2G - 1`` (``real_max_memuse`` has already been updated by
    then).
    """
    global max_memuse
    global real_max_memuse
    unit_bytes = {
        'k': 1024,
        'm': _1M,
        'g': _1G,
        't': 1024*_1G,
    }
    parsed = re.match(r'(\d+(\.\d+)?) (K|M|G|T)b?$', limit,
                      re.IGNORECASE | re.VERBOSE)
    if parsed is None:
        raise ValueError('Invalid memory limit %r' % (limit,))
    amount, unit = parsed.group(1), parsed.group(3)
    requested = int(float(amount) * unit_bytes[unit.lower()])
    real_max_memuse = requested
    capped = min(requested, MAX_Py_ssize_t)
    if capped < _2G - 1:
        raise ValueError('Memory limit %r too low to be useful' % (limit,))
    max_memuse = capped
Example 4
Project: pyscf   Author: pyscf   File: m_siesta_ion_xml.py    License: Apache License 2.0 6 votes vote down vote up
def str2float(string):
  """Collect every numeric token in *string* into a NumPy array of floats.

  A token is an optional sign, digits with an optional decimal point
  (``.5``, ``1.``, ``12`` and ``9.1`` all qualify) and an optional exponent.
  Text that is not part of a token is skipped.
  """
  numeric_const_pattern = r"""
  [-+]? # optional sign
  (?:
    (?: \d* \. \d+ ) # .1 .12 .123 etc 9.1 etc 98.1 etc
    |
    (?: \d+ \.? ) # 1. 12. 123. etc 1 12 123 etc
  )
  # followed by optional exponent part if desired
  (?: [Ee] [+-]? \d+ ) ?
  """
  matcher = re.compile(numeric_const_pattern, re.VERBOSE)
  values = [float(token) for token in matcher.findall(string)]
  return np.array(values)
Example 5
Project: razzy-spinner   Author: rafasashi   File: ycoe.py    License: GNU General Public License v3.0 6 votes vote down vote up
def _parse(s):
    """Yield the sentences found in *s*, one accumulated string at a time.

    ``(CODE ...)`` and ``(ID ...<digit>)`` spans are removed first, then the
    text is split on newlines.  A line that starts with ``(`` begins a new
    sentence; any other line is appended to the sentence being built.  Each
    finished sentence is passed through ``_strip_spaces`` and yielded unless
    the result is the empty string.
    """
    rx_pattern = re.compile(r"""
        \(CODE .*\)
        |\(ID .*\d\)
    """, re.VERBOSE|re.UNICODE)
    lines = split(rx_pattern.sub('', s), '\n')

    pending = ""
    for line in lines:
        opens_sentence = list(tokenize.regexp(line, r'^\(')) != []
        if not opens_sentence:
            # Continuation line: keep growing the current sentence.
            pending += line
            continue
        # A new sentence marker: flush what has been collected so far.
        pending = _strip_spaces(pending)
        if pending != "":
            yield pending
        pending = line

    # Flush whatever is left in the buffer after the last line.
    pending = _strip_spaces(pending)
    if pending != "":
        yield pending
Example 6
Project: razzy-spinner   Author: rafasashi   File: relextract.py    License: GNU General Public License v3.0 6 votes vote down vote up
def conllesp():
    """Print a short demo of ORG/LOC relations from the Spanish CoNLL2002 data.

    Relations are extracted from every chunked sentence of ``esp.train``
    using a pattern that requires ``de/SP`` or ``del/SP``; only the first
    ten are printed.
    """
    from nltk.corpus import conll2002

    de = """
    .*
    (
    de/SP|
    del/SP
    )
    """
    de_pattern = re.compile(de, re.VERBOSE)

    print()
    print("Spanish CoNLL2002: de(ORG, LOC) -- just the first 10 clauses:")
    print("=" * 45)

    found = []
    for doc in conll2002.chunked_sents('esp.train'):
        found.extend(
            extract_rels('ORG', 'LOC', doc, corpus='conll2002', pattern=de_pattern))

    for relation in found[:10]:
        print(clause(relation, relsym='DE'))
    print()
Example 7
Project: verge3d-blender-addon   Author: Soft8Soft   File: support.py    License: GNU General Public License v3.0 6 votes vote down vote up
def set_memlimit(limit):
    """Record the memory limit described by the string *limit*.

    Accepted form: digits with an optional fractional part, then one of
    K/M/G/T (case-insensitive) and an optional ``b``.  The pattern is
    verbose, so its literal blank is ignored and no space is expected
    between number and unit.

    Side effects: ``real_max_memuse`` is set to the requested byte count and
    ``max_memuse`` to that count clamped to ``MAX_Py_ssize_t``.

    Raises ValueError for an unparseable *limit* or a clamped value smaller
    than ``_2G - 1``; in the latter case ``real_max_memuse`` has already
    been assigned.
    """
    global max_memuse
    global real_max_memuse
    multipliers = {
        'k': 1024,
        'm': _1M,
        'g': _1G,
        't': 1024*_1G,
    }
    hit = re.match(r'(\d+(\.\d+)?) (K|M|G|T)b?$', limit,
                   re.IGNORECASE | re.VERBOSE)
    if hit is None:
        raise ValueError('Invalid memory limit %r' % (limit,))
    number_text, suffix = hit.group(1, 3)
    byte_count = int(float(number_text) * multipliers[suffix.lower()])
    real_max_memuse = byte_count
    if byte_count > MAX_Py_ssize_t:
        byte_count = MAX_Py_ssize_t
    too_small = byte_count < _2G - 1
    if too_small:
        raise ValueError('Memory limit %r too low to be useful' % (limit,))
    max_memuse = byte_count
Example 8
Project: misp42splunk   Author: remg427   File: ip_math.py    License: GNU Lesser General Public License v3.0 6 votes vote down vote up
def is_valid_ip(addr):
    '''Validate an IPV4 address.

    Leading and trailing whitespace is stripped before matching.  Each of
    the four dot-separated octets must be one to three digits with a value
    in the range 0-255 (leading zeros are accepted).

    :param addr: IP address to validate.
    :type addr: ``string``
    :returns: True if is valid else False; also False when ``addr`` has no
        ``strip`` method (i.e. is not a string).
    :rtype: ``bool``
    '''

    ip_rx = re.compile(r'''
        ^(((
              [0-1]\d{2}                  # matches 000-199
            | 2[0-4]\d                    # matches 200-249
            | 25[0-5]                     # matches 250-255
            | \d{1,2}                     # matches 0-9, 00-99
        )\.){3})                          # 3 of the preceding stanzas
        ([0-1]\d{2}|2[0-4]\d|25[0-5]|\d{1,2})$     # final octet
    ''', re.VERBOSE)

    try:
        candidate = addr.strip()
    except AttributeError:
        # Value was not a string
        return False
    # Return a real bool as documented, rather than leaking the match
    # object (or None) to the caller.
    return ip_rx.match(candidate) is not None
Example 9
Project: misp42splunk   Author: remg427   File: ip_math.py    License: GNU Lesser General Public License v3.0 6 votes vote down vote up
def is_valid_ip(addr):
    '''Validate an IPV4 address.

    The value is stripped of surrounding whitespace and must then consist
    of exactly four dot-separated octets, each one to three digits long and
    no greater than 255.

    :param addr: IP address to validate.
    :type addr: ``string``
    :returns: True if is valid else False.  Values without a ``strip``
        method (non-strings) yield False.
    :rtype: ``bool``
    '''

    ip_rx = re.compile(r'''
        ^(((
              [0-1]\d{2}                  # matches 000-199
            | 2[0-4]\d                    # matches 200-249
            | 25[0-5]                     # matches 250-255
            | \d{1,2}                     # matches 0-9, 00-99
        )\.){3})                          # 3 of the preceding stanzas
        ([0-1]\d{2}|2[0-4]\d|25[0-5]|\d{1,2})$     # final octet
    ''', re.VERBOSE)

    try:
        # bool() keeps the documented return type: previously the raw match
        # object (or None) was handed back to the caller.
        return bool(ip_rx.match(addr.strip()))
    except AttributeError:
        # Value was not a string
        return False
Example 10
Project: spectree   Author: 0b01001001   File: falcon_plugin.py    License: Apache License 2.0 6 votes vote down vote up
def __init__(self, spectree):
        """Initialise the base plugin, then set up the routing regex helpers."""
        super().__init__(spectree)
        from falcon.routing.compiled import _FIELD_PATTERN

        # NOTE this regex is copied from werkzeug.routing._converter_args_re
        # and modified to support only int args
        int_args_source = r'''
            ((?P<name>\w+)\s*=\s*)?
            (?P<value>\d+)\s*
        '''
        self.INT_ARGS_NAMES = ('num_digits', 'min', 'max')
        self.INT_ARGS = re.compile(int_args_source, re.VERBOSE)

        # NOTE from `falcon.routing.compiled.CompiledRouterNode`
        self.EXTRACT = r'{\2}'
        self.ESCAPE_TO = r'\\\g<0>'
        self.ESCAPE = r'[\.\(\)\[\]\?\$\*\+\^\|]'

        self.FIELD_PATTERN = _FIELD_PATTERN
Example 11
Project: recruit   Author: Frank-qlu   File: regex.py    License: Apache License 2.0 6 votes vote down vote up
def str_flags_to_int(str_flags):
    """Translate single-letter regex flag codes into a combined ``re`` flag.

    Each of the letters ``i``, ``l``, ``m``, ``s``, ``u`` and ``x`` that is
    contained in *str_flags* switches on the matching ``re`` constant.
    Returns 0 when none of them is present.
    """
    letter_table = (
        ("i", re.IGNORECASE),
        ("l", re.LOCALE),
        ("m", re.MULTILINE),
        ("s", re.DOTALL),
        ("u", re.UNICODE),
        ("x", re.VERBOSE),
    )
    flags = 0
    for letter, flag in letter_table:
        if letter in str_flags:
            flags |= flag
    return flags
Example 12
Project: ironpython2   Author: IronLanguages   File: __init__.py    License: Apache License 2.0 6 votes vote down vote up
def set_memlimit(limit):
    """Turn a size string such as "4G" into the module's memory limits.

    The string is a number with an optional fractional part followed by a
    K, M, G or T unit (any case) and an optional ``b``.  The blank inside
    the pattern is dropped by ``re.VERBOSE``, so number and unit are
    adjacent.

    Updates two globals: ``real_max_memuse`` (the requested size in bytes)
    and ``max_memuse`` (the same size, limited to ``MAX_Py_ssize_t``).

    Raises ValueError if the string does not parse, or if the limited size
    is under ``_2G - 1``; ``real_max_memuse`` is assigned before the latter
    check.
    """
    global max_memuse
    global real_max_memuse
    scale_for_unit = {
        'k': 1024,
        'm': _1M,
        'g': _1G,
        't': 1024*_1G,
    }
    result = re.match(r'(\d+(\.\d+)?) (K|M|G|T)b?$', limit,
                      re.IGNORECASE | re.VERBOSE)
    if result is None:
        raise ValueError('Invalid memory limit %r' % (limit,))
    scale = scale_for_unit[result.group(3).lower()]
    wanted = int(float(result.group(1)) * scale)
    real_max_memuse = wanted
    usable = MAX_Py_ssize_t if wanted > MAX_Py_ssize_t else wanted
    if usable < _2G - 1:
        raise ValueError('Memory limit %r too low to be useful' % (limit,))
    max_memuse = usable
Example 13
Project: ironpython2   Author: IronLanguages   File: test_commands.py    License: Apache License 2.0 6 votes vote down vote up
def test_getstatus(self):
        # The pattern below is meant to match the output of 'ls -ld /.' on
        # any posix system, including ones (e.g., Cygwin) where user and
        # group names may contain spaces:
        #     drwxr-xr-x   15 Administ Domain U     4096 Aug 12 12:50 /
        #     drwxr-xr-x   15 Joe User My Group     4096 Aug 12 12:50 /
        # (space in the group name only, then space in both names).
        # Optional attribute marker after the mode bits:
        #   + = has ACLs
        #   @ = has Mac OS X extended attributes
        #   . = has a SELinux security context
        pat = r'''d.........   # It is a directory.
                  [.+@]?       # It may have special attributes.
                  \s+\d+       # It has some number of links.
                  [^/]*        # Skip user, group, size, and date.
                  /\.          # and end with the name of the file.
               '''

        expected_warning = (".*commands.getstatus.. is deprecated",
                            DeprecationWarning)
        with check_warnings(expected_warning):
            listing = commands.getstatus("/.")
            self.assertTrue(re.match(pat, listing, re.VERBOSE))
Example 14
Project: locality-sensitive-hashing   Author: singhj   File: plugintest.py    License: MIT License 6 votes vote down vote up
def remove_stack_traces(out):
    """Collapse the stack portion of every traceback found in *out*.

    *out* is split with ``blankline_separated_blocks``; within each block a
    traceback is rewritten as its header line, a ``...`` placeholder line,
    and the exception name plus message.  The rewritten blocks are
    concatenated and returned.
    """
    # this regexp taken from Python 2.5's doctest
    traceback_re = re.compile(r"""
        # Grab the traceback header.  Different versions of Python have
        # said different things on the first traceback line.
        ^(?P<hdr> Traceback\ \(
            (?: most\ recent\ call\ last
            |   innermost\ last
            ) \) :
        )
        \s* $                   # toss trailing whitespace on the header.
        (?P<stack> .*?)         # don't blink: absorb stuff until...
        ^(?=\w)                 #     a line *starts* with alphanum.
        .*?(?P<exception> \w+ ) # exception name
        (?P<msg> [:\n] .*)      # the rest
        """, re.VERBOSE | re.MULTILINE | re.DOTALL)
    replacement = r"\g<hdr>\n...\n\g<exception>\g<msg>"
    return "".join(
        traceback_re.sub(replacement, chunk)
        for chunk in blankline_separated_blocks(out)
    )
Example 15
Project: vnpy_crypto   Author: birforce   File: datetools.py    License: MIT License 6 votes vote down vote up
def date_parser(timestr, parserinfo=None, **kwargs):
    """
    Parse *timestr* into a datetime, with extra period notations.

    Besides whatever ``to_datetime`` accepts, monthly strings such as
    1999m4, 1999:m4, 1999:mIV and 1999mIV are understood, and likewise
    quarterly strings with q in place of m.  Matching ignores case.  A bare
    year resolves to December 31 of that year.  Strings matching none of
    the period patterns are handed to ``to_datetime`` together with
    ``kwargs``.
    """
    flags = re.IGNORECASE | re.VERBOSE

    if re.search(_q_pattern, timestr, flags):
        year_text, quarter = timestr.replace(":","").lower().split('q')
        month, day = _quarter_to_day[quarter.upper()]
        return datetime.datetime(int(year_text), month, day)

    if re.search(_m_pattern, timestr, flags):
        year_text, month_text = timestr.replace(":","").lower().split('m')
        month, day = _month_to_day[month_text.upper()]
        year = int(year_text)
        # The table lookup is extended by one day for February in leap years.
        if _is_leap(year_text) and month == 2:
            day += 1
        return datetime.datetime(year, month, day)

    if re.search(_y_pattern, timestr, flags):
        month, day = 12, 31
        return datetime.datetime(int(timestr), month, day)

    return to_datetime(timestr, **kwargs)
Example 16
Project: vnpy_crypto   Author: birforce   File: regex.py    License: MIT License 6 votes vote down vote up
def str_flags_to_int(str_flags):
    """Combine the ``re`` flags named by the letters in *str_flags*.

    Recognised letters are ``i`` (IGNORECASE), ``l`` (LOCALE),
    ``m`` (MULTILINE), ``s`` (DOTALL), ``u`` (UNICODE) and ``x`` (VERBOSE).
    Anything else is ignored; with no recognised letter the result is 0.
    """
    flag_for_letter = {
        "i": re.IGNORECASE,
        "l": re.LOCALE,
        "m": re.MULTILINE,
        "s": re.DOTALL,
        "u": re.UNICODE,
        "x": re.VERBOSE,
    }
    flags = 0
    for letter, flag in flag_for_letter.items():
        if letter not in str_flags:
            continue
        flags |= flag

    return flags
Example 17
Project: vnpy_crypto   Author: birforce   File: __init__.py    License: MIT License 6 votes vote down vote up
def _encode_regex(name, value, dummy0, dummy1):
    """Encode a python regex or bson.regex.Regex.

    The result is the byte ``0x0B``, then *name*, the checked C string of
    ``value.pattern``, and finally a NUL-terminated run of flag letters
    (``i``, ``l``, ``m``, ``s``, ``u``, ``x``) for the flags set on *value*.
    ``dummy0`` and ``dummy1`` are not used.
    """
    flags = value.flags
    if flags == 0:
        # Python 2 common case
        flag_bytes = b"\x00"
    elif flags == re.UNICODE:
        # Python 3 common case
        flag_bytes = b"u\x00"
    else:
        letters = (
            (re.IGNORECASE, b"i"),
            (re.LOCALE, b"l"),
            (re.MULTILINE, b"m"),
            (re.DOTALL, b"s"),
            (re.UNICODE, b"u"),
            (re.VERBOSE, b"x"),
        )
        flag_bytes = b"".join(code for bit, code in letters if flags & bit)
        flag_bytes += b"\x00"
    return b"\x0B" + name + _make_c_string_check(value.pattern) + flag_bytes
Example 18
Project: vnpy_crypto   Author: birforce   File: header_footer.py    License: MIT License 6 votes vote down vote up
def _split_string(text):
    """
    Split the combined (decoded) string into left, center and right parts

    # See http://stackoverflow.com/questions/27711175/regex-with-multiple-optional-groups for discussion
    """

    ITEM_REGEX = re.compile("""
    (&L(?P<left>.+?))?
    (&C(?P<center>.+?))?
    (&R(?P<right>.+?))?
    $""", re.VERBOSE | re.DOTALL)

    m = ITEM_REGEX.match(text)
    try:
        parts = m.groupdict()
    except AttributeError:
        warn("""Cannot parse header or footer so it will be ignored""")
        parts = {'left':'', 'right':'', 'center':''}
    return parts 
Example 19
Project: ciocheck   Author: ContinuumIO   File: linters.py    License: MIT License 5 votes vote down vote up
def _parse_regex(self, string):
        """Parse output with grouped regex."""
        results = []
        self.regex = re.compile(self.pattern, re.VERBOSE)
        for matches in self.regex.finditer(string):
            results.append(matches.groupdict())
        return results 
Example 20
Project: SublimeKSP   Author: nojanath   File: lex.py    License: GNU General Public License v3.0 5 votes vote down vote up
def readtab(self,tabfile,fdict):
        """Populate this lexer from a previously generated table module.

        ``tabfile`` is either an already imported module or a module name,
        which is imported here.  ``fdict`` maps function names to callables;
        it is used to resolve the names stored in the table.

        Raises ImportError when the table's ``_tabversion`` differs from
        ``__tabversion__``.
        """
        if isinstance(tabfile,types.ModuleType):
            lextab = tabfile
        else:
            # tabfile is a module name: import it under the name "lextab".
            if sys.version_info[0] < 3:
                exec("import %s as lextab" % tabfile)
            else:
                # On Python 3 the import runs in an explicit namespace and
                # the module is read back from it.
                env = { }
                exec("import %s as lextab" % tabfile, env,env)
                lextab = env['lextab']

        # A table without _tabversion is treated as version "0.0".
        if getattr(lextab,"_tabversion","0.0") != __tabversion__:
            raise ImportError("Inconsistent PLY version")

        # Copy the stored tables onto the lexer.
        self.lextokens      = lextab._lextokens
        self.lexreflags     = lextab._lexreflags
        self.lexliterals    = lextab._lexliterals
        self.lexstateinfo   = lextab._lexstateinfo
        self.lexstateignore = lextab._lexstateignore
        self.lexstatere     = { }
        self.lexstateretext = { }
        # For each state, recompile every stored regex text (always with
        # re.VERBOSE added) and pair it with the result of
        # _names_to_funcs() for its stored names; the raw regex text is
        # kept alongside in lexstateretext.
        for key,lre in lextab._lexstatere.items():
             titem = []
             txtitem = []
             for i in range(len(lre)):
                  titem.append((re.compile(lre[i][0],lextab._lexreflags | re.VERBOSE),_names_to_funcs(lre[i][1],fdict)))
                  txtitem.append(lre[i][0])
             self.lexstatere[key] = titem
             self.lexstateretext[key] = txtitem
        self.lexstateerrorf = { }
        # Error handlers are stored by name; look each one up in fdict.
        for key,ef in lextab._lexstateerrorf.items():
             self.lexstateerrorf[key] = fdict[ef]
        self.begin('INITIAL')

    # ------------------------------------------------------------
    # input() - Push a new string into the lexer
    # ------------------------------------------------------------ 
Example 21
Project: SublimeKSP   Author: nojanath   File: lex.py    License: GNU General Public License v3.0 5 votes vote down vote up
def _form_master_re(relist,reflags,ldict,toknames):
    if not relist: return []
    regex = "|".join(relist)
    try:
        lexre = re.compile(regex,re.VERBOSE | reflags)

        # Build the index to function map for the matching engine
        lexindexfunc = [ None ] * (max(lexre.groupindex.values())+1)
        lexindexnames = lexindexfunc[:]

        for f,i in lexre.groupindex.items():
            handle = ldict.get(f,None)
            if type(handle) in (types.FunctionType, types.MethodType):
                lexindexfunc[i] = (handle,toknames[f])
                lexindexnames[i] = f
            elif handle is not None:
                lexindexnames[i] = f
                if f.find("ignore_") > 0:
                    lexindexfunc[i] = (None,None)
                else:
                    lexindexfunc[i] = (None, toknames[f])
        
        return [(lexre,lexindexfunc)],[regex],[lexindexnames]
    except Exception:
        m = int(len(relist)/2)
        if m == 0: m = 1
        llist, lre, lnames = _form_master_re(relist[:m],reflags,ldict,toknames)
        rlist, rre, rnames = _form_master_re(relist[m:],reflags,ldict,toknames)
        return llist+rlist, lre+rre, lnames+rnames

# -----------------------------------------------------------------------------
# def _statetoken(s,names)
#
# Given a declaration name s of the form "t_" and a dictionary whose keys are
# state names, this function returns a tuple (states,tokenname) where states
# is a tuple of state names and tokenname is the name of the token.  For example,
# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM')
# ----------------------------------------------------------------------------- 
Example 22
Project: scarlet   Author: pmelchior   File: test_docs.py    License: MIT License 5 votes vote down vote up
def escape_ansi_control(error):
    """Strip ANSI escape sequences from each item of *error*.

    Every item has its escape sequences removed and a newline appended; the
    pieces are concatenated into a single string, which is returned.
    """
    ansi_escape = re.compile(r'''
        \x1B    # ESC
        [@-_]   # 7-bit C1 Fe
        [0-?]*  # Parameter bytes
        [ -/]*  # Intermediate bytes
        [@-~]   # Final byte
    ''', re.VERBOSE)
    return "".join(ansi_escape.sub('', line) + "\n" for line in error)
Example 23
Project: jawfish   Author: war-and-code   File: string.py    License: MIT License 5 votes vote down vote up
def __init__(cls, name, bases, dct):
        """Compile ``cls.pattern`` when the class is created.

        A class that defines ``pattern`` in its own namespace has that
        pattern used as is.  Otherwise the metaclass's pattern template is
        filled in with the escaped ``cls.delimiter`` and ``cls.idpattern``.
        The pattern is always compiled with ``cls.flags`` plus VERBOSE.
        """
        super(_TemplateMetaclass, cls).__init__(name, bases, dct)
        if 'pattern' not in dct:
            substitutions = {
                'delim' : _re.escape(cls.delimiter),
                'id'    : cls.idpattern,
                }
            source = _TemplateMetaclass.pattern % substitutions
        else:
            source = cls.pattern
        cls.pattern = _re.compile(source, cls.flags | _re.VERBOSE)
Example 24
Project: jawfish   Author: war-and-code   File: configparser.py    License: MIT License 5 votes vote down vote up
def __init__(self, defaults=None, dict_type=_default_dict,
                 allow_no_value=False, *, delimiters=('=', ':'),
                 comment_prefixes=('#', ';'), inline_comment_prefixes=None,
                 strict=True, empty_lines_in_values=True,
                 default_section=DEFAULTSECT,
                 interpolation=_UNSET):
        """Set up the parser's storage, option regex and parsing switches.

        ``dict_type`` is used for the sections, defaults and proxies
        containers.  Entries of ``defaults`` are stored under their
        ``optionxform``-ed keys.  With the standard ``('=', ':')``
        delimiters a precompiled option regex from the class is used;
        otherwise one is built from the matching template with the escaped
        delimiters.  An ``interpolation`` of ``_UNSET`` selects the class
        default, and ``None`` is replaced by a plain ``Interpolation()``.
        """
        self._dict = dict_type
        self._sections = self._dict()
        self._defaults = self._dict()
        self._proxies = self._dict()
        self._proxies[default_section] = SectionProxy(self, default_section)
        if defaults:
            for key, value in defaults.items():
                self._defaults[self.optionxform(key)] = value

        self._delimiters = tuple(delimiters)
        if delimiters != ('=', ':'):
            # Custom delimiters: build the option regex from the template.
            alternation = "|".join(re.escape(delim) for delim in delimiters)
            if allow_no_value:
                template = self._OPT_NV_TMPL
            else:
                template = self._OPT_TMPL
            self._optcre = re.compile(template.format(delim=alternation),
                                      re.VERBOSE)
        elif allow_no_value:
            self._optcre = self.OPTCRE_NV
        else:
            self._optcre = self.OPTCRE

        self._comment_prefixes = tuple(comment_prefixes or ())
        self._inline_comment_prefixes = tuple(inline_comment_prefixes or ())
        self._strict = strict
        self._allow_no_value = allow_no_value
        self._empty_lines_in_values = empty_lines_in_values
        self.default_section=default_section

        if interpolation is _UNSET:
            interpolation = self._DEFAULT_INTERPOLATION
        if interpolation is None:
            interpolation = Interpolation()
        self._interpolation = interpolation
Example 25
Project: jawfish   Author: war-and-code   File: test_re.py    License: MIT License 5 votes vote down vote up
def test_constants(self):
        """Each one-letter flag alias must equal its long-named constant."""
        alias_pairs = (
            (re.I, re.IGNORECASE),
            (re.L, re.LOCALE),
            (re.M, re.MULTILINE),
            (re.S, re.DOTALL),
            (re.X, re.VERBOSE),
        )
        for short_form, long_form in alias_pairs:
            self.assertEqual(short_form, long_form)
Example 26
Project: razzy-spinner   Author: rafasashi   File: ycoe.py    License: GNU General Public License v3.0 5 votes vote down vote up
def _read(files, conversion_function):
    """Yield converted sentences from one or more corpus files.

    *files* is a single file name or an iterable of file names; each is
    looked up under ``<basedir>/ycoe/pos``.  The file text is split with
    ``tokenize.blankline``, ``<...>_CODE`` and ``..._ID`` spans are removed
    from each piece, and every non-empty piece is passed to
    ``conversion_function(sent, sep="_")``, whose result is yielded.
    """
    if isinstance(files, str): files = (files,)

    # Compiled once: the pattern does not depend on the file being read.
    rx_pattern = re.compile(r"""
                <.*>_CODE
                |\s.*_ID
        """, re.VERBOSE|re.UNICODE)

    for file in files:
        path = os.path.join(get_basedir(), "ycoe/pos", file)
        # Read inside a context manager so the handle is closed right away
        # rather than being left for the garbage collector.
        with open(path) as handle:
            contents = handle.read()
        for sent in tokenize.blankline(contents):
            sent = rx_pattern.sub('', sent)
            if sent != "":
                yield conversion_function(sent, sep="_")
Example 27
Project: razzy-spinner   Author: rafasashi   File: punkt.py    License: GNU General Public License v3.0 5 votes vote down vote up
def _word_tokenizer_re(self):
        """Compiles and returns a regular expression for word tokenization"""
        try:
            return self._re_word_tokenizer
        except AttributeError:
            self._re_word_tokenizer = re.compile(
                self._word_tokenize_fmt %
                {
                    'NonWord':   self._re_non_word_chars,
                    'MultiChar': self._re_multi_char_punct,
                    'WordStart': self._re_word_start,
                },
                re.UNICODE | re.VERBOSE
            )
            return self._re_word_tokenizer 
Example 28
Project: razzy-spinner   Author: rafasashi   File: punkt.py    License: GNU General Public License v3.0 5 votes vote down vote up
def period_context_re(self):
        """Compiles and returns a regular expression to find contexts
        including possible sentence boundaries.

        The pattern is built from ``self._period_context_fmt`` the first
        time it is requested and cached on ``self._re_period_context``.
        """
        try:
            return self._re_period_context
        except AttributeError:
            # Only a missing cache entry should trigger compilation; any
            # other error is a real failure and must not be swallowed.
            self._re_period_context = re.compile(
                self._period_context_fmt %
                {
                    'NonWord':      self._re_non_word_chars,
                    'SentEndChars': self._re_sent_end_chars,
                },
                re.UNICODE | re.VERBOSE)
            return self._re_period_context
Example 29
Project: qutebrowser   Author: qutebrowser   File: webenginedownloads.py    License: GNU General Public License v3.0 5 votes vote down vote up
def _get_suggested_filename(path):
    """Convert a path we got from chromium to a suggested filename.

    Chromium thinks we want to download stuff to ~/Download, so even if we
    don't, we get downloads with a suffix like (1) for files existing there.

    We simply strip the suffix off via regex. Two suffix forms are handled,
    each only directly before the extension or at the end of the name: a
    number in parentheses, and " - <ISO-8601 timestamp with milliseconds>Z".

    See https://bugreports.qt.io/browse/QTBUG-56978
    """
    filename = os.path.basename(path)

    # The separator before the milliseconds is an escaped literal dot; left
    # unescaped it would accept any character in that position.
    suffix_re = re.compile(r"""
      \ ?  # Optional space between filename and suffix
      (
        # Numerical suffix
        \([0-9]+\)
      |
        # ISO-8601 suffix
        # https://cs.chromium.org/chromium/src/base/time/time_to_iso8601.cc
        \ -\ \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z
      )
      (?=\.|$)  # Begin of extension, or filename without extension
    """, re.VERBOSE)

    filename = suffix_re.sub('', filename)
    if not qtutils.version_check('5.9', compiled=False):
        # https://bugreports.qt.io/browse/QTBUG-58155
        filename = urllib.parse.unquote(filename)
        # Doing basename a *second* time because there could be a %2F in
        # there...
        filename = os.path.basename(filename)
    return filename
Example 30
Project: qutebrowser   Author: qutebrowser   File: dictcli.py    License: GNU General Public License v3.0 5 votes vote down vote up
def parse_entry(entry):
    """Parse an entry from the remote API.

    ``entry['name']`` must be, in full, a two-letter lowercase language code
    with an optional ``-XX`` uppercase region part, followed by anything and
    ending in ``.bdic``.  Returns ``(code, filename)`` on success and None
    otherwise.
    """
    dict_re = re.compile(r"""
        (?P<filename>(?P<code>[a-z]{2}(-[A-Z]{2})?).*\.bdic)
    """, re.VERBOSE)
    found = dict_re.fullmatch(entry['name'])
    if found is None:
        return None
    return found.group('code'), found.group('filename')