Python regex.error() Examples

The following are 6 code examples of regex.error(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module regex, or try the search function.

Example #1

Source File: data_gathering.py From soweego with GNU General Public License v3.0

5 votes

def gather_relevant_pids():
    """Collect URL PIDs and compiled formatter regexes for external-ID PIDs.

    Returns a 2-tuple:

    * the set of everything yielded by ``sparql_queries.url_pids()``;
    * a ``defaultdict`` mapping each PID to a dict of
      ``{formatter_url: compiled_pattern_or_None}``.

    A formatter regex is compiled with the standard ``re`` module first and
    with the third-party ``regex`` module as a fallback. The stored value is
    ``None`` when the regex is empty or when both modules reject it.
    """
    url_pids = set(sparql_queries.url_pids())

    ext_id_pids_to_urls = defaultdict(dict)
    for pid_map in sparql_queries.external_id_pids_and_urls():
        for pid, formatters in pid_map.items():
            for formatter_url, formatter_regex in formatters.items():
                # Stays None unless one of the two engines accepts the regex.
                pattern = None
                if formatter_regex:
                    try:
                        pattern = re.compile(formatter_regex)
                    except re.error:
                        LOGGER.debug(
                            "Using 'regex' third-party library. Formatter regex not supported by the 're' standard library: %s",
                            formatter_regex,
                        )
                        try:
                            pattern = regex.compile(formatter_regex)
                        except regex.error:
                            LOGGER.debug(
                                "Giving up. Formatter regex not supported by 'regex': %s",
                                formatter_regex,
                            )
                ext_id_pids_to_urls[pid][formatter_url] = pattern

    return url_pids, ext_id_pids_to_urls

Example #2

Source File: tests.py From python-iocextract with GNU General Public License v2.0

5 votes

def test_refang_never_excepts_from_urlparse(self):
        # Malformed inputs: refang_url() must not let a ValueError escape
        # for any of them.
        malformed_inputs = (
            'hxxp__test]',
            'CDATA[^h00ps://test.com/]]>',
        )
        try:
            for candidate in malformed_inputs:
                iocextract.refang_url(candidate)
        except ValueError as e:
            self.fail('Unhandled parsing error in refang: {e}'.format(e=e))

Example #3

Source File: tests.py From python-iocextract with GNU General Public License v2.0

5 votes

def test_extract_custom_iocs_excepts_on_bad_regex(self):
        # Note: have to use list() here because exceptions are only raised when
        # the generator is executed.
        #
        # Every call gets its own assertRaises block: the context manager
        # exits at the first exception, so a second call placed in the same
        # block would never be executed (and never be checked).
        for bad_pattern in (r'(mismatched paren', r'[mismatched bracket'):
            with self.assertRaises(re.error):
                list(iocextract.extract_custom_iocs('', [bad_pattern]))

        # The missing capture group is only noticed when a match is produced,
        # so the data has to contain text the pattern actually matches.
        with self.assertRaises(IndexError):
            list(iocextract.extract_custom_iocs(
                'no capture group', [r'no capture group']))

        # An empty pattern matches the empty string, so empty data is enough.
        with self.assertRaises(IndexError):
            list(iocextract.extract_custom_iocs('', [r'']))

Example #4

Source File: Utilities.py From UMI-tools with MIT License

5 votes

def error(message):
    '''Log *message* at error level through :mod:`logging`, then abort.

    Always raises :class:`ValueError` with a fixed text that points the
    user at the log file; the function never returns normally.
    '''
    logging.error(message)
    failure = ValueError("UMI-tools failed with an error. Check the log file")
    raise failure

Example #5

Source File: intdict.py From mwic with MIT License

4 votes

def __init__(self, lang):
        '''Load the dictionary file for *lang* and build the matcher.

        *lang* is lower-cased and ``_`` is replaced by ``-``. The file of
        that name is looked up in ``datadir``; when it does not exist, the
        last ``-suffix`` is stripped and the lookup is retried, until a file
        is found or nothing is left to strip.

        File format, one entry per line (split on whitespace):

        * lines starting with ``#`` and blank lines are skipped;
        * ``* word`` adds *word*, its upper-case and its title-case form to
          the whitelist;
        * ``@define name = definition...`` registers a macro;
        * anything else is a regex whose words are joined with ``\\s+`` and
          macro-expanded.

        Afterwards ``self._find`` is the ``finditer`` method of one combined
        case-insensitive pattern, or ``_find_nothing`` when no regexes were
        collected.

        Raises :class:`SyntaxError` for a malformed ``@``-command, an invalid
        regex or a duplicate macro definition.
        '''
        self._whitelist = set()
        regexes = []
        lang = lang.lower().replace('_', '-')
        while True:
            path = os.path.join(datadir, lang)
            try:
                file = open(path, 'rt', encoding='UTF-8')
            except FileNotFoundError:
                # Fall back to the parent tag, e.g. 'en-gb' -> 'en'.
                [lang, *suffix] = lang.rsplit('-', 1)
                if suffix:
                    continue
                else:
                    # Nothing left to strip: no dictionary for this language.
                    break
            macros = Macros()
            n = None  # hi, pylint
            def error(reason):  # no coverage
                # Builds (does not raise) a SyntaxError located at the line
                # currently being parsed.
                return SyntaxError(reason, (file.name, n, 1, whole_line))
            with file:
                for n, line in enumerate(file, 1):
                    whole_line = line
                    if line.startswith('#'):
                        continue
                    line = line.split()
                    if not line:
                        continue
                    if line[0] == '*':
                        [word] = line[1:]
                        self._whitelist.add(word)
                        self._whitelist.add(word.upper())
                        self._whitelist.add(word.title())
                    elif line[0][0] == '@':
                        if (len(line) >= 4) and (line[0] == '@define') and (line[2] == '='):
                            (_, name, _, *definition) = line
                            definition = r'(?:{re})'.format(re=r'\s+'.join(definition))
                            # Validate the definition on its own before
                            # storing it.
                            try:
                                re.compile(definition)
                            except re.error as exc:  # no coverage
                                raise error(exc)
                            # NOTE(review): presumably Macros raises KeyError
                            # when the name is already defined - confirm.
                            try:
                                macros[name] = macros.expand(definition)  # pylint: disable=unsubscriptable-object
                            except KeyError:  # no coverage
                                raise error('duplicate macro definition: {}'.format(name))
                        else:
                            raise error('malformed @-command')  # no coverage
                    else:
                        regex = r'\s+'.join(line)
                        regex = macros.expand(regex)
                        # Validate each regex individually so that the error
                        # points at the offending line.
                        try:
                            re.compile(regex)
                        except re.error as exc:  # no coverage
                            raise error(exc)
                        regexes += [regex]
            break
        if regexes:
            # Case-insensitivity is requested through the flags argument.
            # A global inline "(?i)" placed after the leading "\b(?:" is
            # deprecated since Python 3.6 and makes re.compile() raise
            # re.error since Python 3.11.
            regex = r'\b(?:{0})\b'.format(
                '|'.join(regexes)
            )
            self._find = re.compile(regex, re.IGNORECASE).finditer
        else:
            self._find = _find_nothing

Example #6

Source File: iocextract.py From python-iocextract with GNU General Public License v2.0

4 votes

def extract_custom_iocs(data, regex_list):
    """Yield matches for caller-supplied regex strings.

    For every match of every regex, only the *first capture group* is
    yielded, so each regex should contain exactly one capture group.

    Good::

        [
            r'(my regex)',  # Yields 'my regex' on a match.
            r'my (re)gex',  # Yields 're' on a match.
        ]

    Bad::

        [
            r'my regex',  # No capture group: nothing useful can be yielded.
            r'(my) (re)gex',  # Two groups: only 'my' is yielded.
        ]

    More complex expressions can mix non-capturing ``(?: )`` groups with the
    single capturing ``( )`` group::

        [
            r'(?:my|your) (re)gex',  # Yields 're' on a match.
        ]

    :param data: Input text
    :param regex_list: List of strings to treat as regex and match against data.
    :rtype: Iterator[:class:`str`]
    """
    # Compile everything up front so an invalid pattern is reported before
    # any match is produced.
    compiled_patterns = [re.compile(pattern_text) for pattern_text in regex_list]

    # Run the patterns in the order given, emitting group 1 of every match.
    for pattern in compiled_patterns:
        for match in pattern.finditer(data):
            yield match.group(1)