Python regex.MULTILINE Examples

The following are 30 code examples of regex.MULTILINE(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module regex , or try the search function

Example #1

Source File: regexp_tokenizer.py From OpenQA with MIT License

6 votes

def __init__(self, **kwargs):
        """
        Args:
            annotators: None or empty set (only tokenizes).
            substitutions: if true, normalizes some token types (e.g. quotes).
        """
        self._regexp = regex.compile(
            '(?P<digit>%s)|(?P<title>%s)|(?P<abbr>%s)|(?P<neg>%s)|(?P<hyph>%s)|'
            '(?P<contr1>%s)|(?P<alphanum>%s)|(?P<contr2>%s)|(?P<sdquote>%s)|'
            '(?P<edquote>%s)|(?P<ssquote>%s)|(?P<esquote>%s)|(?P<dash>%s)|'
            '(?<ellipses>%s)|(?P<punct>%s)|(?P<nonws>%s)' %
            (self.DIGIT, self.TITLE, self.ABBRV, self.NEGATION, self.HYPHEN,
             self.CONTRACTION1, self.ALPHA_NUM, self.CONTRACTION2,
             self.START_DQUOTE, self.END_DQUOTE, self.START_SQUOTE,
             self.END_SQUOTE, self.DASH, self.ELLIPSES, self.PUNCT,
             self.NON_WS),
            flags=regex.IGNORECASE + regex.UNICODE + regex.MULTILINE
        )
        if len(kwargs.get('annotators', {})) > 0:
            logger.warning('%s only tokenizes! Skipping annotators: %s' %
                           (type(self).__name__, kwargs.get('annotators')))
        self.annotators = set()
        self.substitutions = kwargs.get('substitutions', True)

Example #2

Source File: cmd_data_extract_ipv4.py From habu with BSD 3-Clause "New" or "Revised" License

6 votes

def extract_ipv4(data):

    #regexp = re.compile(r'\s?((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\s?', flags=re.MULTILINE)
    regexp = re.compile(r'[\s():{}\[\]]{1}((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)[\s():{}\[\]]{1}', flags=re.MULTILINE)

    regexp = re.compile(r'[^0-9]?(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})[^0-9]?', flags=re.MULTILINE)

    match = regexp.finditer(data)

    result = []

    for m in match:
        try:
            ipaddress.ip_address(m.group(1))
            result.append(m.group(1))
        except ValueError:
            continue
        #ip = m.group(0).strip(' ():{}[]')
        #ip = ip.strip()
        #if ip:
        #    result.append(ip)

    return result

Example #3

Source File: regexp_tokenizer.py From justcopy-backend with MIT License

6 votes

def __init__(self, **kwargs):
        """
        Args:
            annotators: None or empty set (only tokenizes).
            substitutions: if true, normalizes some token types (e.g. quotes).
        """
        self._regexp = regex.compile(
            '(?P<digit>%s)|(?P<title>%s)|(?P<abbr>%s)|(?P<neg>%s)|(?P<hyph>%s)|'
            '(?P<contr1>%s)|(?P<alphanum>%s)|(?P<contr2>%s)|(?P<sdquote>%s)|'
            '(?P<edquote>%s)|(?P<ssquote>%s)|(?P<esquote>%s)|(?P<dash>%s)|'
            '(?<ellipses>%s)|(?P<punct>%s)|(?P<nonws>%s)' %
            (self.DIGIT, self.TITLE, self.ABBRV, self.NEGATION, self.HYPHEN,
             self.CONTRACTION1, self.ALPHA_NUM, self.CONTRACTION2,
             self.START_DQUOTE, self.END_DQUOTE, self.START_SQUOTE,
             self.END_SQUOTE, self.DASH, self.ELLIPSES, self.PUNCT,
             self.NON_WS),
            flags=regex.IGNORECASE + regex.UNICODE + regex.MULTILINE
        )
        if len(kwargs.get('annotators', {})) > 0:
            logger.warning('%s only tokenizes! Skipping annotators: %s' %
                           (type(self).__name__, kwargs.get('annotators')))
        self.annotators = set()
        self.substitutions = kwargs.get('substitutions', True)

Example #4

Source File: regexp_tokenizer.py From FusionNet with MIT License

6 votes

def __init__(self, **kwargs):
        """
        Args:
            annotators: None or empty set (only tokenizes).
            substitutions: if true, normalizes some token types (e.g. quotes).
        """
        self._regexp = regex.compile(
            '(?P<digit>%s)|(?P<title>%s)|(?P<abbr>%s)|(?P<neg>%s)|(?P<hyph>%s)|'
            '(?P<contr1>%s)|(?P<alphanum>%s)|(?P<contr2>%s)|(?P<sdquote>%s)|'
            '(?P<edquote>%s)|(?P<ssquote>%s)|(?P<esquote>%s)|(?P<dash>%s)|'
            '(?<ellipses>%s)|(?P<punct>%s)|(?P<nonws>%s)' %
            (self.DIGIT, self.TITLE, self.ABBRV, self.NEGATION, self.HYPHEN,
             self.CONTRACTION1, self.ALPHA_NUM, self.CONTRACTION2,
             self.START_DQUOTE, self.END_DQUOTE, self.START_SQUOTE,
             self.END_SQUOTE, self.DASH, self.ELLIPSES, self.PUNCT,
             self.NON_WS),
            flags=regex.IGNORECASE + regex.UNICODE + regex.MULTILINE
        )
        if len(kwargs.get('annotators', {})) > 0:
            logger.warning('%s only tokenizes! Skipping annotators: %s' %
                           (type(self).__name__, kwargs.get('annotators')))
        self.annotators = set()
        self.substitutions = kwargs.get('substitutions', True)

Example #5

Source File: regexp_tokenizer.py From RCZoo with MIT License

6 votes

def __init__(self, **kwargs):
        """
        Args:
            annotators: None or empty set (only tokenizes).
            substitutions: if true, normalizes some token types (e.g. quotes).
        """
        self._regexp = regex.compile(
            '(?P<digit>%s)|(?P<title>%s)|(?P<abbr>%s)|(?P<neg>%s)|(?P<hyph>%s)|'
            '(?P<contr1>%s)|(?P<alphanum>%s)|(?P<contr2>%s)|(?P<sdquote>%s)|'
            '(?P<edquote>%s)|(?P<ssquote>%s)|(?P<esquote>%s)|(?P<dash>%s)|'
            '(?<ellipses>%s)|(?P<punct>%s)|(?P<nonws>%s)' %
            (self.DIGIT, self.TITLE, self.ABBRV, self.NEGATION, self.HYPHEN,
             self.CONTRACTION1, self.ALPHA_NUM, self.CONTRACTION2,
             self.START_DQUOTE, self.END_DQUOTE, self.START_SQUOTE,
             self.END_SQUOTE, self.DASH, self.ELLIPSES, self.PUNCT,
             self.NON_WS),
            flags=regex.IGNORECASE + regex.UNICODE + regex.MULTILINE
        )
        if len(kwargs.get('annotators', {})) > 0:
            logger.warning('%s only tokenizes! Skipping annotators: %s' %
                           (type(self).__name__, kwargs.get('annotators')))
        self.annotators = set()
        self.substitutions = kwargs.get('substitutions', True)

Example #6

Source File: utils.py From RCZoo with MIT License

5 votes

def regex_match_score(prediction, pattern):
    """Check if the prediction matches the given regular expression."""
    try:
        compiled = re.compile(
            pattern,
            flags=re.IGNORECASE + re.UNICODE + re.MULTILINE
        )
    except BaseException:
        logger.warn('Regular expression failed to compile: %s' % pattern)
        return False
    return compiled.match(prediction) is not None

Example #7

Source File: grammar.py From estnltk with GNU General Public License v2.0

5 votes

def __init__(self, pattern, flags=re.UNICODE | re.MULTILINE | re.IGNORECASE, name=None):
        super(IRegex, self).__init__(pattern, flags, name)

Example #8

Source File: retrieval_drqa_eval.py From semanticRetrievalMRS with MIT License

5 votes

def regex_match(text, pattern):
    """Test if a regex pattern is contained within a text."""
    try:
        pattern = re.compile(
            pattern,
            flags=re.IGNORECASE + re.UNICODE + re.MULTILINE,
        )
    except BaseException:
        return False
    return pattern.search(text) is not None

Example #9

Source File: utils.py From RCZoo with MIT License

5 votes

def regex_match_score(prediction, pattern):
    """Check if the prediction matches the given regular expression."""
    try:
        compiled = re.compile(
            pattern,
            flags=re.IGNORECASE + re.UNICODE + re.MULTILINE
        )
    except BaseException:
        logger.warn('Regular expression failed to compile: %s' % pattern)
        return False
    return compiled.match(prediction) is not None

Example #10

Source File: utils.py From RCZoo with MIT License

5 votes

def regex_match_score(prediction, pattern):
    """Check if the prediction matches the given regular expression."""
    try:
        compiled = re.compile(
            pattern,
            flags=re.IGNORECASE + re.UNICODE + re.MULTILINE
        )
    except BaseException:
        logger.warn('Regular expression failed to compile: %s' % pattern)
        return False
    return compiled.match(prediction) is not None

Example #11

Source File: utils.py From RCZoo with MIT License

5 votes

def regex_match_score(prediction, pattern):
    """Check if the prediction matches the given regular expression."""
    try:
        compiled = re.compile(
            pattern,
            flags=re.IGNORECASE + re.UNICODE + re.MULTILINE
        )
    except BaseException:
        logger.warn('Regular expression failed to compile: %s' % pattern)
        return False
    return compiled.match(prediction) is not None

Example #12

Source File: simple_tokenizer.py From RCZoo with MIT License

5 votes

def __init__(self, **kwargs):
        """
        Args:
            annotators: None or empty set (only tokenizes).
        """
        self._regexp = regex.compile(
            '(%s)|(%s)' % (self.ALPHA_NUM, self.NON_WS),
            flags=regex.IGNORECASE + regex.UNICODE + regex.MULTILINE
        )
        if len(kwargs.get('annotators', {})) > 0:
            logger.warning('%s only tokenizes! Skipping annotators: %s' %
                           (type(self).__name__, kwargs.get('annotators')))
        self.annotators = set()

Example #13

Source File: regexp_tokenizer.py From neural_chat with MIT License

5 votes

def __init__(self, **kwargs):
        """
        Args:
            annotators: None or empty set (only tokenizes).
            substitutions: if true, normalizes some token types (e.g. quotes).
        """
        self._regexp = regex.compile(
            '(?P<digit>%s)|(?P<title>%s)|(?P<abbr>%s)|(?P<neg>%s)|(?P<hyph>%s)|'
            '(?P<contr1>%s)|(?P<alphanum>%s)|(?P<contr2>%s)|(?P<sdquote>%s)|'
            '(?P<edquote>%s)|(?P<ssquote>%s)|(?P<esquote>%s)|(?P<dash>%s)|'
            '(?<ellipses>%s)|(?P<punct>%s)|(?P<nonws>%s)'
            % (
                self.DIGIT,
                self.TITLE,
                self.ABBRV,
                self.NEGATION,
                self.HYPHEN,
                self.CONTRACTION1,
                self.ALPHA_NUM,
                self.CONTRACTION2,
                self.START_DQUOTE,
                self.END_DQUOTE,
                self.START_SQUOTE,
                self.END_SQUOTE,
                self.DASH,
                self.ELLIPSES,
                self.PUNCT,
                self.NON_WS,
            ),
            flags=regex.IGNORECASE + regex.UNICODE + regex.MULTILINE,
        )
        if len(kwargs.get('annotators', {})) > 0:
            logger.warning(
                '%s only tokenizes! Skipping annotators: %s'
                % (type(self).__name__, kwargs.get('annotators'))
            )
        self.annotators = set()
        self.substitutions = kwargs.get('substitutions', True)

Example #14

Source File: utils.py From RCZoo with MIT License

5 votes

def regex_match_score(prediction, pattern):
    """Check if the prediction matches the given regular expression."""
    try:
        compiled = re.compile(
            pattern,
            flags=re.IGNORECASE + re.UNICODE + re.MULTILINE
        )
    except BaseException:
        logger.warn('Regular expression failed to compile: %s' % pattern)
        return False
    return compiled.match(prediction) is not None

Example #15

Source File: utils.py From RCZoo with MIT License

5 votes

def regex_match_score(prediction, pattern):
    """Check if the prediction matches the given regular expression."""
    try:
        compiled = re.compile(
            pattern,
            flags=re.IGNORECASE + re.UNICODE + re.MULTILINE
        )
    except BaseException:
        logger.warn('Regular expression failed to compile: %s' % pattern)
        return False
    return compiled.match(prediction) is not None

Example #16

Source File: utils.py From RCZoo with MIT License

5 votes

def regex_match_score(prediction, pattern):
    """Check if the prediction matches the given regular expression."""
    try:
        compiled = re.compile(
            pattern,
            flags=re.IGNORECASE + re.UNICODE + re.MULTILINE
        )
    except BaseException:
        logger.warn('Regular expression failed to compile: %s' % pattern)
        return False
    return compiled.match(prediction) is not None

Example #17

Source File: utils.py From RCZoo with MIT License

5 votes

def regex_match_score(prediction, pattern):
    """Check if the prediction matches the given regular expression."""
    try:
        compiled = re.compile(
            pattern,
            flags=re.IGNORECASE + re.UNICODE + re.MULTILINE
        )
    except BaseException:
        logger.warn('Regular expression failed to compile: %s' % pattern)
        return False
    return compiled.match(prediction) is not None

Example #18

Source File: utils.py From Multi-Step-Reasoning with Apache License 2.0

5 votes

def regex_match_score(prediction, pattern):
    """Check if the prediction matches the given regular expression."""
    try:
        compiled = re.compile(
            pattern,
            flags=re.IGNORECASE + re.UNICODE + re.MULTILINE
        )
    except BaseException:
        logger.warn('Regular expression failed to compile: %s' % pattern)
        return False
    return compiled.match(prediction) is not None

Example #19

Source File: grammar.py From estnltk with GNU General Public License v2.0

5 votes

def __init__(self, pattern, flags=re.UNICODE | re.MULTILINE, name=None):
        super(Regex, self).__init__(name)
        self.__pattern = re.compile(pattern, flags=flags)

Example #20

Source File: utils.py From MnemonicReader with BSD 3-Clause "New" or "Revised" License

5 votes

def regex_match_score(prediction, pattern):
    """Check if the prediction matches the given regular expression."""
    try:
        compiled = re.compile(
            pattern,
            flags=re.IGNORECASE + re.UNICODE + re.MULTILINE
        )
    except BaseException:
        logger.warn('Regular expression failed to compile: %s' % pattern)
        return False
    return compiled.match(prediction) is not None

Example #21

Source File: simple_tokenizer.py From neural_chat with MIT License

5 votes

def __init__(self, **kwargs):
        """
        Args:
            annotators: None or empty set (only tokenizes).
        """
        self._regexp = regex.compile(
            '(%s)|(%s)' % (self.ALPHA_NUM, self.NON_WS),
            flags=regex.IGNORECASE + regex.UNICODE + regex.MULTILINE,
        )
        if len(kwargs.get('annotators', {})) > 0:
            logger.warning(
                '%s only tokenizes! Skipping annotators: %s'
                % (type(self).__name__, kwargs.get('annotators'))
            )
        self.annotators = set()

Example #22

Source File: parsed_text_corrector.py From lexpredict-contraxsuite with GNU Affero General Public License v3.0

5 votes

def setup_parser():
        from lexnlp.extract.en.amounts import CURRENCY_SYMBOL_MAP
        symbols = '|'.join([k for k in CURRENCY_SYMBOL_MAP]).replace('$', r'\$')
        ParsedTextCorrector.PATTERN_MONEY_BREAK = ParsedTextCorrector.PATTERN_MONEY_BREAK.format(symbols=symbols)
        ParsedTextCorrector.REGEX_MONEY_BREAK = re.compile(
            ParsedTextCorrector.PATTERN_MONEY_BREAK,
            re.IGNORECASE | re.DOTALL | re.MULTILINE | re.VERBOSE | re.UNICODE)

Example #23

Source File: utils.py From chepy with GNU General Public License v3.0

5 votes

def regex_search(
        self,
        pattern: str,
        ignore_case: bool = False,
        multiline: bool = False,
        dotall: bool = False,
        unicode: bool = False,
        extended: bool = False,
    ):
        """Regex search on current data
        
        Args:
            pattern (str): Required. The regex pattern to search by
            ignore_case (bool, optional): Set case insentive flag. Defaults to False.
            multiline (bool, optional): ^/$ match start/end. Defaults to False.
            dotall (bool, optional): `.` matches newline. Defaults to False.
            unicode (bool, optional): Match unicode characters. Defaults to False.
            extended (bool, optional): Ignore whitespace. Defaults to False.
        
        Returns:
            Chepy: The Chepy object.

        Examples:
            >>> c = Chepy("loLolololoL")
            >>> c.regex_search("ol", ignore_case=True)
        """
        flags = 0
        if ignore_case:
            flags += re.IGNORECASE
        if multiline:
            flags += re.MULTILINE
        if dotall:
            flags += re.DOTALL
        if unicode:
            flags += re.UNICODE
        if extended:
            flags += re.X
        self.state = re.findall(pattern, self._convert_to_str(), flags=flags)
        return self

Example #24

Source File: regexp_tokenizer.py From ParlAI with MIT License

5 votes

def __init__(self, **kwargs):
        """
        Args:
            annotators: None or empty set (only tokenizes).
            substitutions: if true, normalizes some token types (e.g. quotes).
        """
        self._regexp = regex.compile(
            '(?P<digit>%s)|(?P<title>%s)|(?P<abbr>%s)|(?P<neg>%s)|(?P<hyph>%s)|'
            '(?P<contr1>%s)|(?P<alphanum>%s)|(?P<contr2>%s)|(?P<sdquote>%s)|'
            '(?P<edquote>%s)|(?P<ssquote>%s)|(?P<esquote>%s)|(?P<dash>%s)|'
            '(?<ellipses>%s)|(?P<punct>%s)|(?P<nonws>%s)'
            % (
                self.DIGIT,
                self.TITLE,
                self.ABBRV,
                self.NEGATION,
                self.HYPHEN,
                self.CONTRACTION1,
                self.ALPHA_NUM,
                self.CONTRACTION2,
                self.START_DQUOTE,
                self.END_DQUOTE,
                self.START_SQUOTE,
                self.END_SQUOTE,
                self.DASH,
                self.ELLIPSES,
                self.PUNCT,
                self.NON_WS,
            ),
            flags=regex.IGNORECASE + regex.UNICODE + regex.MULTILINE,
        )
        if len(kwargs.get('annotators', {})) > 0:
            logger.warning(
                '%s only tokenizes! Skipping annotators: %s'
                % (type(self).__name__, kwargs.get('annotators'))
            )
        self.annotators = set()
        self.substitutions = kwargs.get('substitutions', True)

Example #25

Source File: simple_tokenizer.py From ParlAI with MIT License

5 votes

def __init__(self, **kwargs):
        """
        Args:
            annotators: None or empty set (only tokenizes).
        """
        self._regexp = regex.compile(
            '(%s)|(%s)' % (self.ALPHA_NUM, self.NON_WS),
            flags=regex.IGNORECASE + regex.UNICODE + regex.MULTILINE,
        )
        if len(kwargs.get('annotators', {})) > 0:
            logger.warning(
                '%s only tokenizes! Skipping annotators: %s'
                % (type(self).__name__, kwargs.get('annotators'))
            )
        self.annotators = set()

Example #26

Source File: simple_tokenizer.py From FusionNet with MIT License

5 votes

def __init__(self, **kwargs):
        """
        Args:
            annotators: None or empty set (only tokenizes).
        """
        self._regexp = regex.compile(
            '(%s)|(%s)' % (self.ALPHA_NUM, self.NON_WS),
            flags=regex.IGNORECASE + regex.UNICODE + regex.MULTILINE
        )
        if len(kwargs.get('annotators', {})) > 0:
            logger.warning('%s only tokenizes! Skipping annotators: %s' %
                           (type(self).__name__, kwargs.get('annotators')))
        self.annotators = set()

Example #27

Source File: genrelease.py From ibis with Apache License 2.0

5 votes

def iter_release_notes(repo, from_ref, to_ref, default_role):
    """Yield release notes from `from_ref` to `to_ref`."""
    pattern = re.compile(
        r'^(?:{})\s+#(\d+)\s+from'.format('|'.join(GITHUB_CLOSE_KEYWORDS)),
        flags=re.MULTILINE | re.IGNORECASE,
    )
    for commit in commits_between(
        repo, from_ref, to_ref, options=pygit2.GIT_SORT_TOPOLOGICAL
    ):
        message = commit.message.strip()
        subject, *lines = map(str.strip, message.splitlines())
        tag, *rest = subject.split(':', 1)
        tag = tag.lower()
        lineitem = ''.join(rest) or subject
        role = KEYWORD_MAP.get(tag, default_role)
        modifier = ' major' if role == 'bug' else ''
        try:
            issue_number, *_ = pattern.findall(message)
        except ValueError:
            issue_number = '-'
        yield "* :{role}:`{issue_number}{modifier}` {lineitem}".format(
            role=role,
            issue_number=issue_number,
            modifier=modifier,
            lineitem=lineitem.strip(),
        )

Example #28

Source File: eval.py From justcopy-backend with MIT License

5 votes

def regex_match(text, pattern):
    """Test if a regex pattern is contained within a text."""
    try:
        pattern = re.compile(
            pattern,
            flags=re.IGNORECASE + re.UNICODE + re.MULTILINE,
        )
    except BaseException:
        return False
    return pattern.search(text) is not None

Example #29

Source File: utils.py From justcopy-backend with MIT License

5 votes

def regex_match_score(prediction, pattern):
    """Check if the prediction matches the given regular expression."""
    try:
        compiled = re.compile(
            pattern,
            flags=re.IGNORECASE + re.UNICODE + re.MULTILINE
        )
    except BaseException:
        logger.warn('Regular expression failed to compile: %s' % pattern)
        return False
    return compiled.match(prediction) is not None

Example #30

Source File: simple_tokenizer.py From justcopy-backend with MIT License

5 votes

def __init__(self, **kwargs):
        """
        Args:
            annotators: None or empty set (only tokenizes).
        """
        self._regexp = regex.compile(
            '(%s)|(%s)' % (self.ALPHA_NUM, self.NON_WS),
            flags=regex.IGNORECASE + regex.UNICODE + regex.MULTILINE
        )
        if len(kwargs.get('annotators', {})) > 0:
            logger.warning('%s only tokenizes! Skipping annotators: %s' %
                           (type(self).__name__, kwargs.get('annotators')))
        self.annotators = set()