Python re.RegexFlag() Examples

The following are 16 code examples of re.RegexFlag(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module re, or try the search function.
Example #1
Source File: utils.py    From py-pdf-parser with MIT License 6 votes vote down vote up
def create_pdf_element(
    bounding_box: "BoundingBox" = BoundingBox(0, 1, 0, 1),
    text: str = "fake_text",
    font_name: str = "fake_font",
    font_size: float = 10,
    font_mapping: Optional[Dict[str, str]] = None,
    font_mapping_is_regex: bool = False,
    regex_flags: Union[int, re.RegexFlag] = 0,
    font_size_precision: int = 1,
) -> "PDFElement":
    """
    Build a single fake text element wrapped in its own document and return it.

    The element is created with `FakePDFMinerTextElement`, placed alone in a
    document produced by `create_pdf_document`, and the first entry of that
    document's `elements` is returned. The font-mapping, regex-flag and
    precision arguments are forwarded to `create_pdf_document` unchanged.
    """
    fake_element = FakePDFMinerTextElement(
        bounding_box, text=text, font_name=font_name, font_size=font_size
    )
    single_element_document = create_pdf_document(
        elements=[fake_element],
        font_mapping=font_mapping,
        font_mapping_is_regex=font_mapping_is_regex,
        regex_flags=regex_flags,
        font_size_precision=font_size_precision,
    )
    return single_element_document.elements[0]
Example #2
Source File: _strings.py    From dexplo with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def contains(self, column=None, pat=None, case=True, flags=0, na=nan, regex=True, keep=False):
        """
        Validate the arguments and dispatch a 'contains' operation to `_str_generic`.

        Parameters
        ----------
        column, na, keep
            Forwarded to `_str_generic` without validation.
        pat
            A string or compiled regular expression.
        case, regex
            Booleans (Python or NumPy).
        flags
            An integer or `re.RegexFlag`.

        Raises
        ------
        TypeError
            For the first of `case`, `flags`, `pat`, `regex` (checked in that
            order) whose type is not accepted.
        """
        boolean_types = (bool, np.bool_)
        # (value, accepted types, message); the tuple order decides which error wins.
        validations = (
            (case, boolean_types, '`case` must be a boolean'),
            (flags, (int, np.integer, re.RegexFlag), 'flags must be a `RegexFlag` or integer'),
            (pat, (str, Pattern), '`pat` must either be either a string or compiled regex pattern'),
            (regex, boolean_types, '`regex` must be a boolean'),
        )
        for value, accepted, message in validations:
            if not isinstance(value, accepted):
                raise TypeError(message)

        return self._str_generic(name='contains', column=column, keep=keep, multiple=True,
                                 pat=pat, case=case, flags=flags, na=na, regex=regex)
Example #3
Source File: _strings.py    From dexplo with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def count(self, column=None, pat=None, case=True, flags=0, na=nan, regex=True, keep=False):
        """
        Validate the arguments and dispatch a 'count' operation to `_str_generic`.

        Parameters
        ----------
        column
            Forwarded to `_str_generic` without validation.
        pat
            A string or compiled regular expression.
        case
            A boolean (Python or NumPy).
        flags
            An integer or `re.RegexFlag`.
        na
            Forwarded to `_str_generic` without validation.
        regex
            A boolean (Python or NumPy).
        keep
            Forwarded to `_str_generic` without validation.

        Returns
        -------
        Whatever `_str_generic` returns.

        Raises
        ------
        TypeError
            For the first of `case`, `flags`, `pat`, `regex` (checked in that
            order) whose type is not accepted.
        """
        boolean_types = (bool, np.bool_)
        # (value, accepted types, message); the tuple order decides which error wins.
        validations = (
            (case, boolean_types, '`case` must be a boolean'),
            (flags, (int, np.integer, re.RegexFlag), 'flags must be a `RegexFlag` or integer'),
            (pat, (str, Pattern), '`pat` must either be either a string or compiled regex pattern'),
            (regex, boolean_types, '`regex` must be a boolean'),
        )
        for value, accepted, message in validations:
            if not isinstance(value, accepted):
                raise TypeError(message)

        return self._str_generic(name='count', column=column, keep=keep, multiple=True,
                                 pat=pat, case=case, flags=flags, na=na, regex=regex)
Example #4
Source File: _strings.py    From dexplo with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def findall(self, column=None, pat=None, pos=0, case=True, flags=0, keep=False):
        """
        Validate the arguments and dispatch a 'findall' operation to
        `_str_generic_concat`.

        Parameters
        ----------
        column, keep
            Forwarded to `_str_generic_concat` without validation.
        pat
            A string or compiled regular expression.
        pos
            An integer (Python or NumPy).
        case
            A boolean (Python or NumPy).
        flags
            An integer or `re.RegexFlag`.

        Raises
        ------
        TypeError
            For the first of `pat`, `pos`, `case`, `flags` (checked in that
            order) whose type is not accepted.
        """
        if not isinstance(pat, (str, Pattern)):
            raise TypeError('`pat` must be a str or compiled regular expression')
        if not isinstance(pos, (int, np.integer)):
            # The message names the parameter actually being validated (`pos`).
            raise TypeError('`pos` must be an integer')
        if not isinstance(case, (bool, np.bool_)):
            raise TypeError('`case` must be a boolean')
        if not isinstance(flags, (int, np.integer, re.RegexFlag)):
            raise TypeError('flags must be a `RegexFlag` or integer')

        return self._str_generic_concat('findall', column, keep, pat=pat, pos=pos,
                                        case=case, flags=flags, return_dtype='S')
Example #5
Source File: _strings.py    From dexplo with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def replace(self, column=None, pat=None, repl=None, n=0, case=True, flags=0, keep=False):
        """
        Validate the arguments and dispatch a 'replace' operation to `_str_generic`.

        Parameters
        ----------
        column, keep
            Forwarded to `_str_generic` without validation.
        pat
            A string or compiled regular expression.
        repl
            The replacement; must be a string.
        n
            An integer (Python or NumPy).
        case
            A boolean (Python or NumPy).
        flags
            An integer or `re.RegexFlag`.

        Raises
        ------
        TypeError
            For the first of `case`, `flags`, `n`, `pat`, `repl` (checked in
            that order) whose type is not accepted.
        """
        if not isinstance(case, (bool, np.bool_)):
            raise TypeError('`case` must be a boolean')
        if not isinstance(flags, (int, np.integer, re.RegexFlag)):
            raise TypeError('`flags` must be a `RegexFlag` or integer')
        # `re.RegexFlag` is an int subclass, so listing it here was redundant;
        # `n` is not a flag and the message should not suggest it is.
        if not isinstance(n, (int, np.integer)):
            raise TypeError('`n` must be an integer')
        if not isinstance(pat, (str, Pattern)):
            raise TypeError('`pat` must either be either a string or compiled regex pattern')
        # Only strings are accepted: the previous `... or callable(repl)` clause
        # could never change the outcome for a plain string and rejected every
        # callable, so callables remain rejected here.
        # NOTE(review): if callable replacements were intended, confirm that
        # `_str_generic` supports them before widening this check.
        if not isinstance(repl, str):
            raise TypeError('`repl` must be a string')

        return self._str_generic(name='replace', column=column, keep=keep, multiple=False,
                                 pat=pat, repl=repl, n=n, case=case, flags=flags)
Example #6
Source File: _strings.py    From dexplo with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def rsplit(self, column=None, pat=None, n=0, case=True, flags=0, keep=False):
        """
        Validate the arguments and dispatch an 'rsplit' operation to
        `_str_generic_concat`.

        Parameters
        ----------
        column, keep
            Forwarded to `_str_generic_concat` without validation.
        pat
            A string or compiled regular expression.
        n
            An integer (Python or NumPy).
        case
            A boolean (Python or NumPy).
        flags
            An integer or `re.RegexFlag`.

        Raises
        ------
        TypeError
            For the first of `pat`, `n`, `case`, `flags` (checked in that
            order) whose type is not accepted.
        """
        # (value, accepted types, message); the tuple order decides which error wins.
        validations = (
            (pat, (str, Pattern), '`pat` must be a str or compiled regular expression'),
            (n, (int, np.integer), '`n` must be an integer'),
            (case, (bool, np.bool_), '`case` must be a boolean'),
            (flags, (int, np.integer, re.RegexFlag), 'flags must be a `RegexFlag` or integer'),
        )
        for value, accepted, message in validations:
            if not isinstance(value, accepted):
                raise TypeError(message)

        return self._str_generic_concat('rsplit', column, keep, pat=pat, n=n, case=case,
                                        flags=flags, return_dtype='S')
Example #7
Source File: bot.py    From twitch-chat-bot with MIT License 5 votes vote down vote up
def handler(
    *prefixes: str,
    flags: re.RegexFlag = re.U,
) -> Callable[[Callback], Callback]:
    """
    Return a decorator that registers a callback for each given prefix.

    Every prefix is compiled as a regular expression with a trailing
    carriage-return/newline and end anchor appended, and the
    (compiled pattern, callback) pair is appended to `HANDLERS`. The
    decorated function itself is returned unchanged.
    """
    def handler_decorator(func: Callback) -> Callback:
        for raw_pattern in prefixes:
            compiled = re.compile(raw_pattern + '\r\n$', flags=flags)
            registration = (compiled, func)
            HANDLERS.append(registration)
        return func

    return handler_decorator
Example #8
Source File: bot.py    From twitch-chat-bot with MIT License 5 votes vote down vote up
def handle_message(
        *message_prefixes: str,
        flags: re.RegexFlag = re.U,
) -> Callable[[Callback], Callback]:
    """
    Return a `handler` decorator for PRIVMSG lines starting with the given prefixes.

    Each prefix is embedded (unescaped, so it may itself be a regular
    expression) in a pattern that captures the named groups `info`, `user`,
    `channel` and `msg`; the resulting patterns and `flags` are passed on to
    `handler`.
    """
    privmsg_patterns = [
        '^@(?P<info>[^ ]+) :(?P<user>[^!]+).* '
        'PRIVMSG #(?P<channel>[^ ]+) '
        f':(?P<msg>{message_prefix}.*)'
        for message_prefix in message_prefixes
    ]
    return handler(*privmsg_patterns, flags=flags)
Example #9
Source File: _evaljs.py    From altair-transform with MIT License 5 votes vote down vote up
def _reflags(self) -> re.RegexFlag:
        """
        Combine into one `re.RegexFlag` every flag from `self._flagmap` whose
        key is contained in `self._flags`; the result is `RegexFlag(0)` when
        none match.
        """
        selected = [
            flag for key, flag in self._flagmap.items() if key in self._flags
        ]
        combined = re.RegexFlag(0)
        for flag in selected:
            combined = combined | flag
        return combined
Example #10
Source File: filtering.py    From py-pdf-parser with MIT License 5 votes vote down vote up
def filter_by_regex(
        self,
        regex: str,
        regex_flags: Union[int, re.RegexFlag] = 0,
        stripped: bool = True,
    ):
        """
        Keep only the elements whose text matches a regular expression.

        Matching uses `re.match`, so the pattern is anchored at the start of
        the element text.

        Args:
            regex (str): The regex to filter for.
            regex_flags (int or re.RegexFlag, optional): Regex flags compatible
                with the re module. Default: 0.
            stripped (bool, optional): Passed to each element's `text()`; whether
                to strip the text of the element before comparison.
                Default: True.

        Returns:
            ElementList: The filtered list.
        """
        matching_indexes = {
            candidate._index
            for candidate in self
            if re.match(regex, candidate.text(stripped), flags=regex_flags) is not None
        }
        return ElementList(self.document, matching_indexes)
Example #11
Source File: utils.py    From py-pdf-parser with MIT License 5 votes vote down vote up
def create_pdf_document(
    elements: Union[List[LTComponent], Dict[int, List[LTComponent]]],
    font_mapping: Optional[Dict[str, str]] = None,
    font_mapping_is_regex: bool = False,
    regex_flags: Union[int, re.RegexFlag] = 0,
    font_size_precision: int = 1,
    element_ordering: Union[
        ElementOrdering, Callable[[List], List]
    ] = ElementOrdering.LEFT_TO_RIGHT_TOP_TO_BOTTOM,
) -> "PDFDocument":
    """
    Creates a PDF document with the given elements.

    "elements" is either a plain list of elements, which yields a document
    with a single page numbered 1, or a dictionary mapping each page number to
    that page's list of elements. Every page is created 100 wide and 100 high.
    The remaining arguments are handed to `PDFDocument` unchanged.
    """
    # Treat the single-page form as a one-entry {page_number: elements} mapping.
    elements_by_page = elements if isinstance(elements, dict) else {1: elements}
    pages = {
        number: Page(elements=page_elements, width=100, height=100)
        for number, page_elements in elements_by_page.items()
    }

    document_options = dict(
        font_mapping=font_mapping,
        font_mapping_is_regex=font_mapping_is_regex,
        regex_flags=regex_flags,
        font_size_precision=font_size_precision,
        element_ordering=element_ordering,
    )
    return PDFDocument(pages=pages, **document_options)
Example #12
Source File: processors.py    From python-prompt-toolkit with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def apply_transformation(
        self, transformation_input: TransformationInput
    ) -> Transformation:
        """
        Append a search-match class to the style of every fragment covered by
        an occurrence of the current search text on this line.

        The match under the cursor (when the cursor is on this line) gets
        `self._classname_current`; all other matches get `self._classname`.
        The fragments are returned untouched when there is no search text or
        the application is done.
        """
        unpacked = transformation_input.unpack()
        buffer_control, document, lineno, source_to_display, fragments, _, _ = unpacked

        search_text = self._get_search_text(buffer_control)
        match_style = " class:%s " % (self._classname,)
        current_match_style = " class:%s " % (self._classname_current,)

        # Nothing to highlight.
        if not search_text or get_app().is_done:
            return Transformation(fragments)

        # Work per character so match offsets index directly into `fragments`.
        line_text = fragment_list_to_text(fragments)
        fragments = explode_text_fragments(fragments)

        flags = (
            re.IGNORECASE
            if buffer_control.search_state.ignore_case()
            else re.RegexFlag(0)
        )

        # Cursor column in display coordinates, or None when the cursor is on
        # another line.
        cursor_column: Optional[int] = (
            source_to_display(document.cursor_position_col)
            if document.cursor_position_row == lineno
            else None
        )

        for match in re.finditer(re.escape(search_text), line_text, flags=flags):
            start, end = match.start(), match.end()
            on_cursor = cursor_column is not None and start <= cursor_column < end
            suffix = current_match_style if on_cursor else match_style

            for position in range(start, end):
                style, text, *_ = fragments[position]
                fragments[position] = (style + suffix, text)

        return Transformation(fragments)
Example #13
Source File: test_writer.py    From django-sqlserver with MIT License 4 votes vote down vote up
def test_serialize_class_based_validators(self):
        """
        Ticket #22943: class-based validators, including ones built from
        compiled regexes, serialize to the expected constructor call.
        """
        # Keyword-only message.
        validator = RegexValidator(message="hello")
        self.assertEqual(
            MigrationWriter.serialize(validator)[0],
            "django.core.validators.RegexValidator(message='hello')",
        )
        self.serialize_round_trip(validator)

        # Compiled regex passed as `regex`.
        validator = RegexValidator(regex=re.compile(r'^\w+$', re.U))
        self.assertEqual(
            MigrationWriter.serialize(validator)[0],
            "django.core.validators.RegexValidator(regex=re.compile('^\\\\w+$', 32))",
        )
        self.serialize_round_trip(validator)

        # String regex plus a flag; the expected text depends on PY36.
        validator = RegexValidator(r'^[0-9]+$', flags=re.U)
        serialized = MigrationWriter.serialize(validator)[0]
        expected = (
            "django.core.validators.RegexValidator('^[0-9]+$', flags=re.RegexFlag(32))"
            if PY36
            else "django.core.validators.RegexValidator('^[0-9]+$', flags=32)"
        )
        self.assertEqual(serialized, expected)
        self.serialize_round_trip(validator)

        # Positional regex, message and code.
        validator = RegexValidator('^[-a-zA-Z0-9_]+$', 'Invalid', 'invalid')
        self.assertEqual(
            MigrationWriter.serialize(validator)[0],
            "django.core.validators.RegexValidator('^[-a-zA-Z0-9_]+$', 'Invalid', 'invalid')",
        )
        self.serialize_round_trip(validator)

        # A RegexValidator subclass.
        validator = EmailValidator(message="hello")
        self.assertEqual(
            MigrationWriter.serialize(validator)[0],
            "django.core.validators.EmailValidator(message='hello')",
        )
        self.serialize_round_trip(validator)

        # Explicit deconstruction path pointing at this test module.
        validator = deconstructible(path="migrations.test_writer.EmailValidator")(EmailValidator)(message="hello")
        self.assertEqual(
            MigrationWriter.serialize(validator)[0],
            "migrations.test_writer.EmailValidator(message='hello')",
        )

        # Deconstruction path whose module cannot be imported.
        validator = deconstructible(path="custom.EmailValidator")(EmailValidator)(message="hello")
        with self.assertRaisesRegex(ImportError, "No module named '?custom'?"):
            MigrationWriter.serialize(validator)

        # Deconstruction path whose object does not exist in the module.
        validator = deconstructible(path="django.core.validators.EmailValidator2")(EmailValidator)(message="hello")
        with self.assertRaisesMessage(ValueError, "Could not find object EmailValidator2 in django.core.validators."):
            MigrationWriter.serialize(validator)
Example #14
Source File: test_writer.py    From djongo with GNU Affero General Public License v3.0 4 votes vote down vote up
def test_serialize_class_based_validators(self):
        """
        Ticket #22943: class-based validators, including ones built from
        compiled regexes, serialize to the expected constructor call.
        """
        # Keyword-only message.
        validator = RegexValidator(message="hello")
        self.assertEqual(
            MigrationWriter.serialize(validator)[0],
            "django.core.validators.RegexValidator(message='hello')",
        )
        self.serialize_round_trip(validator)

        # Compiled regex passed as `regex`.
        validator = RegexValidator(regex=re.compile(r'^\w+$'))
        self.assertEqual(
            MigrationWriter.serialize(validator)[0],
            "django.core.validators.RegexValidator(regex=re.compile('^\\\\w+$'))",
        )
        self.serialize_round_trip(validator)

        # String regex plus a flag; the expected text depends on PY36.
        validator = RegexValidator(r'^[0-9]+$', flags=re.S)
        serialized = MigrationWriter.serialize(validator)[0]
        expected = (
            "django.core.validators.RegexValidator('^[0-9]+$', flags=re.RegexFlag(16))"
            if PY36
            else "django.core.validators.RegexValidator('^[0-9]+$', flags=16)"
        )
        self.assertEqual(serialized, expected)
        self.serialize_round_trip(validator)

        # Positional regex, message and code.
        validator = RegexValidator('^[-a-zA-Z0-9_]+$', 'Invalid', 'invalid')
        self.assertEqual(
            MigrationWriter.serialize(validator)[0],
            "django.core.validators.RegexValidator('^[-a-zA-Z0-9_]+$', 'Invalid', 'invalid')",
        )
        self.serialize_round_trip(validator)

        # A RegexValidator subclass.
        validator = EmailValidator(message="hello")
        self.assertEqual(
            MigrationWriter.serialize(validator)[0],
            "django.core.validators.EmailValidator(message='hello')",
        )
        self.serialize_round_trip(validator)

        # Explicit deconstruction path pointing at this test module.
        validator = deconstructible(path="migrations.test_writer.EmailValidator")(EmailValidator)(message="hello")
        self.assertEqual(
            MigrationWriter.serialize(validator)[0],
            "migrations.test_writer.EmailValidator(message='hello')",
        )

        # Deconstruction path whose module cannot be imported.
        validator = deconstructible(path="custom.EmailValidator")(EmailValidator)(message="hello")
        with self.assertRaisesMessage(ImportError, "No module named 'custom'"):
            MigrationWriter.serialize(validator)

        # Deconstruction path whose object does not exist in the module.
        validator = deconstructible(path="django.core.validators.EmailValidator2")(EmailValidator)(message="hello")
        with self.assertRaisesMessage(ValueError, "Could not find object EmailValidator2 in django.core.validators."):
            MigrationWriter.serialize(validator)
Example #15
Source File: test_writer.py    From djongo with GNU Affero General Public License v3.0 4 votes vote down vote up
def test_serialize_class_based_validators(self):
        """
        Ticket #22943: class-based validators, including ones built from
        compiled regexes, serialize to the expected constructor call.
        """
        # Keyword-only message.
        validator = RegexValidator(message="hello")
        self.assertEqual(
            MigrationWriter.serialize(validator)[0],
            "django.core.validators.RegexValidator(message='hello')",
        )
        self.serialize_round_trip(validator)

        # Compiled regex passed as `regex`.
        validator = RegexValidator(regex=re.compile(r'^\w+$'))
        self.assertEqual(
            MigrationWriter.serialize(validator)[0],
            "django.core.validators.RegexValidator(regex=re.compile('^\\\\w+$'))",
        )
        self.serialize_round_trip(validator)

        # String regex plus a flag; the expected text depends on PY36.
        validator = RegexValidator(r'^[0-9]+$', flags=re.S)
        serialized = MigrationWriter.serialize(validator)[0]
        expected = (
            "django.core.validators.RegexValidator('^[0-9]+$', flags=re.RegexFlag(16))"
            if PY36
            else "django.core.validators.RegexValidator('^[0-9]+$', flags=16)"
        )
        self.assertEqual(serialized, expected)
        self.serialize_round_trip(validator)

        # Positional regex, message and code.
        validator = RegexValidator('^[-a-zA-Z0-9_]+$', 'Invalid', 'invalid')
        self.assertEqual(
            MigrationWriter.serialize(validator)[0],
            "django.core.validators.RegexValidator('^[-a-zA-Z0-9_]+$', 'Invalid', 'invalid')",
        )
        self.serialize_round_trip(validator)

        # A RegexValidator subclass.
        validator = EmailValidator(message="hello")
        self.assertEqual(
            MigrationWriter.serialize(validator)[0],
            "django.core.validators.EmailValidator(message='hello')",
        )
        self.serialize_round_trip(validator)

        # Explicit deconstruction path pointing at this test module.
        validator = deconstructible(path="migrations.test_writer.EmailValidator")(EmailValidator)(message="hello")
        self.assertEqual(
            MigrationWriter.serialize(validator)[0],
            "migrations.test_writer.EmailValidator(message='hello')",
        )

        # Deconstruction path whose module cannot be imported.
        validator = deconstructible(path="custom.EmailValidator")(EmailValidator)(message="hello")
        with self.assertRaisesMessage(ImportError, "No module named 'custom'"):
            MigrationWriter.serialize(validator)

        # Deconstruction path whose object does not exist in the module.
        validator = deconstructible(path="django.core.validators.EmailValidator2")(EmailValidator)(message="hello")
        with self.assertRaisesMessage(ValueError, "Could not find object EmailValidator2 in django.core.validators."):
            MigrationWriter.serialize(validator)
Example #16
Source File: components.py    From py-pdf-parser with MIT License 4 votes vote down vote up
def __init__(
        self,
        pages: Dict[int, "Page"],
        pdf_file_path: Optional[str] = None,
        font_mapping: Optional[Dict[str, str]] = None,
        font_mapping_is_regex: bool = False,
        regex_flags: Union[int, re.RegexFlag] = 0,
        font_size_precision: int = 1,
        element_ordering: Union[
            ElementOrdering, Callable[[List], List]
        ] = ElementOrdering.LEFT_TO_RIGHT_TOP_TO_BOTTOM,
    ):
        """
        Build the document's element list and per-page objects from raw pages.

        Args:
            pages: Mapping of page number to the `Page` holding that page's
                elements, width and height.
            pdf_file_path: Stored as `_pdf_file_path`.
            font_mapping: Stored as `_font_mapping`; None becomes an empty dict.
            font_mapping_is_regex: Stored as `_font_mapping_is_regex`.
            regex_flags: Stored as `_regex_flags`.
            font_size_precision: Forwarded to every `PDFElement` created here.
            element_ordering: Either an `ElementOrdering` member, which is looked
                up in `_ELEMENT_ORDERING_FUNCTIONS`, or a callable that is
                applied directly to each page's elements.

        Raises:
            NoElementsOnPageError: If the ordering function yields no elements
                for a page.
        """
        # NOTE: `self` is handed to Sectioning before the remaining attributes exist.
        self.sectioning = Sectioning(self)
        self._element_list = []
        self._element_indexes_by_font = defaultdict(set)
        self._font_mapping = font_mapping if font_mapping is not None else {}
        self._font_mapping_is_regex = font_mapping_is_regex
        self._regex_flags = regex_flags
        self._ignored_indexes = set()
        self.__pages = {}
        # Running element index; it keeps counting across pages.
        idx = 0
        # Pages are processed in ascending page-number order.
        for page_number, page in sorted(pages.items()):
            first_element = None
            # NOTE(review): sort_func does not depend on the page, so it is
            # re-resolved to the same value on every iteration.
            if isinstance(element_ordering, ElementOrdering):
                sort_func = _ELEMENT_ORDERING_FUNCTIONS[element_ordering]
            else:
                sort_func = element_ordering
            for element in sort_func(page.elements):
                pdf_element = PDFElement(
                    document=self,
                    element=element,
                    index=idx,
                    page_number=page_number,
                    font_size_precision=font_size_precision,
                )
                self._element_list.append(pdf_element)
                idx += 1
                if first_element is None:
                    first_element = pdf_element

            # No element was produced for this page.
            if first_element is None:
                raise NoElementsOnPageError(
                    f"No elements on page {page_number}, please exclude this page"
                )

            # At this point pdf_element is the last element created for this page.
            self.__pages[page_number] = PDFPage(
                document=self,
                width=page.width,
                height=page.height,
                page_number=page_number,
                start_element=first_element,
                end_element=pdf_element,
            )

        self._pdf_file_path = pdf_file_path
        self.number_of_pages = len(pages)
        # `self.pages` is defined outside this view; presumably it exposes the
        # PDFPage objects stored above — TODO confirm.
        self.page_numbers = [page.page_number for page in self.pages]