python source code of filtering

from typing import (
    overload,
    Union,
    Set,
    FrozenSet,
    Optional,
    Iterable,
    Iterator,
    TYPE_CHECKING,
)

import re

from .common import BoundingBox
from .exceptions import (
    NoElementFoundError,
    MultipleElementsFoundError,
    SectionNotFoundError,
)

if TYPE_CHECKING:
    from .components import PDFDocument, PDFElement


class ElementIterator(Iterator):
    index: int
    document: "PDFDocument"

    def __init__(self, element_list: "ElementList"):
        self.index = 0
        self.document = element_list.document
        self.indexes = iter(sorted(element_list.indexes))

    def __next__(self):
        index = next(self.indexes)
        return self.document._element_list[index]


class ElementList(Iterable):
    """
    Used to represent a list of elements, and to enable filtering of those elements.

    Any time you have a group of elements, for example pdf_document.elements or
    page.elements, you will get an `ElementList`. You can iterate through this, and also
    access specific elements. On top of this, there are lots of methods which you can
    use to further filter your elements. Since all of these methods return a new
    ElementList, you can chain these operations.

    Internally, we keep a set of indexes corresponding to the PDFElements in the
    document. This means you can treat ElementLists like sets to combine different
    ElementLists together.

    We often implement pluralised versions of methods, which is a shortcut to applying
    the or operator | to multiple ElementLists with the singular version applied, for
    example `foo.filter_by_tags("bar", "baz")` is the same as
    `foo.filter_by_tag("bar") | foo.filter_by_tag("baz")`.

    Similarly, chaining two filter commands is the same as applying the & operator,
    for example `foo.filter_by_tag("bar").filter_by_tag("baz")` is the same as
    `foo.filter_by_tag("bar") & foo.filter_by_tag("baz")`. Note that this is not the
    case for methods which do not filter, e.g. `add_element`.

    Ignored elements will be excluded on instantiation. Each time you chain a new filter
    a new ElementList is returned. Note this will remove newly-ignored elements.

    Note:
        As ElementList is implemented using sets internally, you will not be able to
        have an element in an ElementList multiple times.

    Args:
        document (PDFDocument): A reference to the PDF document
        indexes (set, optional): A set (or frozenset) of element indexes. Defaults to
            all elements in the document.

    Attributes:
        document (PDFDocument): A reference to the PDF document.
        indexes (set, optional): A frozenset of element indexes.
    """

    document: "PDFDocument"
    indexes: Union[Set[int], FrozenSet[int]]

    def __init__(
        self,
        document: "PDFDocument",
        indexes: Optional[Union[Set[int], FrozenSet[int]]] = None,
    ):
        self.document = document
        if indexes is not None:
            self.indexes = frozenset(indexes)
        else:
            self.indexes = frozenset(range(0, len(document._element_list)))
        self.indexes = self.indexes - self.document._ignored_indexes

    def add_tag_to_elements(self, tag: str) -> None:
        """
        Adds a tag to all elements in the list.

        Args:
            tag (str): The tag you would like to add.
        """
        for element in self:
            element.add_tag(tag)

    def filter_by_tag(self, tag: str) -> "ElementList":
        """
        Filter for elements containing only the given tag.

        Args:
            tag (str): The tag to filter for.

        Returns:
            ElementList: The filtered list.
        """
        new_indexes = set(element._index for element in self if tag in element.tags)
        return ElementList(self.document, new_indexes)

    def filter_by_tags(self, *tags: str) -> "ElementList":
        """
        Filter for elements containing any of the given tags.

        Args:
            *tags (str): The tags to filter for.

        Returns:
            ElementList: The filtered list.
        """
        new_indexes = set(
            element._index
            for element in self
            if any(tag in element.tags for tag in tags)
        )
        return ElementList(self.document, new_indexes)

    def filter_by_text_equal(self, text: str, stripped: bool = True) -> "ElementList":
        """
        Filter for elements whose text is exactly the given string.

        Args:
            text (str): The text to filter for.
            stripped (bool, optional): Whether to strip the text of the element before
                comparison. Default: True.

        Returns:
            ElementList: The filtered list.
        """
        new_indexes = set(
            element._index for element in self if element.text(stripped) == text
        )

        return ElementList(self.document, new_indexes)

    def filter_by_text_contains(self, text: str) -> "ElementList":
        """
        Filter for elements whose text contains the given string.

        Args:
            text (str): The text to filter for.

        Returns:
            ElementList: The filtered list.
        """
        new_indexes = set(element._index for element in self if text in element.text())
        return ElementList(self.document, new_indexes)

    def filter_by_regex(
        self,
        regex: str,
        regex_flags: Union[int, re.RegexFlag] = 0,
        stripped: bool = True,
    ):
        """
        Filter for elements given a regular expression.

        Args:
            regex (str): The regex to filter for.
            regex_flags (str, optional): Regex flags compatible with the re module.
                Default: 0.
            stripped (bool, optional): Whether to strip the text of the element before
                comparison. Default: True.

        Returns:
            ElementList: The filtered list.
        """
        new_indexes = set(
            element._index
            for element in self
            if re.match(regex, element.text(stripped), flags=regex_flags)
        )

        return ElementList(self.document, new_indexes)

    def filter_by_font(self, font: str) -> "ElementList":
        """
        Filter for elements containing only the given font.

        Args:
            font (str): The font to filter for.

        Returns:
            ElementList: The filtered list.
        """
        return self.filter_by_fonts(font)

    def filter_by_fonts(self, *fonts: str) -> "ElementList":
        """
        Filter for elements containing only the given font.

        Args:
            *fonts (str): The fonts to filter for.

        Returns:
            ElementList: The filtered list.
        """
        new_indexes = self.indexes & self.document._element_indexes_with_fonts(*fonts)
        return ElementList(self.document, new_indexes)

    def filter_by_page(self, page_number: int) -> "ElementList":
        """
        Filter for elements on the given page.

        Args:
            page (int): The page to filter for.

        Returns:
            ElementList: The filtered list.
        """
        page = self.document.get_page(page_number)
        new_indexes = set(element._index for element in page.elements)
        return self.__intersect_indexes_with_self(new_indexes)

    def filter_by_pages(self, *page_numbers: int) -> "ElementList":
        """
        Filter for elements on any of the given pages.

        Args:
            *pages (int): The pages to filter for.

        Returns:
            ElementList: The filtered list.
        """
        new_indexes: Set[int] = set()
        for page_number in page_numbers:
            page = self.document.get_page(page_number)
            new_indexes |= set(element._index for element in page.elements)
        return self.__intersect_indexes_with_self(new_indexes)

    def filter_by_section_name(self, section_name: str) -> "ElementList":
        """
        Filter for elements within any section with the given name.

        See the sectioning documentation for more details.

        Args:
            section_name (str): The section name to filter for.

        Returns:
            ElementList: The filtered list.
        """
        new_indexes: Set[int] = set()
        for section in self.document.sectioning.get_sections_with_name(section_name):
            new_indexes |= set(element._index for element in section.elements)
        return self.__intersect_indexes_with_self(new_indexes)

    def filter_by_section_names(self, *section_names: str) -> "ElementList":
        """
        Filter for elements within any section with any of the given names.

        See the sectioning documentation for more details.

        Args:
            *section_names (str): The section names to filter for.

        Returns:
            ElementList: The filtered list.
        """
        new_indexes: Set[int] = set()
        for section_name in section_names:
            for section in self.document.sectioning.get_sections_with_name(
                section_name
            ):
                new_indexes |= set(element._index for element in section.elements)
        return self.__intersect_indexes_with_self(new_indexes)

    def filter_by_section(self, section_str: str) -> "ElementList":
        """
        Filter for elements within the given section.

        See the sectioning documentation for more details.

        Args:
            section_name (str): The section to filter for.

        Note:
            You need to specify an exact section, not just the name (i.e. "foo_0" not
            just "foo").

        Returns:
            ElementList: The filtered list.
        """
        try:
            section = self.document.sectioning.get_section(section_str)
            new_indexes = set(element._index for element in section.elements)
            return self.__intersect_indexes_with_self(new_indexes)
        except SectionNotFoundError:
            # Section doesn't exist - return empty ElementList.
            return self.__intersect_indexes_with_self(set())

    def filter_by_sections(self, *section_strs: str) -> "ElementList":
        """
        Filter for elements within any of the given sections.

        See the sectioning documentation for more details.

        Args:
            *section_names (str): The sections to filter for.

        Note:
            You need to specify an exact section, not just the name (i.e. "foo_0" not
            just "foo").

        Returns:
            ElementList: The filtered list.
        """
        new_indexes: Set[int] = set()
        for section_str in section_strs:
            try:
                section = self.document.sectioning.sections_dict[section_str]
                new_indexes |= set(element._index for element in section.elements)
            except SectionNotFoundError:
                # This section doesn't exist. That's fine, keep checking the other ones.
                pass
        return self.__intersect_indexes_with_self(new_indexes)

    def ignore_elements(self) -> None:
        """
        Marks all the elements in the ElementList as ignored.
        """
        self.document._ignored_indexes = self.document._ignored_indexes.union(
            self.indexes
        )

    def to_the_right_of(
        self, element: "PDFElement", inclusive: bool = False, tolerance: float = 0.0
    ) -> "ElementList":
        """
        Filter for elements which are to the right of the given element.

        If you draw a box from the right hand edge of the element to the right hand
        side of the page, all elements which are partially within this box are returned.

        Note:
            By "to the right of" we really mean "directly to the right of", i.e. the
            returned elements all have at least some part which is vertically aligned
            with the specified element.

        Note:
            Technically the element you specify will satisfy the condition, but we
            assume you do not want that element returned. If you do, you can pass
            `inclusive=True`.

        Args:
            element (PDFElement): The element in question.
            inclusive (bool, optional): Whether the include `element` in the returned
                results. Default: False.
            tolerance (int, optional): To be counted as to the right, the elements must
                overlap by at least `tolerance` on the Y axis. Tolerance is capped at
                half the height of the element. Default 0.

        Returns:
            ElementList: The filtered list.
        """
        page_number = element.page_number
        page = self.document.get_page(page_number)
        tolerance = min(element.bounding_box.height / 2, tolerance)
        bounding_box = BoundingBox(
            element.bounding_box.x1,
            page.width,
            element.bounding_box.y0 + tolerance,
            element.bounding_box.y1 - tolerance,
        )
        results = self.filter_partially_within_bounding_box(bounding_box, page_number)
        if not inclusive:
            results = results.remove_element(element)
        return results

    def to_the_left_of(
        self, element: "PDFElement", inclusive: bool = False, tolerance: float = 0.0
    ) -> "ElementList":
        """
        Filter for elements which are to the left of the given element.


        If you draw a box from the left hand edge of the element to the left hand
        side of the page, all elements which are partially within this box are returned.

        Note:
            By "to the left of" we really mean "directly to the left of", i.e. the
            returned elements all have at least some part which is vertically aligned
            with the specified element.

        Note:
            Technically the element you specify will satisfy the condition, but we
            assume you do not want that element returned. If you do, you can pass
            `inclusive=True`.

        Args:
            element (PDFElement): The element in question.
            inclusive (bool, optional): Whether the include `element` in the returned
                results. Default: False.
            tolerance (int, optional): To be counted as to the left, the elements must
                overlap by at least `tolerance` on the Y axis. Tolerance is capped at
                half the height of the element. Default 0.


        Returns:
            ElementList: The filtered list.
        """
        page_number = element.page_number
        tolerance = min(element.bounding_box.height / 2, tolerance)
        bounding_box = BoundingBox(
            0,
            element.bounding_box.x0,
            element.bounding_box.y0 + tolerance,
            element.bounding_box.y1 - tolerance,
        )
        results = self.filter_partially_within_bounding_box(bounding_box, page_number)
        if not inclusive:
            results = results.remove_element(element)
        return results

    def below(
        self,
        element: "PDFElement",
        inclusive: bool = False,
        all_pages: bool = False,
        tolerance: float = 0.0,
    ) -> "ElementList":
        """
        Returns all elements which are below the given element.

        If you draw a box from the bottom edge of the element to the bottom of the page,
        all elements which are partially within this box are returned. By default, only
        elements on the same page as the given element are included, but you can pass
        `inclusive=True` to also include the pages which come after (and so are below)
        the page containing the given element.

        Note:
            By "below" we really mean "directly below", i.e. the returned elements all
            have at least some part which is horizontally aligned with the specified
            element.

        Note:
            Technically the element you specify will satisfy the condition, but we
            assume you do not want that element returned. If you do, you can pass
            `inclusive=True`.

        Args:
            element (PDFElement): The element in question.
            inclusive (bool, optional): Whether the include `element` in the returned
                results. Default: False.
            all_pages (bool, optional): Whether to included pages other than the page
                which the element is on.
            tolerance (int, optional): To be counted as below, the elements must
                overlap by at least `tolerance` on the X axis. Tolerance is capped at
                half the width of the element. Default 0.

        Returns:
            ElementList: The filtered list.
        """
        page_number = element.page_number
        tolerance = min(element.bounding_box.width / 2, tolerance)
        bounding_box = BoundingBox(
            element.bounding_box.x0 + tolerance,
            element.bounding_box.x1 - tolerance,
            0,
            element.bounding_box.y0,
        )
        results = self.filter_partially_within_bounding_box(bounding_box, page_number)
        if all_pages:
            for page in self.document.pages:
                if page.page_number <= page_number:
                    continue
                # We're on a page which is located below our element, so the bounding
                # box should be the length of the entire page.
                bounding_box = BoundingBox(
                    element.bounding_box.x0 + tolerance,
                    element.bounding_box.x1 - tolerance,
                    0,
                    page.height,
                )
                results = results | self.filter_partially_within_bounding_box(
                    bounding_box, page.page_number
                )
        if not inclusive:
            results = results.remove_element(element)
        return results

    def above(
        self,
        element: "PDFElement",
        inclusive: bool = False,
        all_pages: bool = False,
        tolerance: float = 0.0,
    ) -> "ElementList":
        """
        Returns all elements which are above the given element.

        If you draw a box from the bottom edge of the element to the bottom of the page,
        all elements which are partially within this box are returned. By default, only
        elements on the same page as the given element are included, but you can pass
        `inclusive=True` to also include the pages which come before (and so are above)
        the page containing the given element.

        Note:
            By "above" we really mean "directly above", i.e. the returned elements all
            have at least some part which is horizontally aligned with the specified
            element.

        Note:
            Technically the element you specify will satisfy the condition, but we
            assume you do not want that element returned. If you do, you can pass
            `inclusive=True`.

        Args:
            element (PDFElement): The element in question.
            inclusive (bool, optional): Whether the include `element` in the returned
                results. Default: False.
            all_pages (bool, optional): Whether to included pages other than the page
                which the element is on.
            tolerance (int, optional): To be counted as above, the elements must
                overlap by at least `tolerance` on the X axis. Tolerance is capped at
                half the width of the element. Default 0.

        Returns:
            ElementList: The filtered list.
        """
        page_number = element.page_number
        page = self.document.get_page(page_number)
        tolerance = min(element.bounding_box.width / 2, tolerance)
        bounding_box = BoundingBox(
            element.bounding_box.x0 + tolerance,
            element.bounding_box.x1 - tolerance,
            element.bounding_box.y1,
            page.height,
        )
        results = self.filter_partially_within_bounding_box(bounding_box, page_number)
        if all_pages:
            for page in self.document.pages:
                if page.page_number >= page_number:
                    continue
                # We're on a page which is located above our element, so the bounding
                # box should be the length of the entire page.
                bounding_box = BoundingBox(
                    element.bounding_box.x0 + tolerance,
                    element.bounding_box.x1 - tolerance,
                    0,
                    page.height,
                )
                results = results | self.filter_partially_within_bounding_box(
                    bounding_box, page.page_number
                )
        if not inclusive:
            results = results.remove_element(element)
        return results

    def vertically_in_line_with(
        self,
        element: "PDFElement",
        inclusive: bool = False,
        all_pages: bool = False,
        tolerance: float = 0.0,
    ) -> "ElementList":
        """
        Returns all elements which are vertically in line with the
        given element.

        If you extend the left and right edges of the element to the top and bottom of
        the page, all elements which are partially within this box are returned. By
        default, only elements on the same page as the given element are included, but
        you can pass `inclusive=True` to include all pages.

        This is equivalent to doing `foo.above(...) | foo.below(...)`.

        Note:
            Technically the element you specify will satisfy the condition, but we
            assume you do not want that element returned. If you do, you can pass
            `inclusive=True`.

        Args:
            element (PDFElement): The element in question.
            inclusive (bool, optional): Whether the include `element` in the returned
                results. Default: False.
            all_pages (bool, optional): Whether to included pages other than the page
                which the element is on.
            tolerance (int, optional): To be counted as in line with, the elements must
                overlap by at least `tolerance` on the X axis. Tolerance is capped at
                half the width of the element. Default 0.

        Returns:
            ElementList: The filtered list.
        """
        page_number = element.page_number
        page = self.document.get_page(page_number)
        tolerance = min(element.bounding_box.width / 2, tolerance)
        bounding_box = BoundingBox(
            element.bounding_box.x0 + tolerance,
            element.bounding_box.x1 - tolerance,
            0,
            page.height,
        )
        results = self.filter_partially_within_bounding_box(bounding_box, page_number)
        if all_pages:
            for page_num in range(self[0].page_number, self[-1].page_number + 1):
                page = self.document.get_page(page_num)
                if page.page_number == page_number:
                    # Already handled page containing element
                    continue
                bounding_box = BoundingBox(
                    element.bounding_box.x0 + tolerance,
                    element.bounding_box.x1 - tolerance,
                    0,
                    page.height,
                )
                results = results | self.filter_partially_within_bounding_box(
                    bounding_box, page.page_number
                )

        if not inclusive:
            results = results.remove_element(element)
        return results

    def horizontally_in_line_with(
        self, element: "PDFElement", inclusive: bool = False, tolerance: float = 0.0
    ) -> "ElementList":
        """
        Returns all elements which are horizontally in line with the given element.

        If you extend the top and bottom edges of the element to the left and right of
        the page, all elements which are partially within this box are returned.

        This is equivalent to doing
        `foo.to_the_left_of(...) | foo.to_the_right_of(...)`.

        Note:
            Technically the element you specify will satisfy the condition, but we
            assume you do not want that element returned. If you do, you can pass
            `inclusive=True`.

        Args:
            element (PDFElement): The element in question.
            inclusive (bool, optional): Whether the include `element` in the returned
                results. Default: False.
            tolerance (int, optional): To be counted as in line with, the elements must
                overlap by at least `tolerance` on the Y axis. Tolerance is capped at
                half the width of the element. Default 0.

        Returns:
            ElementList: The filtered list.
        """
        page_number = element.page_number
        page = self.document.get_page(page_number)
        tolerance = min(element.bounding_box.height / 2, tolerance)
        bounding_box = BoundingBox(
            0,
            page.width,
            element.bounding_box.y0 + tolerance,
            element.bounding_box.y1 - tolerance,
        )
        results = self.filter_partially_within_bounding_box(bounding_box, page_number)
        if not inclusive:
            results = results.remove_element(element)
        return results

    def filter_partially_within_bounding_box(
        self, bounding_box: BoundingBox, page_number: int
    ) -> "ElementList":
        """
        Returns all elements on the given page which are partially within the given box.

        Args:
            bounding_box (BoundingBox): The bounding box to filter within.
            page_number (int): The page which you'd like to filter within the box.

        Returns:
            ElementList: The filtered list.
        """
        new_indexes: Set[int] = set()
        for elem in self.filter_by_page(page_number):
            if elem.partially_within(bounding_box):
                new_indexes.add(elem._index)
        return self.__intersect_indexes_with_self(new_indexes)

    def before(self, element: "PDFElement", inclusive: bool = False) -> "ElementList":
        """
        Returns all elements before the specified element.

        By before, we mean preceding elements according to their index. The PDFDocument
        will order elements according to the specified element_ordering (which defaults
        to left to right, top to bottom).

        Args:
            element (PDFElement): The element in question.
            inclusive (bool, optional): Whether the include `element` in the returned
                results. Default: False.

        Returns:
            ElementList: The filtered list.
        """
        new_indexes = set(range(0, element._index))
        if inclusive:
            new_indexes.add(element._index)
        return self.__intersect_indexes_with_self(new_indexes)

    def after(self, element: "PDFElement", inclusive: bool = False) -> "ElementList":
        """
        Returns all elements after the specified element.

        By after, we mean succeeding elements according to their index. The PDFDocument
        will order elements according to the specified element_ordering (which defaults
        to left to right, top to bottom).

        Args:
            element (PDFElement): The element in question.
            inclusive (bool, optional): Whether the include `element` in the returned
                results. Default: False.

        Returns:
            ElementList: The filtered list.
        """
        new_indexes = set(range(element._index + 1, max(self.indexes) + 1))
        if inclusive:
            new_indexes.add(element._index)
        return self.__intersect_indexes_with_self(new_indexes)

    def between(
        self,
        start_element: "PDFElement",
        end_element: "PDFElement",
        inclusive: bool = False,
    ):
        """
        Returns all elements between the start and end elements.

        This is done according to the element indexes. The PDFDocument will order
        elements according to the specified element_ordering (which defaults
        to left to right, top to bottom).

        This is the same as applying `before` with `start_element` and `after` with
        `end_element`.

        Args:
            start_element (PDFElement): Returned elements will be after this element.
            end_element (PDFElement): Returned elements will be before this element.
            inclusive (bool, optional): Whether the include `start_element` and
                `end_element` in the returned results. Default: False.

        Returns:
            ElementList: The filtered list.
        """
        new_indexes = set(range(start_element._index + 1, end_element._index))
        if inclusive:
            new_indexes = new_indexes.union([start_element._index, end_element._index])
        return self.__intersect_indexes_with_self(new_indexes)

    def extract_single_element(self) -> "PDFElement":
        """
        Returns only element in the ElementList, provided there is only one element.

        This is mainly for convenience, when you think you've filtered down to a single
        element and you would like to extract said element.

        Raises:
            NoElementFoundError: If there are no elements in the ElementList
            MultipleElementsFoundError: If there is more than one element in the
                ElementList

        Returns:
            PDFElement: The single element remaining in the list.
        """
        if len(self.indexes) == 0:
            raise NoElementFoundError("There are no elements in the ElementList")
        if len(self.indexes) > 1:
            raise MultipleElementsFoundError(
                f"There are {len(self.indexes)} elements in the ElementList"
            )

        return self.document._element_list[list(self.indexes)[0]]

    def add_element(self, element: "PDFElement") -> "ElementList":
        """
        Explicitly adds the element to the ElementList.

        Note:
            If the element is already in the ElementList, this does nothing.

        Args:
            element (PDFElement): The element to add.

        Returns:
            ElementList: A new list with the additional element.
        """
        return ElementList(self.document, self.indexes | set([element._index]))

    def add_elements(self, *elements: "PDFElement") -> "ElementList":
        """
        Explicitly adds the elements to the ElementList.

        Note:
            If the elements is already in the ElementList, this does nothing.

        Args:
            *elements (PDFElement): The elements to add.

        Returns:
            ElementList: A new list with the additional elements.
        """
        return ElementList(
            self.document, self.indexes | set([element._index for element in elements])
        )

    def remove_element(self, element: "PDFElement") -> "ElementList":
        """
        Explicitly removes the element from the ElementList.

        Note:
            If the element is not in the ElementList, this does nothing.

        Args:
            element (PDFElement): The element to remove.

        Returns:
            ElementList: A new list without the element.
        """
        return ElementList(self.document, self.indexes - set([element._index]))

    def remove_elements(self, *elements: "PDFElement") -> "ElementList":
        """
        Explicitly removes the elements from the ElementList.

        Note:
            If the elements are not in the ElementList, this does nothing.

        Args:
            *elements (PDFElement): The elements to remove.

        Returns:
            ElementList: A new list without the elements.
        """
        return ElementList(
            self.document, self.indexes - set(element._index for element in elements)
        )

    def __intersect_indexes_with_self(self, new_indexes: Set[int]) -> "ElementList":
        return self & ElementList(self.document, new_indexes)

    def __iter__(self) -> ElementIterator:
        """
        Returns an ElementIterator class that allows iterating through elements.

        Elements will be returned in order of the elements in the document,
        left-to-right, top-to-bottom (the same as you read).
        """
        return ElementIterator(self)

    def __contains__(self, element: "PDFElement") -> bool:
        """
        Returns True if the element is in the ElementList, otherwise False.
        """
        return element._index in self.indexes

    def __repr__(self):
        return f"<ElementList of {len(self.indexes)} elements>"

    @overload
    def __getitem__(self, key: int) -> "PDFElement":
        pass  # This is for type checking only

    @overload
    def __getitem__(self, key: slice) -> "ElementList":
        pass  # This is for type checking only

    def __getitem__(self, key: Union[int, slice]) -> Union["PDFElement", "ElementList"]:
        """
        Returns the element in position `key` of the ElementList if an int is given, or
        returns a new ElementList if a slice is given.

        Elements are ordered by their original positions in the document, which is
        left-to-right, top-to-bottom (the same you you read).
        """
        if isinstance(key, slice):
            new_indexes = set(sorted(self.indexes)[key])
            return ElementList(self.document, new_indexes)
        element_index = sorted(self.indexes)[key]
        return self.document._element_list[element_index]

    def __eq__(self, other: object) -> bool:
        """
        Returns True if the two ElementLists contain the same elements from the same
        document.
        """
        if not isinstance(other, ElementList):
            raise NotImplementedError(f"Can't compare ElementList with {type(other)}")
        return (
            self.indexes == other.indexes
            and self.document == other.document
            and self.__class__ == other.__class__
        )

    def __hash__(self):
        return hash(hash(self.indexes) + hash(self.document))

    def __len__(self):
        """
        Returns the number of elements in the ElementList.
        """
        return len(self.indexes)

    def __sub__(self, other: "ElementList") -> "ElementList":
        """
        Returns an ElementList of elements that are in the first ElementList but not in
        the second.
        """
        return ElementList(self.document, self.indexes - other.indexes)

    def __or__(self, other: "ElementList") -> "ElementList":
        """
        Returns an ElementList of elements that are in either ElementList
        """
        return ElementList(self.document, self.indexes | other.indexes)

    def __xor__(self, other: "ElementList") -> "ElementList":
        """
        Returns an ElementList of elements that are in either ElementList, but not both.
        """
        return ElementList(self.document, self.indexes ^ other.indexes)

    def __and__(self, other: "ElementList") -> "ElementList":
        """
        Returns an ElementList of elements that are in both ElementList
        """
        return ElementList(self.document, self.indexes & other.indexes)