# Python source code of the type system module.

from collections import defaultdict
from itertools import chain, filterfalse
from io import BytesIO
from pathlib import Path
import re
from typing import Callable, Dict, IO, Iterator, Optional, Set, Union, Iterable, List
import warnings

from toposort import toposort_flatten

from more_itertools import unique_everseen

import attr

from lxml import etree

# Name of the type that `TypeSystem` registers without a supertype.
TOP_TYPE_NAME = "uima.cas.TOP"

# Name of the document annotation type that is added on demand.
_DOCUMENT_ANNOTATION_TYPE = "uima.tcas.DocumentAnnotation"

# Names of the scalar value types.
_PRIMITIVE_TYPES = {
    "uima.cas.Boolean",
    "uima.cas.Byte",
    "uima.cas.Short",
    "uima.cas.Integer",
    "uima.cas.Long",
    "uima.cas.Float",
    "uima.cas.Double",
    "uima.cas.String",
}

# Names of the array and list types that hold primitive values.
_PRIMITIVE_COLLECTION_TYPES = {
    # Arrays
    "uima.cas.BooleanArray",
    "uima.cas.ByteArray",
    "uima.cas.ShortArray",
    "uima.cas.IntegerArray",
    "uima.cas.LongArray",
    "uima.cas.FloatArray",
    "uima.cas.DoubleArray",
    "uima.cas.StringArray",
    # Lists
    "uima.cas.FloatList",
    "uima.cas.EmptyFloatList",
    "uima.cas.NonEmptyFloatList",
    "uima.cas.IntegerList",
    "uima.cas.EmptyIntegerList",
    "uima.cas.NonEmptyIntegerList",
    "uima.cas.StringList",
    "uima.cas.EmptyStringList",
    "uima.cas.NonEmptyStringList",
}

# Names of all array and list types: the primitive collections plus the
# collection base types and the feature structure collections.
_COLLECTION_TYPES = _PRIMITIVE_COLLECTION_TYPES | {
    "uima.cas.ArrayBase",
    "uima.cas.FSArray",
    "uima.cas.ListBase",
    "uima.cas.FSList",
    "uima.cas.EmptyFSList",
    "uima.cas.NonEmptyFSList",
}

# Names of every type that is built into a type system: primitives,
# collections and the remaining base types.
_PREDEFINED_TYPES = (
    _PRIMITIVE_TYPES
    | _COLLECTION_TYPES
    | {
        "uima.cas.TOP",
        "uima.cas.NULL",
        "uima.cas.Sofa",
        "uima.cas.AnnotationBase",
        "uima.tcas.Annotation",
    }
)


def _string_to_valid_classname(name: str) -> str:
    """Replace every character of `name` that is not an ASCII letter, digit or underscore with `_`."""
    disallowed = re.compile("[^a-zA-Z0-9_]")
    return disallowed.sub("_", name)


@attr.s
class TypeCheckError(Exception):
    """Describes a single type violation found while type checking a feature structure.

    Instances are collected and returned by `TypeSystem.typecheck`.
    """

    xmiID = attr.ib()  # int: xmiID of the feature structure with type error
    description = attr.ib()  # str: Description of the type check error


@attr.s(slots=True, hash=False, eq=True, order=True)
class FeatureStructure:
    """The base class for all feature structure instances.

    The classes that `Type` generates for concrete types use this class as
    their base and add one attribute per feature.
    """

    type = attr.ib()  # str: Type name of this feature structure instance
    xmiID = attr.ib(default=None, eq=False)  # int: xmiID of this feature structure instance

    def value(self, name: str):
        """Return the value of the feature `name` via attribute lookup on this instance."""
        return getattr(self, name)

    def get_covered_text(self) -> str:
        """Get the text covered by this feature structure.

        This only works if the instance has a `sofa`, a `begin` and an `end` attribute.

        Returns:
            The slice `sofa.sofaString[begin:end]`.

        Raises:
            NotImplementedError: If one of `sofa`, `begin` or `end` is missing.
        """
        if hasattr(self, "sofa") and hasattr(self, "begin") and hasattr(self, "end"):
            return self.sofa.sofaString[self.begin : self.end]
        else:
            raise NotImplementedError()

    def __hash__(self):
        # The xmiID itself is used as the hash value.
        # NOTE(review): `xmiID` defaults to None, and `hash()` rejects a non-integer
        # result -- confirm that an xmiID is always assigned before instances are hashed.
        return self.xmiID

    def __eq__(self, other):
        # NOTE(review): `__slots__` holds the attribute *names* of the class, so this
        # compares the shape of the two classes and not the stored values -- confirm
        # that this is intended. Also verify whether the `eq=True` passed to `attr.s`
        # replaces this method with a generated one.
        return self.__slots__ == other.__slots__


@attr.s(slots=True, eq=False, order=False)
class Feature:
    """A feature defines one attribute of a feature structure.

    Equality is implemented by hand so that unset optional values compare
    equal to their defaults; ordering compares the feature names only.
    """

    name = attr.ib()  # type: str
    rangeTypeName = attr.ib()  # type: str
    description = attr.ib(default=None)  # type: str
    elementType = attr.ib(default=None)  # type: str
    multipleReferencesAllowed = attr.ib(default=None)  # type: bool
    _has_reserved_name = attr.ib(default=False)  # type: bool

    def __eq__(self, other):
        """Compare two features by name, description, range type, element type and multi-reference flag.

        Returns:
            `True` if `other` is a `Feature` describing the same attribute, else `False`.
        """
        if not isinstance(other, Feature):
            return False
        if self.name != other.name or self.description != other.description:
            return False

        if self.rangeTypeName != other.rangeTypeName:
            return False

        # If elementType is `None`, then we assume the default is `TOP`
        if (self.elementType or TOP_TYPE_NAME) != (other.elementType or TOP_TYPE_NAME):
            return False

        # If multipleReferencesAllowed is `None`, then we assume the default is `False`.
        # Both sides have to be normalized from their own value; reading `self` twice
        # would make this check pass for every pair of features.
        self_multiref = False if self.multipleReferencesAllowed is None else self.multipleReferencesAllowed
        other_multiref = False if other.multipleReferencesAllowed is None else other.multipleReferencesAllowed
        if self_multiref != other_multiref:
            return False

        return True

    def __ne__(self, other):
        """Return the negation of `__eq__`."""
        return not self.__eq__(other)

    def __lt__(self, other):
        """Order features alphabetically by name, which allows sorting lists of features."""
        return self.name < other.name


@attr.s(slots=True)
class Type:
    """ Describes types in a type system.

    Instances of this class should not be created by hand, instead the type 
    system's `create_type` should be used.

    """

    name = attr.ib()  # type: str #: Type name of this type
    supertypeName = attr.ib()  # type: str # : Name of the super type
    description = attr.ib(default=None)  # type: str #: Description of this type
    _children = attr.ib(factory=dict)  # type: Dict[str, Type]
    _features = attr.ib(factory=dict)  # type: Dict[str, Feature]
    _inherited_features = attr.ib(factory=dict)  # type: Dict[str, Feature]
    _constructor = attr.ib(init=False, eq=False, order=False, repr=False)  # type: Callable[[Dict], FeatureStructure]

    def __attrs_post_init__(self):
        """Create the class that is used to instantiate feature structures of this type.

        The generated class derives from `FeatureStructure` and gets one attribute per
        feature (own and inherited), each defaulting to `None`, plus a `type` attribute
        that defaults to the name of this type. The `sofa` attribute is left out of the repr.
        """
        fields = {}
        for feature in self.all_features:
            show_in_repr = feature.name != "sofa"
            fields[feature.name] = attr.ib(default=None, repr=show_in_repr)
        fields["type"] = attr.ib(default=self.name)

        class_name = _string_to_valid_classname(self.name)
        self._constructor = attr.make_class(
            class_name, fields, bases=(FeatureStructure,), slots=True, eq=False, order=False
        )

    def __call__(self, **kwargs) -> FeatureStructure:
        """Instantiate a feature structure of this type.

        Args:
            **kwargs: Feature names mapped to the values the new instance should hold.

        Returns:
            A new feature structure instance of this type.

        """
        make_instance = self._constructor
        return make_instance(**kwargs)

    def get_feature(self, name: str) -> Optional[Feature]:
        """Look up one of this type's own features by name.

        Only the features defined directly on this type are searched.

        Args:
            name: The name of the feature

        Returns:
            The feature with name `name` or `None` if it does not exist.
        """
        own_features = self._features
        return own_features.get(name)

    def add_feature(self, feature: Feature, inherited: bool = False):
        """Add the given feature to this type and hand it down to all children.

        Adding a feature that is equal to an already known one only emits a warning
        and leaves the type untouched.

        Args:
            feature: The feature
            inherited: Indicates whether this feature is inherited from a parent or not

        Raises:
            ValueError: If a feature with the same name but a different definition
                already exists on this type or on a parent.
        """
        feature_name = feature.name
        if inherited:
            target = self._inherited_features
        else:
            target = self._features

        # The name is already taken in the mapping we are about to extend
        if feature_name in target:
            is_same_definition = target[feature_name] == feature
            if not is_same_definition:
                raise ValueError(
                    "Feature with name [{0}] already exists in [{1}] but is redefined differently!".format(
                        feature_name, self.name
                    )
                )
            warnings.warn("Feature with name [{0}] already exists in [{1}]!".format(feature_name, self.name))
            return

        # The name is already taken by a feature that a parent passed down
        if feature_name in self._inherited_features:
            is_same_definition = self._inherited_features[feature_name] == feature
            if not is_same_definition:
                raise ValueError(
                    "Feature with name [{0}] already exists in parent but is redefined!".format(feature_name)
                )
            warnings.warn("Feature with name [{0}] already exists in parent!".format(feature_name))
            return

        target[feature_name] = feature

        # The generated class has to be rebuilt so that it knows the new attribute
        self.__attrs_post_init__()

        for child in self._children.values():
            child.add_feature(feature, inherited=True)

    @property
    def features(self) -> Iterator[Feature]:
        """Iterate over the features defined directly on this type.

        Inherited features are not part of the result; `all_features` yields those as well.

        Returns:
            An iterator over all features of this type, excluding inherited ones

        """
        own_features = self._features.values()
        return iter(own_features)

    @property
    def all_features(self) -> Iterator[Feature]:
        """Iterate over the own features of this type followed by the inherited ones.

        Use `features` to get only the features defined directly on this type.

        Returns:
            An iterator over all features of this type, including inherited ones

        """
        own = self._features.values()
        from_parents = self._inherited_features.values()

        # Children could redefine features of their parents (Issue #56), so duplicates are dropped
        return unique_everseen(chain(own, from_parents))

    @property
    def children(self) -> Iterator["Type"]:
        """Yield the types that were registered as direct children of this type."""
        for child in self._children.values():
            yield child


class TypeSystem:
    def __init__(self, add_document_annotation_type: bool = True):
        """Create a type system that is pre-populated with the built-in types.

        Args:
            add_document_annotation_type: If `True`, the document annotation type
                is registered in addition to the built-in types.
        """
        # Registry of all types known to this type system, keyed by type name
        self._types = {}

        # We store types that are predefined but still defined in the typesystem here
        # In order to restore them when serializing
        self._predefined_types = set()

        # The type system of a UIMA CAS has several predefined types. These are
        # added in the following

        # `top` is directly assigned in order to circumvent the inheritance
        top = Type(name=TOP_TYPE_NAME, supertypeName=None)
        self._types[top.name] = top

        # cas:NULL
        self.create_type(name="uima.cas.NULL", supertypeName="uima.cas.TOP")

        # Primitive types
        self.create_type(name="uima.cas.Boolean", supertypeName="uima.cas.TOP")
        self.create_type(name="uima.cas.Byte", supertypeName="uima.cas.TOP")
        self.create_type(name="uima.cas.Short", supertypeName="uima.cas.TOP")
        self.create_type(name="uima.cas.Integer", supertypeName="uima.cas.TOP")
        self.create_type(name="uima.cas.Long", supertypeName="uima.cas.TOP")
        self.create_type(name="uima.cas.Float", supertypeName="uima.cas.TOP")
        self.create_type(name="uima.cas.Double", supertypeName="uima.cas.TOP")
        self.create_type(name="uima.cas.String", supertypeName="uima.cas.TOP")

        # Array
        t = self.create_type(name="uima.cas.ArrayBase", supertypeName="uima.cas.TOP")
        self.add_feature(t, name="elements", rangeTypeName="uima.cas.TOP", multipleReferencesAllowed=True)

        # The array types are created after `elements` was added to `ArrayBase`,
        # so each of them receives it as an inherited feature
        self.create_type(name="uima.cas.FSArray", supertypeName="uima.cas.ArrayBase")
        self.create_type(name="uima.cas.BooleanArray", supertypeName="uima.cas.ArrayBase")
        self.create_type(name="uima.cas.ByteArray", supertypeName="uima.cas.ArrayBase")
        self.create_type(name="uima.cas.ShortArray", supertypeName="uima.cas.ArrayBase")
        self.create_type(name="uima.cas.LongArray", supertypeName="uima.cas.ArrayBase")
        self.create_type(name="uima.cas.DoubleArray", supertypeName="uima.cas.ArrayBase")
        self.create_type(name="uima.cas.FloatArray", supertypeName="uima.cas.ArrayBase")
        self.create_type(name="uima.cas.IntegerArray", supertypeName="uima.cas.ArrayBase")
        self.create_type(name="uima.cas.StringArray", supertypeName="uima.cas.ArrayBase")

        # List
        self.create_type(name="uima.cas.ListBase", supertypeName="uima.cas.TOP")
        self.create_type(name="uima.cas.FSList", supertypeName="uima.cas.ListBase")
        self.create_type(name="uima.cas.EmptyFSList", supertypeName="uima.cas.FSList")
        t = self.create_type(name="uima.cas.NonEmptyFSList", supertypeName="uima.cas.FSList")
        self.add_feature(t, name="head", rangeTypeName="uima.cas.TOP", multipleReferencesAllowed=True)
        self.add_feature(t, name="tail", rangeTypeName="uima.cas.FSList", multipleReferencesAllowed=True)

        # FloatList
        self.create_type(name="uima.cas.FloatList", supertypeName="uima.cas.ListBase")
        self.create_type(name="uima.cas.EmptyFloatList", supertypeName="uima.cas.FloatList")
        t = self.create_type(name="uima.cas.NonEmptyFloatList", supertypeName="uima.cas.FloatList")
        self.add_feature(t, name="head", rangeTypeName="uima.cas.Float")
        self.add_feature(t, name="tail", rangeTypeName="uima.cas.FloatList", multipleReferencesAllowed=True)

        # IntegerList
        self.create_type(name="uima.cas.IntegerList", supertypeName="uima.cas.ListBase")
        self.create_type(name="uima.cas.EmptyIntegerList", supertypeName="uima.cas.IntegerList")
        t = self.create_type(name="uima.cas.NonEmptyIntegerList", supertypeName="uima.cas.IntegerList")
        self.add_feature(t, name="head", rangeTypeName="uima.cas.Integer")
        self.add_feature(t, name="tail", rangeTypeName="uima.cas.IntegerList", multipleReferencesAllowed=True)

        # StringList
        self.create_type(name="uima.cas.StringList", supertypeName="uima.cas.ListBase")
        self.create_type(name="uima.cas.EmptyStringList", supertypeName="uima.cas.StringList")
        t = self.create_type(name="uima.cas.NonEmptyStringList", supertypeName="uima.cas.StringList")
        self.add_feature(t, name="head", rangeTypeName="uima.cas.String")
        self.add_feature(t, name="tail", rangeTypeName="uima.cas.StringList", multipleReferencesAllowed=True)

        # Sofa
        t = self.create_type(name="uima.cas.Sofa", supertypeName="uima.cas.TOP")
        self.add_feature(t, name="sofaNum", rangeTypeName="uima.cas.Integer")
        self.add_feature(t, name="sofaID", rangeTypeName="uima.cas.String")
        self.add_feature(t, name="mimeType", rangeTypeName="uima.cas.String")
        self.add_feature(t, name="sofaArray", rangeTypeName="uima.cas.TOP", multipleReferencesAllowed=True)
        self.add_feature(t, name="sofaString", rangeTypeName="uima.cas.String")
        self.add_feature(t, name="sofaURI", rangeTypeName="uima.cas.String")

        # AnnotationBase
        t = self.create_type(name="uima.cas.AnnotationBase", supertypeName="uima.cas.TOP")
        self.add_feature(t, name="sofa", rangeTypeName="uima.cas.Sofa")

        # Annotation
        t = self.create_type(name="uima.tcas.Annotation", supertypeName="uima.cas.AnnotationBase")
        self.add_feature(t, name="begin", rangeTypeName="uima.cas.Integer")
        self.add_feature(t, name="end", rangeTypeName="uima.cas.Integer")

        if add_document_annotation_type:
            self._add_document_annotation_type()

    def contains_type(self, typename: str):
        """ Checks whether this type system contains a type with name `typename`.

        Args:
            typename: The name of type whose existence is to be checked.

        Returns:
            `True` if a type with `typename` exists, else `False`.
        """
        return typename in self._types

    def create_type(self, name: str, supertypeName: str = "uima.tcas.Annotation", description: str = None) -> Type:
        """Create a new type, register it in this type system and return it.

        Unless the supertype is the top type, the new type is registered as a child
        of its supertype and receives all of the supertype's features as inherited features.

        Args:
            name: The name of the new type
            supertypeName: The name of the new types' supertype. Defaults to `uima.tcas.Annotation`
            description: The description of the new type

        Returns:
            The newly created type

        Raises:
            ValueError: If a type called `name` already exists and is not a predefined type.
        """
        name_is_taken = self.contains_type(name)
        if name_is_taken and name not in _PREDEFINED_TYPES:
            raise ValueError("Type with name [{0}] already exists!".format(name))

        created = Type(name=name, supertypeName=supertypeName, description=description)

        if supertypeName != TOP_TYPE_NAME:
            parent = self.get_type(supertypeName)
            parent._children[name] = created

            for parent_feature in parent.all_features:
                created.add_feature(parent_feature, inherited=True)

        self._types[name] = created
        return created

    def get_type(self, type_name: str) -> Type:
        """ Finds a type by name in the type system of this CAS.

        Args:
            typename: The name of the type to retrieve

        Returns:
            The type with name `typename`
        Raises:
            Exception: If no type with `typename` could be found.
        """
        if self.contains_type(type_name):
            return self._types[type_name]
        else:
            raise Exception("Type with name [{0}] not found!".format(type_name))

    def get_types(self) -> Iterator[Type]:
        """Return an iterator over the types of this type system, leaving out the predefined ones."""

        def is_predefined(candidate):
            return candidate.name in _PREDEFINED_TYPES

        return filterfalse(is_predefined, self._types.values())

    def is_instance_of(self, type_name: str, parent_name: str) -> bool:
        if type_name == parent_name:
            return True
        elif type_name == TOP_TYPE_NAME:
            return False
        else:
            return self.is_instance_of(self.get_type(type_name).supertypeName, parent_name)

    def is_primitive(self, type_name: str) -> bool:
        """Tell whether `type_name` is a primitive type or derives from one.

        Args:
            type_name: The name of the type to query for.
        Returns:
            Returns True if the type identified by `type_name` is a primitive type, else False
        """
        current = type_name
        # Walk up the inheritance chain until the top type is reached
        while current != TOP_TYPE_NAME:
            if current in _PRIMITIVE_TYPES:
                return True
            current = self.get_type(current).supertypeName
        return False

    def is_collection(self, type_name: str, feature: Feature) -> bool:
        """Tell whether `feature` of the type `type_name` is a collection, e.g. a list or an array.

        Args:
            type_name: The type name to which the feature belongs.
            feature: The feature to query for.
        Returns:
            Returns True if the given feature is a collection type, else False
        """
        # The `elements` feature of a collection type holds the collection content
        holds_elements = type_name in _COLLECTION_TYPES and feature.name == "elements"
        return holds_elements or feature.rangeTypeName in _COLLECTION_TYPES

    def is_primitive_collection(self, type_name) -> bool:
        """Tell whether `type_name` is a list or array of primitives, or derives from one.

        Args:
            type_name: The name of the type to query for.
        Returns:
            Returns True if the type identified by `type_name` is a primitive collection type, else False
        """
        current = type_name
        # Walk up the inheritance chain until the top type is reached
        while current != TOP_TYPE_NAME:
            if current in _PRIMITIVE_COLLECTION_TYPES:
                return True
            current = self.get_type(current).supertypeName
        return False

    def subsumes(self, parent_name: str, child_name: str) -> bool:
        """Tell whether `child_name` is `parent_name` itself or one of its descendants.

        Args:
            parent_name: Name of the parent type
            child_name: Name of the child type

        Returns:
            True if `parent_name` subsumes `child_name` else False
        """
        # The top type subsumes everything, no lookup is needed
        if parent_name == TOP_TYPE_NAME:
            return True

        # Climb the supertype chain until the parent is found or the chain ends
        current = child_name
        while current and current != parent_name:
            current = self.get_type(current).supertypeName

        return bool(current)

    def add_feature(
        self,
        type_: Type,
        name: str,
        rangeTypeName: str,
        elementType: str = None,
        description: str = None,
        multipleReferencesAllowed: bool = None,
    ):
        """Build a feature from the given values and add it to `type_`.

        The names `self` and `type` are reserved; a feature with such a name is stored
        with a trailing underscore appended and a warning is emitted.

        Args:
            type_: The type to which the feature will be added
            name: The name of the new feature
            rangeTypeName: The feature's rangeTypeName specifies the type of value that the feature can take.
            elementType: The elementType of a feature is optional, and applies only when the rangeTypeName
                is uima.cas.FSArray or uima.cas.FSList The elementType specifies what type of value can be
                assigned as an element of the array or list.
            description: The description of the new feature
            multipleReferencesAllowed: Setting this to true indicates that the array or list may be shared,
                so changes to it may affect other objects in the CAS.

        Raises:
            ValueError: If `type_` already has a differently defined feature with that name.
        """
        is_reserved = name in ("self", "type")

        if is_reserved:
            # The message has to mention the name as it was passed in
            msg = "Trying to add feature `{0}` which is a reserved name in Python, renamed accessor to '{0}_' !".format(
                name
            )
            name = name + "_"
            warnings.warn(msg)

        type_.add_feature(
            Feature(
                name=name,
                rangeTypeName=rangeTypeName,
                elementType=elementType,
                description=description,
                multipleReferencesAllowed=multipleReferencesAllowed,
                has_reserved_name=is_reserved,
            )
        )

    def to_xml(self, path: Union[str, Path, None] = None) -> Optional[str]:
        """Serialize this type system to XML.

        Args:
            path: Where to write the XML to, given as `str` or `Path`. If `None` is
                provided, the result is returned as a string instead.

        Returns:
            If `path` is None, then the XML representation of this type system is returned as a string.

        Raises:
            TypeError: If `path` is neither `None`, a `str` nor a `Path`.
        """
        serializer = TypeSystemSerializer()

        # Without a target the XML is collected in memory and handed back
        if path is None:
            buffer = BytesIO()
            serializer.serialize(buffer, self)
            return buffer.getvalue().decode("utf-8")

        if isinstance(path, str):
            target = open(path, "wb")
        elif isinstance(path, Path):
            target = path.open("wb")
        else:
            raise TypeError("`path` needs to be one of [str, None, Path], but was <{0}>".format(type(path)))

        with target as f:
            serializer.serialize(f, self)

    def typecheck(self, fs: FeatureStructure) -> List[TypeCheckError]:
        """ Checks whether a feature structure is type sound.

        Currently only checks `uima.cas.FSArray` and `uima.cas.FSList`.

        Args:
            fs: The feature structure to type check.

        Returns:
            List of type errors found, empty list of no errors were found.
        """
        errors = []

        t = self.get_type(fs.type)
        for f in t.all_features:
            # Check FS collections
            if f.rangeTypeName == "uima.cas.FSArray" or f.rangeTypeName == "uima.cas.FSList":
                # We check for every element that it is of type `elementType` or a child thereof
                element_type = f.elementType or TOP_TYPE_NAME
                for e in fs.value(f.name):
                    if not self.subsumes(element_type, e.type):
                        msg = "Member of [{0}] has unsound type: was [{1}], need [{2}]!".format(
                            f.rangeTypeName, e.type, element_type
                        )
                        errors.append(TypeCheckError(fs.xmiID, msg))

        return errors

    def _defines_predefined_type(self, type_name):
        self._predefined_types.add(type_name)

    def _add_document_annotation_type(self):
        """Register the document annotation type as a child of `uima.tcas.Annotation` with a `language` feature."""
        document_annotation = self.create_type(name=_DOCUMENT_ANNOTATION_TYPE, supertypeName="uima.tcas.Annotation")
        self.add_feature(document_annotation, name="language", rangeTypeName="uima.cas.String")


# Deserializing


def load_typesystem(source: Union[IO, str]) -> TypeSystem:
    """Deserialize a type system from XML.

    Args:
        source: The XML source. If `source` is a string, then it is assumed to be an XML string.
                If `source` is a file-like object, then the data is read from it.

    Returns:
        The deserialized type system

    """
    # XML given as text is wrapped so that the deserializer can read it like a file
    if isinstance(source, str):
        source = BytesIO(source.encode("utf-8"))

    return TypeSystemDeserializer().deserialize(source)


class TypeSystemDeserializer:
    def deserialize(self, source: Union[IO, str]) -> TypeSystem:
        """Read a type system description from XML and build a `TypeSystem` from it.

        The XML is parsed in one pass that collects all type and feature
        descriptions. The types are then created parents first, and the
        features are added once every type exists.

        Args:
            source: a filename or file object containing XML data

        Returns:
            typesystem (TypeSystem): The type system described by `source`.

        Raises:
            ValueError: If the XML redefines a predefined type with a different
                supertype or with different features.
        """

        # It can be that the types in the xml are listed out-of-order, that means
        # some type A appears before its supertype. In order to deserialize these
        # files properly without sacrificing the requirement that the supertype
        # of a type needs to already be present, we sort the graph of types and
        # supertypes topologically. This means a supertype will always be inserted
        # before its children. The inheritance relation is expressed in the
        # `type_dependencies` dictionary.
        types = {}
        features = defaultdict(list)
        type_dependencies = defaultdict(set)

        context = etree.iterparse(source, events=("end",), tag=("{*}typeDescription",))
        for event, elem in context:
            type_name = self._get_elem_as_str(elem.find("{*}name"))
            description = self._get_elem_as_str(elem.find("{*}description"))
            supertypeName = self._get_elem_as_str(elem.find("{*}supertypeName"))

            # Names without a namespace are stored under the `uima.noNamespace.` prefix
            if "." not in type_name:
                type_name = "uima.noNamespace." + type_name

            if "." not in supertypeName:
                supertypeName = "uima.noNamespace." + supertypeName

            # This `Type` only carries the parsed values; the types that end up in the
            # type system are created through `create_type` further below
            types[type_name] = Type(name=type_name, supertypeName=supertypeName, description=description)
            type_dependencies[type_name].add(supertypeName)

            # Parse features
            for fd in elem.iterfind("{*}features/{*}featureDescription"):
                feature_name = self._get_elem_as_str(fd.find("{*}name"))
                rangeTypeName = self._get_elem_as_str(fd.find("{*}rangeTypeName"))
                description = self._get_elem_as_str(fd.find("{*}description"))
                multipleReferencesAllowed = self._get_elem_as_bool(fd.find("{*}multipleReferencesAllowed"))
                elementType = self._get_elem_as_str(fd.find("{*}elementType"))

                f = Feature(
                    name=feature_name,
                    rangeTypeName=rangeTypeName,
                    description=description,
                    multipleReferencesAllowed=multipleReferencesAllowed,
                    elementType=elementType,
                )
                features[type_name].append(f)

            # Free the XML tree element from memory as it is not needed anymore
            elem.clear()
            while elem.getprevious() is not None:
                del elem.getparent()[0]
        del context

        # The document annotation type is handled at the very end, depending on
        # whether the XML defined it
        ts = TypeSystem(add_document_annotation_type=False)

        # Some CAS handling libraries add predefined types to the typesystem XML.
        # Here we check that the redefinition of predefined types adheres to the definition in UIMA
        for type_name, t in types.items():
            if type_name in _PREDEFINED_TYPES:
                pt = ts.get_type(type_name)

                # Both lists are sorted by feature name so that the declaration order does not matter
                t_features = list(sorted(features[type_name]))
                pt_features = list(sorted(pt.features))

                if t.supertypeName != pt.supertypeName:
                    msg = "Redefining predefined type [{0}] with different superType [{1}], expected [{2}]"
                    raise ValueError(msg.format(type_name, t.supertypeName, pt.supertypeName))

                # We check whether the predefined type is defined the same in UIMA and this typesystem
                if t_features == pt_features:
                    # No need to create predefined types, but store them for serialization
                    ts._defines_predefined_type(type_name)
                    continue
                else:
                    msg = "Redefining predefined type [{0}] with different features: {1} - Have to be {2}"
                    raise ValueError(msg.format(type_name, t_features, pt_features))

        # Add the types to the type system in order of dependency (parents before children)
        created_types = []
        for type_name in toposort_flatten(type_dependencies, sort=False):
            # No need to recreate predefined types
            if type_name in _PREDEFINED_TYPES:
                continue

            t = types[type_name]
            created_type = ts.create_type(name=t.name, description=t.description, supertypeName=t.supertypeName)
            created_types.append(created_type)

        # Add the features to the type AFTER we create all the types to not cause circular references
        # between type references in inheritance and type references in range or element type.
        for t in created_types:
            for f in features[t.name]:
                ts.add_feature(
                    t,
                    name=f.name,
                    rangeTypeName=f.rangeTypeName,
                    elementType=f.elementType,
                    description=f.description,
                    multipleReferencesAllowed=f.multipleReferencesAllowed,
                )

        # DocumentAnnotation is not a predefined UIMA type, but some applications assume that it exists.
        # It can be defined by users with custom fields. In case the loaded type system did not define
        # it, we add the standard DocumentAnnotation type. In case it is already defined, we add it to
        # the list of redefined predefined types so that is written back on serialization.
        if not ts.contains_type(_DOCUMENT_ANNOTATION_TYPE):
            ts._add_document_annotation_type()
        else:
            ts._defines_predefined_type(_DOCUMENT_ANNOTATION_TYPE)

        return ts

    def _get_elem_as_str(self, elem: etree.Element) -> Optional[str]:
        if elem is not None:
            return elem.text if elem.text is None else elem.text.strip()
        else:
            return None

    def _get_elem_as_bool(self, elem: etree.Element) -> Optional[bool]:
        if elem is not None:
            return bool(elem.text)
        else:
            return None


# Serializing


class TypeSystemSerializer:
    """Writes a type system as a ``typeSystemDescription`` XML document."""

    # Prefix that is removed from type and supertype names before they are written.
    _NO_NAMESPACE_PREFIX = "uima.noNamespace."

    def serialize(self, sink: Union[IO, str], typesystem: TypeSystem):
        """Write ``typesystem`` as XML to ``sink``.

        Args:
            sink: The target that is handed to ``etree.xmlfile``.
            typesystem: The type system whose types are written.
        """
        nsmap = {None: "http://uima.apache.org/resourceSpecifier"}
        with etree.xmlfile(sink) as xf:
            with xf.element("typeSystemDescription", nsmap=nsmap):
                with xf.element("types"):
                    # In order to export the same types that we imported, we
                    # also emit the (redundant) predefined types
                    for predefined_type_name in sorted(typesystem._predefined_types):
                        predefined_type = typesystem.get_type(predefined_type_name)
                        self._serialize_type(xf, predefined_type)

                    for type_ in sorted(typesystem.get_types(), key=lambda t: t.name):
                        # We do not want to serialize our implicitly added DocumentAnnotation.
                        # If it was defined by the user, it is in `typesystem._predefined_types`
                        # and serialized in the loop before.
                        if type_.name == _DOCUMENT_ANNOTATION_TYPE:
                            continue

                        self._serialize_type(xf, type_)

    def _strip_no_namespace(self, type_name: str) -> str:
        """Return ``type_name`` without a leading ``uima.noNamespace.``, if it has one."""
        prefix = self._NO_NAMESPACE_PREFIX
        if type_name.startswith(prefix):
            # Slice instead of `str.replace` so that only the leading marker is removed and
            # later occurrences inside the name are left untouched.
            return type_name[len(prefix):]
        return type_name

    def _serialize_type(self, xf: IO, type_: Type):
        """Build the ``typeDescription`` element for ``type_`` and write it to ``xf``.

        Args:
            xf: The writer obtained from ``etree.xmlfile``; only its ``write`` method is used.
            type_: The type to serialize, including its features.
        """
        typeDescription = etree.Element("typeDescription")

        name = etree.SubElement(typeDescription, "name")
        name.text = self._strip_no_namespace(type_.name)

        description = etree.SubElement(typeDescription, "description")
        description.text = type_.description

        supertype_name_node = etree.SubElement(typeDescription, "supertypeName")
        supertype_name_node.text = self._strip_no_namespace(type_.supertypeName)

        # Only create the `features` element if there is at least one feature
        feature_list = list(type_.features)
        if feature_list:
            features = etree.SubElement(typeDescription, "features")
            for feature in feature_list:
                self._serialize_feature(features, feature)

        xf.write(typeDescription)

    def _serialize_feature(self, features: etree.Element, feature: Feature):
        """Append a ``featureDescription`` element for ``feature`` to ``features``.

        Args:
            features: The parent ``features`` element that receives the new child.
            feature: The feature to serialize.
        """
        featureDescription = etree.SubElement(features, "featureDescription")

        name = etree.SubElement(featureDescription, "name")

        feature_name = feature.name
        # If the feature name is a reserved name like `self`, then we added an
        # underscore to it before so Python can handle it. We now need to remove it.
        if feature._has_reserved_name:
            feature_name = feature_name[:-1]

        name.text = feature_name

        description = etree.SubElement(featureDescription, "description")
        description.text = feature.description

        rangeTypeName = etree.SubElement(featureDescription, "rangeTypeName")
        rangeTypeName.text = feature.rangeTypeName

        # The two optional elements are only emitted when the feature defines them
        if feature.multipleReferencesAllowed is not None:
            multipleReferencesAllowed = etree.SubElement(featureDescription, "multipleReferencesAllowed")
            multipleReferencesAllowed.text = "true" if feature.multipleReferencesAllowed else "false"

        if feature.elementType is not None:
            elementType = etree.SubElement(featureDescription, "elementType")
            elementType.text = feature.elementType


def merge_typesystems(*typesystems: TypeSystem) -> TypeSystem:
    """Merges several type systems into one.

    If a type is defined in more than one source type system, then the features of all of these definitions
    are joined together in the target type system. The exact rules are outlined in
    https://uima.apache.org/d/uimaj-2.10.4/references.html#ugr.ref.cas.typemerging .

    Args:
        *typesystems: The type systems to merge

    Returns:
        A new type system that is the result of merging all of the type systems together.

    Raises:
        ValueError: If a type is defined with incompatible supertypes, or if some types can never be added
            because their supertype is neither predefined nor part of the merged type system.
    """

    pending_types = []
    for ts in typesystems:
        pending_types.extend(ts.get_types())

    merged_types = set()
    merged_ts = TypeSystem()

    # A type can only be added if its supertype was added before. We therefore repeatedly pass over the types
    # that are still pending and defer those whose supertype is not available yet. If a whole pass does not
    # manage to add a single type, then the type systems are not mergeable and we abort with an error.
    while pending_types:
        deferred_types = []

        for t in pending_types:
            # Check whether the type is ready to be added
            if t.supertypeName not in _PREDEFINED_TYPES and t.supertypeName not in merged_types:
                deferred_types.append(t)
                continue

            # The supertype is defined so we can add the current type to the new type system
            if not merged_ts.contains_type(t.name):
                # Create the type and add its features as it does not exist yet in the merged type system
                created_type = merged_ts.create_type(
                    name=t.name, description=t.description, supertypeName=t.supertypeName
                )

                for feature in t.features:
                    created_type.add_feature(feature)
            else:
                # Type is already defined
                existing_type = merged_ts.get_type(t.name)

                # If the supertypes are not the same, we need to check whether they are at
                # least compatible and then patch the hierarchy
                if t.supertypeName != existing_type.supertypeName:
                    if merged_ts.subsumes(existing_type.supertypeName, t.supertypeName):
                        # Existing supertype subsumes newly specified supertype;
                        # reset supertype to the new, more specific type
                        existing_type.supertypeName = t.supertypeName
                    elif merged_ts.subsumes(t.supertypeName, existing_type.supertypeName):
                        # Newly specified supertype subsumes old type, this is OK and we don't
                        # need to do anything
                        pass
                    else:
                        msg = "Cannot merge type [{0}] with incompatible super types: [{1}] - [{2}]".format(
                            t.name, t.supertypeName, existing_type.supertypeName
                        )
                        raise ValueError(msg)

                # If the type is already defined, merge features
                for feature in t.features:
                    existing_type.add_feature(feature)

            merged_types.add(t.name)

        # If there was no progress in this pass, then the leftover types cannot be merged
        if len(deferred_types) == len(pending_types):
            raise ValueError("Unmergeable types: " + ", ".join(t.name for t in deferred_types))

        # Only the deferred types need another pass; everything else has been merged exactly once
        pending_types = deferred_types

    return merged_ts


def load_dkpro_core_typesystem() -> TypeSystem:
    """Load the type system stored in the packaged ``dkpro-core-types.xml`` resource.

    Returns:
        The result of passing the opened resource file to ``load_typesystem``.
    """
    # https://stackoverflow.com/a/20885799
    try:
        import importlib.resources as pkg_resources
    except ImportError:
        # Fall back to the `importlib_resources` backport for PY<37.
        import importlib_resources as pkg_resources

    # Relative import of the *package* that contains the resource file
    from . import resources

    with pkg_resources.open_binary(resources, "dkpro-core-types.xml") as xml_stream:
        typesystem = load_typesystem(xml_stream)

    return typesystem