# Copyright (c) 2014 Ahmed H. Ismail
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#     http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals

import re

import six
from six.moves import reduce

from rdflib import Graph
from rdflib import Namespace
from rdflib import RDF
from rdflib import RDFS

from spdx import document
from spdx import utils
from spdx.parsers.builderexceptions import CardinalityError
from spdx.parsers.builderexceptions import SPDXValueError


# Message templates keyed by error identifier. BaseParser.value_error looks
# a template up by key and fills it in with ``template.format(bad_value)``,
# so every key passed to value_error must be present here.
ERROR_MESSAGES = {
    'DOC_VERS_VALUE': 'Invalid specVersion \'{0}\' must be SPDX-M.N where M and N are numbers.',
    'DOC_D_LICS': 'Invalid dataLicense \'{0}\' must be http://spdx.org/licenses/CC0-1.0.',
    'DOC_SPDX_ID_VALUE': 'Invalid SPDXID value, SPDXID must be the document namespace appended '
                         'by "#SPDXRef-DOCUMENT", line: {0}',
    'DOC_NAMESPACE_VALUE': 'Invalid DocumentNamespace value {0}, must contain a scheme (e.g. "https:") '
                           'and should not contain the "#" delimiter.',
    'LL_VALUE': 'Invalid licenseListVersion \'{0}\' must be of the format N.N where N is a number',
    'CREATED_VALUE': 'Invalid created value \'{0}\' must be date in ISO 8601 format.',
    'CREATOR_VALUE': 'Invalid creator value \'{0}\' must be Organization, Tool or Person.',
    'EXT_DOC_REF_VALUE': 'Failed to extract {0} from ExternalDocumentRef.',
    'PKG_SPDX_ID_VALUE': 'SPDXID must be "SPDXRef-[idstring]" where [idstring] is a unique string containing '
                         'letters, numbers, ".", "-".',
    'PKG_SUPPL_VALUE': 'Invalid package supplier value \'{0}\' must be Organization, Person or NOASSERTION.',
    # Previously a copy of the supplier message; it now names the originator.
    'PKG_ORIGINATOR_VALUE': 'Invalid package originator value \'{0}\'  must be Organization, Person or NOASSERTION.',
    'PKG_DOWN_LOC': 'Invalid package download location value \'{0}\'  must be a url or NONE or NOASSERTION',
    # Looked up by PackageParser.p_pkg_homepg; without this entry that
    # error path raised KeyError instead of logging.
    'PKG_HOME_PAGE': 'Invalid package homepage value \'{0}\' must be a url or NONE or NOASSERTION',
    'PKG_FILES_ANALYZED_VALUE': 'FilesAnalyzed must be a boolean value, line: {0}',
    'PKG_CONC_LIST': 'Package concluded license list must have more than one member',
    'LICS_LIST_MEMBER': 'Disjunctive or Conjunctive license set member must be a license url or identifier',
    'PKG_SINGLE_LICS': 'Package concluded license must be a license url or spdx:noassertion or spdx:none.',
    'PKG_LICS_INFO_FILES': 'Package licenseInfoFromFiles must be a license or spdx:none or spdx:noassertion',
    'FILE_SPDX_ID_VALUE': 'SPDXID must be "SPDXRef-[idstring]" where [idstring] is a unique string containing '
                          'letters, numbers, ".", "-".',
    'PKG_EXT_REF_CATEGORY': '\'{0}\' must be "SECURITY", "PACKAGE-MANAGER", or "OTHER".',
    'PKG_EXT_REF_TYPE': '{0} must be a unique string containing letters, numbers, ".", or "-".',
    'FILE_TYPE': 'File type must be binary, other, source or archive term.',
    'FILE_SINGLE_LICS': 'File concluded license must be a license url or spdx:noassertion or spdx:none.',
    'REVIEWER_VALUE': 'Invalid reviewer value \'{0}\' must be Organization, Tool or Person.',
    'REVIEW_DATE': 'Invalid review date value \'{0}\' must be date in ISO 8601 format.',
    'ANNOTATOR_VALUE': 'Invalid annotator value \'{0}\' must be Organization, Tool or Person.',
    'ANNOTATION_DATE': 'Invalid annotation date value \'{0}\' must be date in ISO 8601 format.',
    'SNIPPET_SPDX_ID_VALUE': 'SPDXID must be "SPDXRef-[idstring]" where [idstring] is a unique string '
                             'containing letters, numbers, ".", "-".',
    'SNIPPET_SINGLE_LICS': 'Snippet Concluded License must be a license url or spdx:noassertion or spdx:none.',
    'SNIPPET_LIC_INFO': 'License Information in Snippet must be a license url or a reference '
                        'to the license, denoted by LicenseRef-[idstring] or spdx:noassertion or spdx:none.',
}


class BaseParser(object):
    """
    Common ancestor of the parsers in this module.
    Holds the logger, the model builder and the DOAP and SPDX RDF
    namespaces, and offers the error reporting and value conversion
    helpers shared by the deriving parsers.
    """

    def __init__(self, builder, logger):
        self.builder = builder
        self.logger = logger
        self.spdx_namespace = Namespace("http://spdx.org/rdf/terms#")
        self.doap_namespace = Namespace('http://usefulinc.com/ns/doap#')

    def more_than_one_error(self, field):
        """
        Log that `field` (a field/property name) was defined more than
        once and set the parser's error flag.
        """
        self.logger.log('More than one {0} defined.'.format(field))
        self.error = True

    def value_error(self, key, bad_value):
        """
        Log the ERROR_MESSAGES template stored under `key`, formatted with
        `bad_value`, and set the parser's error flag.
        """
        self.logger.log(ERROR_MESSAGES[key].format(bad_value))
        self.error = True

    def to_special_value(self, value):
        """
        Map the SPDX terms none, noassertion and unknown to their model
        objects; any other value is returned converted to text.
        """
        namespace = self.spdx_namespace
        if value == namespace.none:
            return utils.SPDXNone()
        if value == namespace.noassertion:
            return utils.NoAssert()
        if value == namespace.unknown:
            return utils.UnKnown()
        return six.text_type(value)


class LicenseParser(BaseParser):
    """
    Helper class for parsing extracted licenses and license lists.

    The methods read `self.graph` and `self.doc`, which are not assigned
    anywhere in this class and must be set before these methods are called.
    """

    # Matches license references such as LicenseRef-1 (anchored at the start
    # because it is used with `match`).
    LICS_REF_REGEX = re.compile('LicenseRef-.+', re.UNICODE)

    def __init__(self, builder, logger):
        super(LicenseParser, self).__init__(builder, logger)

    def handle_lics(self, lics):
        """
        Return a License from a `lics` license resource.

        - A resource typed ExtractedLicensingInfo is parsed with
          `parse_only_extr_license` (which may return None).
        - A value containing '/' is treated as a license url and the text
          after the last '/' is used as the license identifier.
        - Otherwise the value must be a special value (see
          `to_special_value`) or a LicenseRef-... reference.

        Raise SPDXValueError if the value has none of these forms.
        """
        # Handle extracted licensing info type.
        if (lics, RDF.type, self.spdx_namespace['ExtractedLicensingInfo']) in self.graph:
            return self.parse_only_extr_license(lics)

        # Assume resource, hence the path separator
        ident_start = lics.rfind('/') + 1
        if ident_start != 0:
            # license url
            return document.License.from_identifier(lics[ident_start:])

        # No path separator: special values such as spdx:noassertion
        special = self.to_special_value(lics)
        if special != lics:
            # is a special value
            return special

        if self.LICS_REF_REGEX.match(lics):
            # Is a license ref i.e LicenseRef-1
            return document.License.from_identifier(six.text_type(lics))

        # Not a known license form
        raise SPDXValueError('License')

    def get_extr_license_ident(self, extr_lic):
        """
        Return a license identifier from an ExtractedLicense or None.
        A missing or repeated licenseId property is logged as an error.
        """
        identifier_tripples = list(self.graph.triples((extr_lic, self.spdx_namespace['licenseId'], None)))

        if not identifier_tripples:
            self.error = True
            msg = 'Extracted license must have licenseId property.'
            self.logger.log(msg)
            return None

        if len(identifier_tripples) > 1:
            self.more_than_one_error('extracted license identifier_tripples')
            return None

        _s, _p, identifier = identifier_tripples[0]
        return six.text_type(identifier)

    def get_extr_license_text(self, extr_lic):
        """
        Return extracted text from an ExtractedLicense or None.
        A missing or repeated extractedText property is logged as an error.
        """
        text_tripples = list(self.graph.triples((extr_lic, self.spdx_namespace['extractedText'], None)))
        if not text_tripples:
            self.error = True
            msg = 'Extracted license must have extractedText property'
            self.logger.log(msg)
            return None

        if len(text_tripples) > 1:
            self.more_than_one_error('extracted license text')
            return None

        _s, _p, text = text_tripples[0]
        return six.text_type(text)

    def get_extr_lic_name(self, extr_lic):
        """
        Return the license name from an ExtractedLicense or None.
        The name is optional; only a repeated licenseName is an error.
        """
        extr_name_list = list(self.graph.triples((extr_lic, self.spdx_namespace['licenseName'], None)))
        if len(extr_name_list) > 1:
            self.more_than_one_error('extracted license name')
            return None
        if not extr_name_list:
            return None
        return six.text_type(self.to_special_value(extr_name_list[0][2]))

    def get_extr_lics_xref(self, extr_lic):
        """
        Return a list of cross references (the rdfs:seeAlso objects).
        """
        return [xref for _s, _p, xref in self.graph.triples((extr_lic, RDFS.seeAlso, None))]

    def get_extr_lics_comment(self, extr_lics):
        """
        Return license comment or None.
        The comment is optional; only a repeated rdfs:comment is an error.
        """
        comment_list = list(self.graph.triples(
            (extr_lics, RDFS.comment, None)))
        if len(comment_list) > 1:
            self.more_than_one_error('extracted license comment')
            return None
        if len(comment_list) == 1:
            return six.text_type(comment_list[0][2])
        return None

    def parse_only_extr_license(self, extr_lic):
        """
        Return an ExtractedLicense object to represent a license object.
        But does not add it to the SPDXDocument model.
        Return None if failed.
        """
        # Grab all possible values; every getter runs so that all problems
        # of the resource get logged, even when the identifier is missing.
        ident = self.get_extr_license_ident(extr_lic)
        text = self.get_extr_license_text(extr_lic)
        comment = self.get_extr_lics_comment(extr_lic)
        xrefs = self.get_extr_lics_xref(extr_lic)
        name = self.get_extr_lic_name(extr_lic)

        if not ident:
            # Must have identifier
            return None

        # Set fields
        # FIXME: the constructor of the license should always accept a name
        lic = document.ExtractedLicense(ident)
        if text is not None:
            lic.text = text
        if name is not None:
            lic.full_name = name
        if comment is not None:
            lic.comment = comment
        lic.cross_ref = [six.text_type(xref) for xref in xrefs]
        return lic

    def handle_extracted_license(self, extr_lic):
        """
        Build and return an ExtractedLicense or None.
        Note that this function adds the license to the document.
        """
        lic = self.parse_only_extr_license(extr_lic)
        if lic is not None:
            self.doc.add_extr_lic(lic)
        return lic

    def _handle_license_list(self, lics_set, cls=None):
        """
        Return a license representing a `cls` object (LicenseConjunction
        or LicenseDisjunction) from a list of license resources or None.

        Members are combined left to right with `cls(a, b)`. A member of an
        unknown form is reported as LICS_LIST_MEMBER and stops the scan;
        fewer than two usable members is reported as PKG_CONC_LIST.
        """
        licenses = []
        for _, _, lics_member in self.graph.triples(
                (lics_set, self.spdx_namespace['member'], None)):
            try:
                lic = self.handle_lics(lics_member)
            except (CardinalityError, SPDXValueError):
                # handle_lics signals an unknown license form with
                # SPDXValueError, so it has to be caught here for the
                # member error to be reported at all.
                self.value_error('LICS_LIST_MEMBER', lics_member)
                break
            # None means an extracted license without identifier; the
            # problem has already been logged, so just leave it out.
            if lic is not None:
                licenses.append(lic)

        if len(licenses) > 1:
            return reduce(cls, licenses)

        self.value_error('PKG_CONC_LIST', '')
        return None

    def handle_conjunctive_list(self, lics_set):
        """
        Return a license representing the conjunction from a list of
        license resources or None.
        """
        return self._handle_license_list(lics_set, cls=document.LicenseConjunction)

    def handle_disjunctive_list(self, lics_set):
        """
        Return a license representing the disjunction from a list of
        license resources or None.
        """
        return self._handle_license_list(lics_set, cls=document.LicenseDisjunction)


class PackageParser(LicenseParser):
    """
    Helper class for parsing packages.

    Each `p_pkg_*` method reads the objects of one predicate of the package
    term from `self.graph` and hands them to the matching builder setter.
    Builder errors are logged through `more_than_one_error` (for
    CardinalityError) and `value_error` (for SPDXValueError).
    """

    def __init__(self, builder, logger):
        super(PackageParser, self).__init__(builder, logger)

    def parse_package(self, p_term):
        """
        Parse package fields.
        """
        # Check there is a package name
        if not (p_term, self.spdx_namespace['name'], None) in self.graph:
            self.error = True
            self.logger.log('Package must have a name.')
            # Create dummy package so that we may continue parsing the rest of
            # the package fields.
            self.builder.create_package(self.doc, 'dummy_package')
        else:
            for _s, _p, o in self.graph.triples((p_term, self.spdx_namespace['name'], None)):
                try:
                    self.builder.create_package(self.doc, six.text_type(o))
                except CardinalityError:
                    self.more_than_one_error('Package name')
                    break
        # Set SPDXID: the part of the term after its single '#'
        try:
            if p_term.count('#', 0, len(p_term)) == 1:
                pkg_spdx_id = p_term.split('#')[-1]
                self.builder.set_pkg_spdx_id(self.doc, pkg_spdx_id)
            else:
                self.value_error('PKG_SPDX_ID_VALUE', p_term)
        except SPDXValueError:
            self.value_error('PKG_SPDX_ID_VALUE', p_term)

        self.p_pkg_vinfo(p_term, self.spdx_namespace['versionInfo'])
        self.p_pkg_fname(p_term, self.spdx_namespace['packageFileName'])
        self.p_pkg_suppl(p_term, self.spdx_namespace['supplier'])
        self.p_pkg_originator(p_term, self.spdx_namespace['originator'])
        self.p_pkg_down_loc(p_term, self.spdx_namespace['downloadLocation'])
        self.p_pkg_files_analyzed(p_term, self.spdx_namespace['filesAnalyzed'])
        self.p_pkg_homepg(p_term, self.doap_namespace['homepage'])
        self.p_pkg_chk_sum(p_term, self.spdx_namespace['checksum'])
        self.p_pkg_src_info(p_term, self.spdx_namespace['sourceInfo'])
        self.p_pkg_verif_code(p_term, self.spdx_namespace['packageVerificationCode'])
        self.p_pkg_lic_conc(p_term, self.spdx_namespace['licenseConcluded'])
        self.p_pkg_lic_decl(p_term, self.spdx_namespace['licenseDeclared'])
        self.p_pkg_lics_info_from_files(p_term, self.spdx_namespace['licenseInfoFromFiles'])
        self.p_pkg_comments_on_lics(p_term, self.spdx_namespace['licenseComments'])
        self.p_pkg_cr_text(p_term, self.spdx_namespace['copyrightText'])
        self.p_pkg_summary(p_term, self.spdx_namespace['summary'])
        self.p_pkg_descr(p_term, self.spdx_namespace['description'])
        self.p_pkg_comment(p_term, self.spdx_namespace['comment'])

    def p_pkg_cr_text(self, p_term, predicate):
        """
        Set package copyright text.
        """
        try:
            for _, _, text in self.graph.triples((p_term, predicate, None)):
                self.builder.set_pkg_cr_text(self.doc, six.text_type(self.to_special_value(text)))
        except CardinalityError:
            self.more_than_one_error('package copyright text')

    def p_pkg_summary(self, p_term, predicate):
        """
        Set package summary.
        """
        try:
            for _, _, summary in self.graph.triples((p_term, predicate, None)):
                self.builder.set_pkg_summary(self.doc, six.text_type(summary))
        except CardinalityError:
            self.more_than_one_error('package summary')

    def p_pkg_descr(self, p_term, predicate):
        """
        Set package description.
        """
        try:
            for _, _, desc in self.graph.triples((p_term, predicate, None)):
                self.builder.set_pkg_desc(self.doc, six.text_type(desc))
        except CardinalityError:
            self.more_than_one_error('package description')

    def p_pkg_comment(self, p_term, predicate):
        """
        Set package comment.
        """
        try:
            for _, _, comment in self.graph.triples((p_term, predicate, None)):
                self.builder.set_pkg_comment(self.doc, six.text_type(comment))
        except CardinalityError:
            self.more_than_one_error('package comment')

    def p_pkg_comments_on_lics(self, p_term, predicate):
        """
        Set package license comment.
        """
        for _, _, comment in self.graph.triples((p_term, predicate, None)):
            try:
                self.builder.set_pkg_license_comment(self.doc, six.text_type(comment))
            except CardinalityError:
                self.more_than_one_error('package comments on license')
                break

    def p_pkg_lics_info_from_files(self, p_term, predicate):
        """
        Set the licenses found in the package files, one builder call per
        value. A value of an unknown form is reported as PKG_LICS_INFO_FILES.
        """
        for _, _, lics in self.graph.triples((p_term, predicate, None)):
            try:
                # handle_lics already dispatches ExtractedLicensingInfo
                # resources to parse_only_extr_license.
                lic = self.handle_lics(lics)
                # None means the extracted license could not be parsed; that
                # has been logged already, so nothing is handed to the builder.
                if lic is not None:
                    self.builder.set_pkg_license_from_file(self.doc, lic)
            except SPDXValueError:
                self.value_error('PKG_LICS_INFO_FILES', lics)

    def p_pkg_lic_decl(self, p_term, predicate):
        """
        Set package declared license.
        """
        self.handle_pkg_lic(p_term, predicate, self.builder.set_pkg_license_declared)

    def handle_pkg_lic(self, p_term, predicate, builder_func):
        """
        Handle package lics concluded or declared.

        `builder_func` is called as `builder_func(self.doc, license)` for
        every value of `predicate` that could be turned into a license.
        An SPDXValueError from any branch is reported as PKG_SINGLE_LICS and
        a CardinalityError ends the scan with a more-than-one error.
        """
        conjunctive = self.spdx_namespace['ConjunctiveLicenseSet']
        disjunctive = self.spdx_namespace['DisjunctiveLicenseSet']
        try:
            for _, _, licenses in self.graph.triples((p_term, predicate, None)):
                try:
                    if (licenses, RDF.type, conjunctive) in self.graph:
                        lics = self.handle_conjunctive_list(licenses)
                    elif (licenses, RDF.type, disjunctive) in self.graph:
                        lics = self.handle_disjunctive_list(licenses)
                    else:
                        lics = self.handle_lics(licenses)
                    # The list/extracted-license handlers return None after
                    # logging their own error; do not pass that on.
                    if lics is not None:
                        builder_func(self.doc, lics)
                except SPDXValueError:
                    self.value_error('PKG_SINGLE_LICS', licenses)
        except CardinalityError:
            self.more_than_one_error('package {0}'.format(predicate))

    def p_pkg_lic_conc(self, p_term, predicate):
        """
        Set package concluded license.
        """
        self.handle_pkg_lic(p_term, predicate, self.builder.set_pkg_licenses_concluded)

    def p_pkg_verif_code(self, p_term, predicate):
        """
        Set package verification code and its excluded files.
        """
        for _, _, verifcode in self.graph.triples((p_term, predicate, None)):
            # Parse verification code
            for _, _, code in self.graph.triples((verifcode, self.spdx_namespace['packageVerificationCodeValue'], None)):
                try:
                    self.builder.set_pkg_verif_code(self.doc, six.text_type(code))
                except CardinalityError:
                    self.more_than_one_error('package verificaton code')
                    break
            # Parse excluded file
            for _, _, filename in self.graph.triples((verifcode, self.spdx_namespace['packageVerificationCodeExcludedFile'], None)):
                try:
                    self.builder.set_pkg_excl_file(self.doc, six.text_type(filename))
                except CardinalityError:
                    self.more_than_one_error('package verificaton code excluded file')
                    break

    def p_pkg_src_info(self, p_term, predicate):
        """
        Set package source info.
        """
        for _, _, o in self.graph.triples((p_term, predicate, None)):
            try:
                self.builder.set_pkg_source_info(self.doc, six.text_type(o))
            except CardinalityError:
                self.more_than_one_error('package source info')
                break

    def p_pkg_chk_sum(self, p_term, predicate):
        """
        Set package checksum from the checksumValue of each checksum node.
        """
        for _s, _p, checksum in self.graph.triples((p_term, predicate, None)):
            for _, _, value in self.graph.triples((checksum, self.spdx_namespace['checksumValue'], None)):
                try:
                    self.builder.set_pkg_chk_sum(self.doc, six.text_type(value))
                except CardinalityError:
                    self.more_than_one_error('Package checksum')
                    break

    def p_pkg_homepg(self, p_term, predicate):
        """
        Set package home page.
        """
        for _s, _p, o in self.graph.triples((p_term, predicate, None)):
            try:
                self.builder.set_pkg_home(self.doc, six.text_type(self.to_special_value(o)))
            except CardinalityError:
                self.more_than_one_error('Package home page')
                break
            except SPDXValueError:
                # 'PKG_HOME_PAGE' has to be a key of ERROR_MESSAGES for this
                # report to work.
                self.value_error('PKG_HOME_PAGE', o)

    def p_pkg_down_loc(self, p_term, predicate):
        """
        Set package download location.
        """
        for _s, _p, o in self.graph.triples((p_term, predicate, None)):
            try:
                self.builder.set_pkg_down_location(self.doc, six.text_type(self.to_special_value(o)))
            except CardinalityError:
                self.more_than_one_error('Package download location')
                break
            except SPDXValueError:
                self.value_error('PKG_DOWN_LOC', o)

    def p_pkg_files_analyzed(self, p_term, predicate):
        """
        Set package filesAnalyzed flag.
        """
        for _s, _p, o in self.graph.triples((p_term, predicate, None)):
            try:
                self.builder.set_pkg_files_analyzed(self.doc, six.text_type(o))
            except CardinalityError:
                self.more_than_one_error('Package Files Analyzed')
                break
            except SPDXValueError:
                self.value_error('PKG_FILES_ANALYZED_VALUE', o)

    def p_pkg_originator(self, p_term, predicate):
        """
        Set package originator: NoAssert for the literal NOASSERTION,
        otherwise an entity created by the builder from the value.
        """
        for _s, _p, o in self.graph.triples((p_term, predicate, None)):
            try:
                if o == "NOASSERTION":
                    self.builder.set_pkg_originator(self.doc, utils.NoAssert())
                else:
                    ent = self.builder.create_entity(self.doc, six.text_type(o))
                    self.builder.set_pkg_originator(self.doc, ent)
            except CardinalityError:
                self.more_than_one_error('Package originator')
                break
            except SPDXValueError:
                self.value_error('PKG_ORIGINATOR_VALUE', o)

    def p_pkg_suppl(self, p_term, predicate):
        """
        Set package supplier: NoAssert for the literal NOASSERTION,
        otherwise an entity created by the builder from the value.
        """
        for _s, _p, o in self.graph.triples((p_term, predicate, None)):
            try:
                if o == "NOASSERTION":
                    self.builder.set_pkg_supplier(self.doc, utils.NoAssert())
                else:
                    ent = self.builder.create_entity(self.doc, six.text_type(o))
                    self.builder.set_pkg_supplier(self.doc, ent)
            except CardinalityError:
                self.more_than_one_error('Package supplier')
                break
            except SPDXValueError:
                self.value_error('PKG_SUPPL_VALUE', o)

    def p_pkg_fname(self, p_term, predicate):
        """
        Set package file name.
        """
        for _s, _p, o in self.graph.triples((p_term, predicate, None)):
            try:
                self.builder.set_pkg_file_name(self.doc, six.text_type(o))
            except CardinalityError:
                self.more_than_one_error('Package file name')
                break

    def p_pkg_vinfo(self, p_term, predicate):
        """
        Set package version info.
        """
        for _s, _p, o in self.graph.triples((p_term, predicate, None)):
            try:
                self.builder.set_pkg_vers(self.doc, six.text_type(o))
            except CardinalityError:
                self.more_than_one_error('Package version info')
                break


class FileParser(LicenseParser):
    """
    Helper class for parsing files.

    Each `p_file_*` method reads the objects of one predicate of the file
    term from `self.graph` and hands them to the matching builder setter.
    """

    def __init__(self, builder, logger):
        super(FileParser, self).__init__(builder, logger)

    def parse_file(self, f_term):
        """
        Parse file fields.
        """
        if not (f_term, self.spdx_namespace['fileName'], None) in self.graph:
            self.error = True
            self.logger.log('File must have a name.')
            # Dummy name to continue
            self.builder.set_file_name(self.doc, 'Dummy file')
        else:
            for _, _, name in self.graph.triples((f_term, self.spdx_namespace['fileName'], None)):
                self.builder.set_file_name(self.doc, six.text_type(name))

        self.p_file_spdx_id(f_term, self.spdx_namespace['File'])
        self.p_file_type(f_term, self.spdx_namespace['fileType'])
        self.p_file_chk_sum(f_term, self.spdx_namespace['checksum'])
        self.p_file_lic_conc(f_term, self.spdx_namespace['licenseConcluded'])
        self.p_file_lic_info(f_term, self.spdx_namespace['licenseInfoInFile'])
        self.p_file_comments_on_lics(f_term, self.spdx_namespace['licenseComments'])
        self.p_file_cr_text(f_term, self.spdx_namespace['copyrightText'])
        self.p_file_artifact(f_term, self.spdx_namespace['artifactOf'])
        self.p_file_comment(f_term, RDFS.comment)
        self.p_file_notice(f_term, self.spdx_namespace['noticeText'])
        self.p_file_contributor(f_term, self.spdx_namespace['fileContributor'])
        self.p_file_depends(f_term, self.spdx_namespace['fileDependency'])

    def get_file_name(self, f_term):
        """Returns first found fileName property or None if not found."""
        for _, _, name in self.graph.triples((f_term, self.spdx_namespace['fileName'], None)):
            return name
        return None

    def p_file_depends(self, f_term, predicate):
        """
        Set file dependencies.
        A dependency on a file without a fileName is logged as an error.
        """
        for _, _, other_file in self.graph.triples((f_term, predicate, None)):
            name = self.get_file_name(other_file)
            if name is not None:
                # NOTE(review): unlike every other builder call in this
                # class, self.doc is not passed here -- confirm against the
                # builder's add_file_dep signature.
                self.builder.add_file_dep(six.text_type(name))
            else:
                self.error = True
                msg = 'File depends on file with no name'
                self.logger.log(msg)

    def p_file_contributor(self, f_term, predicate):
        """
        Parse all file contributors and adds them to the model.
        """
        for _, _, contributor in self.graph.triples((f_term, predicate, None)):
            self.builder.add_file_contribution(self.doc, six.text_type(contributor))

    def p_file_notice(self, f_term, predicate):
        """
        Set file notice text.
        """
        try:
            for _, _, notice in self.graph.triples((f_term, predicate, None)):
                self.builder.set_file_notice(self.doc, six.text_type(notice))
        except CardinalityError:
            self.more_than_one_error('file notice')

    def p_file_comment(self, f_term, predicate):
        """
        Set file comment text.
        """
        try:
            for _, _, comment in self.graph.triples((f_term, predicate, None)):
                self.builder.set_file_comment(self.doc, six.text_type(comment))
        except CardinalityError:
            self.more_than_one_error('file comment')

    def p_file_artifact(self, f_term, predicate):
        """
        Handle file artifactOf.
        Note: does not handle artifact of project URI.

        Only objects typed doap:Project in the graph are parsed; anything
        else is logged as an error.
        """
        for _, _, project in self.graph.triples((f_term, predicate, None)):
            # The triple has to be looked up in the graph: a bare tuple is
            # always truthy, which made the error branch unreachable.
            if (project, RDF.type, self.doap_namespace['Project']) in self.graph:
                self.p_file_project(project)
            else:
                self.error = True
                msg = 'File must be artifact of doap:Project'
                self.logger.log(msg)

    def p_file_project(self, project):
        """
        Helper function for parsing doap:project name and homepage.
        and setting them using the file builder.
        """
        for _, _, name in self.graph.triples((project, self.doap_namespace['name'], None)):
            self.builder.set_file_atrificat_of_project(self.doc, 'name', six.text_type(name))
        for _, _, homepage in self.graph.triples(
                (project, self.doap_namespace['homepage'], None)):
            self.builder.set_file_atrificat_of_project(self.doc, 'home', six.text_type(homepage))

    def p_file_cr_text(self, f_term, predicate):
        """
        Set file copyright text.
        """
        try:
            for _, _, cr_text in self.graph.triples((f_term, predicate, None)):
                self.builder.set_file_copyright(self.doc, six.text_type(cr_text))
        except CardinalityError:
            self.more_than_one_error('file copyright text')

    def p_file_comments_on_lics(self, f_term, predicate):
        """
        Set file license comment.
        """
        try:
            for _, _, comment in self.graph.triples((f_term, predicate, None)):
                self.builder.set_file_license_comment(self.doc, six.text_type(comment))
        except CardinalityError:
            self.more_than_one_error('file comments on license')

    def p_file_lic_info(self, f_term, predicate):
        """
        Set file license information.
        An SPDXValueError raised by handle_lics is not caught here.
        """
        for _, _, info in self.graph.triples((f_term, predicate, None)):
            lic = self.handle_lics(info)
            if lic is not None:
                self.builder.set_file_license_in_file(self.doc, lic)

    def p_file_spdx_id(self, f_term, predicate):
        """
        Set file SPDX id from the text of the file term itself.
        `predicate` is accepted for symmetry with the other setters but is
        not used.
        """
        try:
            self.builder.set_file_spdx_id(self.doc, six.text_type(f_term))
        except SPDXValueError:
            self.value_error('FILE_SPDX_ID_VALUE', f_term)
        except CardinalityError:
            self.more_than_one_error('FILE_SPDX_ID_VALUE')

    def p_file_type(self, f_term, predicate):
        """
        Set file type.
        The value is mapped by its suffix to BINARY, SOURCE, OTHER or
        ARCHIVE; any other value is passed to the builder unchanged.
        """
        try:
            for _, _, ftype in self.graph.triples((f_term, predicate, None)):
                try:
                    if ftype.endswith('binary'):
                        ftype = 'BINARY'
                    elif ftype.endswith('source'):
                        ftype = 'SOURCE'
                    elif ftype.endswith('other'):
                        ftype = 'OTHER'
                    elif ftype.endswith('archive'):
                        ftype = 'ARCHIVE'
                    self.builder.set_file_type(self.doc, ftype)
                except SPDXValueError:
                    self.value_error('FILE_TYPE', ftype)
        except CardinalityError:
            self.more_than_one_error('file type')

    def p_file_chk_sum(self, f_term, predicate):
        """
        Set file checksum. Assumes SHA1 algorithm without checking.
        """
        try:
            for _s, _p, checksum in self.graph.triples((f_term, predicate, None)):
                for _, _, value in self.graph.triples((checksum, self.spdx_namespace['checksumValue'], None)):
                    self.builder.set_file_chksum(self.doc, six.text_type(value))
        except CardinalityError:
            self.more_than_one_error('File checksum')

    def p_file_lic_conc(self, f_term, predicate):
        """
        Set file licenses concluded.

        An SPDXValueError from any branch is reported as FILE_SINGLE_LICS
        and a CardinalityError ends the scan with a more-than-one error.
        """
        conjunctive = self.spdx_namespace['ConjunctiveLicenseSet']
        disjunctive = self.spdx_namespace['DisjunctiveLicenseSet']
        try:
            for _, _, licenses in self.graph.triples((f_term, predicate, None)):
                try:
                    if (licenses, RDF.type, conjunctive) in self.graph:
                        lics = self.handle_conjunctive_list(licenses)
                    elif (licenses, RDF.type, disjunctive) in self.graph:
                        lics = self.handle_disjunctive_list(licenses)
                    else:
                        lics = self.handle_lics(licenses)
                    # The list/extracted-license handlers return None after
                    # logging their own error; do not pass that on.
                    if lics is not None:
                        self.builder.set_concluded_license(self.doc, lics)
                except SPDXValueError:
                    self.value_error('FILE_SINGLE_LICS', licenses)
        except CardinalityError:
            self.more_than_one_error('file {0}'.format(predicate))


class SnippetParser(LicenseParser):
    """
    Helper class for parsing snippet information.
    """

    def __init__(self, builder, logger):
        super(SnippetParser, self).__init__(builder, logger)

    def parse_snippet(self, snippet_term):
        """
        Parse the snippet identified by `snippet_term` and pass its fields
        to the builder.

        Handles, in order: snippet creation, name, license comments, comment,
        copyright text, concluded license, license info in snippet and the
        originating file. Builder failures are reported through `value_error`
        or `more_than_one_error`; nothing is returned.
        """
        try:
            self.builder.create_snippet(self.doc, snippet_term)
        except SPDXValueError:
            self.value_error('SNIPPET_SPDX_ID_VALUE', snippet_term)

        for _s, _p, o in self.graph.triples((snippet_term, self.spdx_namespace['name'], None)):
            try:
                self.builder.set_snippet_name(self.doc, six.text_type(o))
            except CardinalityError:
                self.more_than_one_error('snippetName')
                break

        for _s, _p, o in self.graph.triples((snippet_term, self.spdx_namespace['licenseComments'], None)):
            try:
                self.builder.set_snippet_lic_comment(self.doc, six.text_type(o))
            except CardinalityError:
                self.more_than_one_error('licenseComments')
                break

        for _s, _p, o in self.graph.triples((snippet_term, RDFS.comment, None)):
            try:
                self.builder.set_snippet_comment(self.doc, six.text_type(o))
            except CardinalityError:
                self.more_than_one_error('comment')
                break

        for _s, _p, o in self.graph.triples((snippet_term, self.spdx_namespace['copyrightText'], None)):
            try:
                self.builder.set_snippet_copyright(self.doc, self.to_special_value(six.text_type(o)))
            except CardinalityError:
                self.more_than_one_error('copyrightText')
                break

        try:
            for _, _, licenses in self.graph.triples(
                    (snippet_term, self.spdx_namespace['licenseConcluded'], None)):
                if (licenses, RDF.type, self.spdx_namespace['ConjunctiveLicenseSet']) in self.graph:
                    lics = self.handle_conjunctive_list(licenses)
                    self.builder.set_snip_concluded_license(self.doc, lics)

                elif (licenses, RDF.type, self.spdx_namespace['DisjunctiveLicenseSet']) in self.graph:
                    lics = self.handle_disjunctive_list(licenses)
                    self.builder.set_snip_concluded_license(self.doc, lics)

                else:
                    try:
                        lics = self.handle_lics(licenses)
                        self.builder.set_snip_concluded_license(self.doc, lics)
                    except SPDXValueError:
                        self.value_error('SNIPPET_SINGLE_LICS', licenses)
        except CardinalityError:
            # Name the snippet (not the package) as the offending element.
            self.more_than_one_error('snippet {0}'.format(
                self.spdx_namespace['licenseConcluded']))

        for _, _, info in self.graph.triples(
                (snippet_term, self.spdx_namespace['licenseInfoInSnippet'], None)):
            # handle_lics is guarded against SPDXValueError for the concluded
            # license above; guard it here too so that one invalid license
            # is reported instead of aborting the whole parse.
            try:
                lic = self.handle_lics(info)
            except SPDXValueError:
                self.value_error('SNIPPET_LIC_INFO', info)
                continue
            if lic is not None:
                try:
                    self.builder.set_snippet_lics_info(self.doc, lic)
                except SPDXValueError:
                    self.value_error('SNIPPET_LIC_INFO', lic)

        for _s, _p, o in self.graph.triples(
                (snippet_term, self.spdx_namespace['snippetFromFile'], None)):
            try:
                self.builder.set_snip_from_file_spdxid(self.doc, six.text_type(o))
            except CardinalityError:
                self.more_than_one_error('snippetFromFile')
                break


class ReviewParser(BaseParser):
    """
    Helper class for parsing review information.
    """

    def __init__(self, builder, logger):
        super(ReviewParser, self).__init__(builder, logger)

    def parse_review(self, r_term):
        """
        Parse the review at `r_term`: reviewer, review date and comment.

        The reviewer and the date are always looked up (so that their errors
        get reported), but date and comment are only handed to the builder
        when a valid reviewer was found.
        """
        reviewer = self.get_reviewer(r_term)
        reviewed_date = self.get_review_date(r_term)
        if reviewer is not None:
            self.builder.add_reviewer(self.doc, reviewer)
            if reviewed_date is not None:
                try:
                    self.builder.add_review_date(self.doc, reviewed_date)
                except SPDXValueError:
                    self.value_error('REVIEW_DATE', reviewed_date)
            comment = self.get_review_comment(r_term)
            if comment is not None:
                self.builder.add_review_comment(self.doc, comment)

    def get_review_comment(self, r_term):
        """
        Return review comment or None if found none or more than one.
        Report an error only when more than one comment is found; the
        comment is optional.
        """
        comment_list = list(self.graph.triples((r_term, RDFS.comment, None)))
        if len(comment_list) > 1:
            self.error = True
            msg = 'Review can have at most one comment'
            self.logger.log(msg)
            return None
        if not comment_list:
            # No comment at all is valid; avoid indexing an empty list.
            return None
        return six.text_type(comment_list[0][2])

    def get_review_date(self, r_term):
        """
        Return review date or None if not found.
        Report error on failure.
        Note does not check value format.
        """
        reviewed_list = list(self.graph.triples((r_term, self.spdx_namespace['reviewDate'], None)))
        if len(reviewed_list) != 1:
            self.error = True
            msg = 'Review must have exactly one review date'
            self.logger.log(msg)
            return None
        return six.text_type(reviewed_list[0][2])

    def get_reviewer(self, r_term):
        """
        Return reviewer as creator object or None if failed.
        Report errors on failure.
        """
        reviewer_list = list(self.graph.triples((r_term, self.spdx_namespace['reviewer'], None)))
        if len(reviewer_list) != 1:
            self.error = True
            msg = 'Review must have exactly one reviewer'
            self.logger.log(msg)
            return None
        try:
            return self.builder.create_entity(self.doc, six.text_type(reviewer_list[0][2]))
        except SPDXValueError:
            self.value_error('REVIEWER_VALUE', reviewer_list[0][2])
            return None


class AnnotationParser(BaseParser):
    """
    Helper class for parsing annotation information.
    """

    def __init__(self, builder, logger):
        super(AnnotationParser, self).__init__(builder, logger)

    def parse_annotation(self, r_term):
        """
        Parse the annotation at `r_term`: annotator, date, comment, type and
        SPDX identifier reference.

        The annotator and the date are always looked up (so that their errors
        get reported), but the remaining fields are only handed to the
        builder when a valid annotator was found.
        """
        annotator = self.get_annotator(r_term)
        annotation_date = self.get_annotation_date(r_term)
        if annotator is not None:
            self.builder.add_annotator(self.doc, annotator)
            if annotation_date is not None:
                try:
                    self.builder.add_annotation_date(self.doc, annotation_date)
                except SPDXValueError:
                    self.value_error('ANNOTATION_DATE', annotation_date)
            comment = self.get_annotation_comment(r_term)
            if comment is not None:
                self.builder.add_annotation_comment(self.doc, comment)
            annotation_type = self.get_annotation_type(r_term)
            # None means the type was missing or ambiguous; that has already
            # been logged, so do not pass None on to the builder.
            if annotation_type is not None:
                self.builder.add_annotation_type(self.doc, annotation_type)
            try:
                self.builder.set_annotation_spdx_id(self.doc, six.text_type(r_term))
            except CardinalityError:
                self.more_than_one_error('SPDX Identifier Reference')

    def get_annotation_type(self, r_term):
        """
        Return annotation type or None if found none or more than one.
        Report errors on failure.
        """
        type_list = list(self.graph.triples((
            r_term, self.spdx_namespace['annotationType'], None)))
        if len(type_list) != 1:
            self.error = True
            msg = 'Annotation must have exactly one annotation type.'
            self.logger.log(msg)
            return None
        return six.text_type(type_list[0][2])

    def get_annotation_comment(self, r_term):
        """
        Return annotation comment or None if found none or more than one.
        Report an error only when more than one comment is found.
        """
        comment_list = list(self.graph.triples((r_term, RDFS.comment, None)))
        if len(comment_list) > 1:
            self.error = True
            msg = 'Annotation can have at most one comment.'
            self.logger.log(msg)
            return None
        if not comment_list:
            # No comment found; avoid indexing an empty list.
            return None
        return six.text_type(comment_list[0][2])

    def get_annotation_date(self, r_term):
        """
        Return annotation date or None if not found.
        Report error on failure.
        Note does not check value format.
        """
        annotation_date_list = list(self.graph.triples((r_term, self.spdx_namespace['annotationDate'], None)))
        if len(annotation_date_list) != 1:
            self.error = True
            msg = 'Annotation must have exactly one annotation date.'
            self.logger.log(msg)
            return None
        return six.text_type(annotation_date_list[0][2])

    def get_annotator(self, r_term):
        """
        Return annotator as creator object or None if failed.
        Report errors on failure.
        """
        annotator_list = list(self.graph.triples((r_term, self.spdx_namespace['annotator'], None)))
        if len(annotator_list) != 1:
            self.error = True
            msg = 'Annotation must have exactly one annotator'
            self.logger.log(msg)
            return None
        try:
            return self.builder.create_entity(self.doc, six.text_type(annotator_list[0][2]))
        except SPDXValueError:
            self.value_error('ANNOTATOR_VALUE', annotator_list[0][2])
            return None


class Parser(PackageParser, FileParser, SnippetParser, ReviewParser, AnnotationParser):
    """
    RDF/XML file parser.
    """

    def __init__(self, builder, logger):
        """
        Initialize the parser by forwarding `builder` and `logger` to the
        parent parser classes.
        """
        super(Parser, self).__init__(builder, logger)

    def parse(self, fil):
        """
        Build a document from an RDF/XML stream.

        `fil` is a file-like object holding the RDF/XML serialization.
        Return a `(document, error)` tuple; `error` is True when a problem
        was reported while parsing or when document validation produced
        messages.
        """
        self.error = False
        self.graph = Graph()
        self.graph.parse(file=fil, format='xml')
        self.doc = document.Document()

        spdx = self.spdx_namespace
        subject, obj = 0, 2
        # One entry per parsing step, run in the listed order:
        # (triple pattern, position of the term handed over, handler).
        steps = (
            ((None, RDF.type, spdx['SpdxDocument']), subject, self.parse_doc_fields),
            ((None, RDF.type, spdx['ExternalDocumentRef']), subject, self.parse_ext_doc_ref),
            ((None, RDF.type, spdx['CreationInfo']), subject, self.parse_creation_info),
            ((None, None, spdx['ExtractedLicensingInfo']), subject, self.handle_extracted_license),
            ((None, RDF.type, spdx['Package']), subject, self.parse_package),
            ((None, RDF.type, spdx['ExternalRef']), subject, self.parse_pkg_ext_ref),
            ((None, spdx['referencesFile'], None), obj, self.parse_file),
            ((None, RDF.type, spdx['Snippet']), subject, self.parse_snippet),
            ((None, spdx['reviewed'], None), obj, self.parse_review),
            ((None, spdx['annotation'], None), obj, self.parse_annotation),
        )
        for pattern, position, handler in steps:
            for triple in self.graph.triples(pattern):
                handler(triple[position])

        # Validation always runs, but its messages are only logged when
        # parsing itself reported nothing; otherwise they would be redundant.
        validation_messages = self.doc.validate([])
        if not self.error and validation_messages:
            for message in validation_messages:
                self.logger.log(message)
            self.error = True
        return self.doc, self.error

    def parse_creation_info(self, ci_term):
        """
        Parse the creation information found at `ci_term`: creators, creation
        date, comment and license list version.

        Invalid values are reported with `value_error`; a repeated
        single-valued field is reported with `more_than_one_error` and ends
        the loop over that field.
        """
        creator_pattern = (ci_term, self.spdx_namespace['creator'], None)
        for _, _, creator in self.graph.triples(creator_pattern):
            try:
                entity = self.builder.create_entity(self.doc, six.text_type(creator))
                self.builder.add_creator(self.doc, entity)
            except SPDXValueError:
                self.value_error('CREATOR_VALUE', creator)

        created_pattern = (ci_term, self.spdx_namespace['created'], None)
        for _, _, created in self.graph.triples(created_pattern):
            try:
                self.builder.set_created_date(self.doc, six.text_type(created))
            except SPDXValueError:
                self.value_error('CREATED_VALUE', created)
            except CardinalityError:
                self.more_than_one_error('created')
                break

        comment_pattern = (ci_term, RDFS.comment, None)
        for _, _, comment in self.graph.triples(comment_pattern):
            try:
                self.builder.set_creation_comment(self.doc, six.text_type(comment))
            except CardinalityError:
                self.more_than_one_error('CreationInfo comment')
                break

        version_pattern = (ci_term, self.spdx_namespace['licenseListVersion'], None)
        for _, _, list_version in self.graph.triples(version_pattern):
            try:
                self.builder.set_lics_list_ver(self.doc, six.text_type(list_version))
            except CardinalityError:
                self.more_than_one_error('licenseListVersion')
                break
            except SPDXValueError:
                self.value_error('LL_VALUE', list_version)

    def parse_doc_fields(self, doc_term):
        """
        Parse the document level fields found at `doc_term`: SPDX identifier,
        namespace, spec version, data license, name and comment.

        The namespace is the part of `doc_term` before '#'; a term holding
        more than one '#' is reported as an invalid namespace.
        Invalid values are reported with `value_error`; a repeated
        single-valued field is reported with `more_than_one_error` and ends
        the loop over that field.
        """
        try:
            self.builder.set_doc_spdx_id(self.doc, six.text_type(doc_term))
        except SPDXValueError:
            self.value_error('DOC_SPDX_ID_VALUE', doc_term)

        try:
            if doc_term.count('#') > 1:
                self.value_error('DOC_NAMESPACE_VALUE', doc_term)
            else:
                namespace_part = doc_term.split('#')[0]
                self.builder.set_doc_namespace(self.doc, namespace_part)
        except SPDXValueError:
            self.value_error('DOC_NAMESPACE_VALUE', doc_term)

        version_pattern = (doc_term, self.spdx_namespace['specVersion'], None)
        for _, _, spec_version in self.graph.triples(version_pattern):
            try:
                self.builder.set_doc_version(self.doc, six.text_type(spec_version))
            except SPDXValueError:
                self.value_error('DOC_VERS_VALUE', spec_version)
            except CardinalityError:
                self.more_than_one_error('specVersion')
                break

        data_license_pattern = (doc_term, self.spdx_namespace['dataLicense'], None)
        for _, _, data_license in self.graph.triples(data_license_pattern):
            try:
                self.builder.set_doc_data_lic(self.doc, six.text_type(data_license))
            except SPDXValueError:
                self.value_error('DOC_D_LICS', data_license)
            except CardinalityError:
                self.more_than_one_error('dataLicense')
                break

        name_pattern = (doc_term, self.spdx_namespace['name'], None)
        for _, _, doc_name in self.graph.triples(name_pattern):
            try:
                self.builder.set_doc_name(self.doc, six.text_type(doc_name))
            except CardinalityError:
                self.more_than_one_error('name')
                break

        comment_pattern = (doc_term, RDFS.comment, None)
        for _, _, doc_comment in self.graph.triples(comment_pattern):
            try:
                self.builder.set_doc_comment(self.doc, six.text_type(doc_comment))
            except CardinalityError:
                self.more_than_one_error('Document comment')
                break

    def parse_ext_doc_ref(self, ext_doc_ref_term):
        """
        Parse an external document reference: its external document ID, the
        SPDX document URI and the checksum value.

        An SPDXValueError raised by the builder is reported with
        `value_error` and ends the loop in which it occurred.
        """
        id_pattern = (ext_doc_ref_term, self.spdx_namespace['externalDocumentId'], None)
        for _, _, external_id in self.graph.triples(id_pattern):
            try:
                self.builder.set_ext_doc_id(self.doc, six.text_type(external_id))
            except SPDXValueError:
                self.value_error('EXT_DOC_REF_VALUE', 'External Document ID')
                break

        uri_pattern = (ext_doc_ref_term, self.spdx_namespace['spdxDocument'], None)
        for _, _, document_uri in self.graph.triples(uri_pattern):
            try:
                self.builder.set_spdx_doc_uri(self.doc, six.text_type(document_uri))
            except SPDXValueError:
                self.value_error('EXT_DOC_REF_VALUE', 'SPDX Document URI')
                break

        checksum_pattern = (ext_doc_ref_term, self.spdx_namespace['checksum'], None)
        for _, _, checksum_node in self.graph.triples(checksum_pattern):
            value_pattern = (checksum_node, self.spdx_namespace['checksumValue'], None)
            for _, _, checksum_value in self.graph.triples(value_pattern):
                try:
                    self.builder.set_chksum(self.doc, six.text_type(checksum_value))
                except SPDXValueError:
                    self.value_error('EXT_DOC_REF_VALUE', 'Checksum')
                    # Leaves only the inner loop; remaining checksum nodes
                    # are still visited.
                    break

    def parse_pkg_ext_ref(self, pkg_ext_term):
        """
        Parse a package external reference: its category, type, locator and
        comment.

        An invalid category or type is reported with `value_error`, a
        repeated comment with `more_than_one_error`; each of these ends the
        loop in which it occurred. The locator is passed to the builder
        without any error handling.
        """
        category_pattern = (pkg_ext_term, self.spdx_namespace['referenceCategory'], None)
        for _, _, category in self.graph.triples(category_pattern):
            try:
                self.builder.set_pkg_ext_ref_category(self.doc, six.text_type(category))
            except SPDXValueError:
                self.value_error('PKG_EXT_REF_CATEGORY', 'Package External Reference Category')
                break

        type_pattern = (pkg_ext_term, self.spdx_namespace['referenceType'], None)
        for _, _, reference_type in self.graph.triples(type_pattern):
            try:
                self.builder.set_pkg_ext_ref_type(self.doc, six.text_type(reference_type))
            except SPDXValueError:
                self.value_error('PKG_EXT_REF_TYPE', 'Package External Reference Type')
                break

        locator_pattern = (pkg_ext_term, self.spdx_namespace['referenceLocator'], None)
        for _, _, locator in self.graph.triples(locator_pattern):
            self.builder.set_pkg_ext_ref_locator(self.doc, six.text_type(locator))

        comment_pattern = (pkg_ext_term, RDFS.comment, None)
        for _, _, comment in self.graph.triples(comment_pattern):
            try:
                self.builder.set_pkg_ext_ref_comment(self.doc, six.text_type(comment))
            except CardinalityError:
                self.more_than_one_error('Package External Reference Comment')
                break