# Copyright (c) the SPDX tools authors # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # http://www.apache.org/licenses/LICENSE-2.0 # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals from collections import OrderedDict import io import json import ntpath import os import posixpath import re import xmltodict import yaml import spdx from spdx import utils test_data_dir = os.path.join(os.path.dirname(__file__), 'data') def get_test_loc(test_path, test_data_dir=test_data_dir, debug=False, exists=True): """ Given a `test_path` relative to the `test_data_dir` directory, return the location to a test file or directory for this path. No copy is done. """ if debug: import inspect caller = inspect.stack()[1][3] print('\nget_test_loc,%(caller)s,"%(test_path)s","%(test_data_dir)s"' % locals()) assert test_path assert test_data_dir if not os.path.exists(test_data_dir): raise IOError("[Errno 2] No such directory: test_data_dir not found:" " '%(test_data_dir)s'" % locals()) tpath = to_os_native_path(test_path) test_loc = os.path.abspath(os.path.join(test_data_dir, tpath)) if exists and not os.path.exists(test_loc): raise IOError("[Errno 2] No such file or directory: " "test_path not found: '%(test_loc)s'" % locals()) return test_loc def to_os_native_path(path): """ Normalize a path to use the native OS path separator. """ path = path.replace(posixpath.sep, os.path.sep) path = path.replace(ntpath.sep, os.path.sep) path = path.rstrip(os.path.sep) return path def strip_variable_text(rdf_text): """ Return rdf_text stripped from variable parts such as rdf nodeids """ replace_nid = re.compile('rdf:nodeID="[^\"]*"').sub rdf_text = replace_nid('', rdf_text) replace_creation = re.compile('<ns1:creationInfo>.*</ns1:creationInfo>', re.DOTALL).sub rdf_text = replace_creation('', rdf_text) replace_pcc = re.compile('<ns1:packageVerificationCode>.*</ns1:packageVerificationCode>', re.DOTALL).sub rdf_text = replace_pcc('', rdf_text) return rdf_text def load_and_clean_rdf(location): """ Return plain Python nested data for the SPDX RDF file at location suitable for comparison. The file content is cleaned from variable parts such as dates, generated UUIDs and versions NOTE: we use plain dicts to avoid ordering issues in XML. the SPDX tool and lxml do not seem to return a consistent ordering that is needed for tests. """ with io.open(location, encoding='utf-8') as l: content = l.read() content = strip_variable_text(content) data = xmltodict.parse(content, dict_constructor=dict) return sort_nested(data) def sort_nested(data): """ Return a new dict with any nested list sorted recursively. """ if isinstance(data, dict): new_data = {} for k, v in data.items(): if isinstance(v, list): v = sorted(v) if isinstance(v, dict): v = sort_nested(v) new_data[k] = v return new_data elif isinstance(data, list): new_data = [] for v in sorted(data): if isinstance(v, list): v = sort_nested(v) if isinstance(v, dict): v = sort_nested(v) new_data.append(v) return new_data def check_rdf_scan(expected_file, result_file, regen=False): """ Check that expected and result_file are equal. Both are paths to SPDX RDF XML files, UTF-8 encoded. """ result = load_and_clean_rdf(result_file) if regen: expected = result with io.open(expected_file, 'w', encoding='utf-8') as o: json.dump(expected, o, indent=2) else: with io.open(expected_file, 'r', encoding='utf-8') as i: expected = sort_nested(json.load(i)) assert expected == result def load_and_clean_tv(location): """ Return a mapping for the SPDX TV file at location suitable for comparison. The file content is cleaned from variable parts such as dates, generated UUIDs and versions """ with io.open(location, encoding='utf-8') as l: content = l.read() content = [l for l in content.splitlines(False) if l and l.strip() and not l.startswith(('Creator: ', 'Created: ',))] return '\n'.join(content) def check_tv_scan(expected_file, result_file, regen=False): """ Check that expected and result_file are equal. Both are paths to plain SPDX tv text files, UTF-8 encoded. """ result = load_and_clean_tv(result_file) if regen: with io.open(expected_file, 'w') as o: o.write(result) expected = load_and_clean_tv(expected_file) assert expected == result def load_and_clean_json(location): """ Return plain Python nested data for the SPDX JSON file at location suitable for comparison. The file content is cleaned from variable parts such as dates, generated UUIDs and versions """ with io.open(location, encoding='utf-8') as l: content = l.read() data = json.loads(content) if 'creationInfo' in data['Document']: del(data['Document']['creationInfo']) return sort_nested(data) def check_json_scan(expected_file, result_file, regen=False): """ Check that expected_file and result_file are equal. Both are paths to SPDX JSON files, UTF-8 encoded. """ result = load_and_clean_json(result_file) if regen: with io.open(expected_file, 'w', encoding='utf-8') as o: o.write(result) expected = load_and_clean_json(expected_file) assert expected == result def load_and_clean_yaml(location): """ Return plain Python nested data for the SPDX YAML file at location suitable for comparison. The file content is cleaned from variable parts such as dates, generated UUIDs and versions """ with io.open(location, encoding='utf-8') as l: content = l.read() data = yaml.safe_load(content) if 'creationInfo' in data['Document']: del(data['Document']['creationInfo']) return sort_nested(data) def check_yaml_scan(expected_file, result_file, regen=False): """ Check that expected_file and result_file are equal. Both are paths to SPDX YAML files, UTF-8 encoded. """ result = load_and_clean_yaml(result_file) if regen: with io.open(expected_file, 'w', encoding='utf-8') as o: o.write(result) expected = load_and_clean_yaml(expected_file) assert expected == result def load_and_clean_xml(location): """ Return plain Python nested data for the SPDX XML file at location suitable for comparison. The file content is cleaned from variable parts such as dates, generated UUIDs and versions """ with io.open(location, encoding='utf-8') as l: content = l.read() data = xmltodict.parse(content, encoding='utf-8') if 'creationInfo' in data['SpdxDocument']['Document']: del(data['SpdxDocument']['Document']['creationInfo']) return sort_nested(data) def check_xml_scan(expected_file, result_file, regen=False): """ Check that expected_file and result_file are equal. Both are paths to SPDX XML files, UTF-8 encoded. """ result = load_and_clean_xml(result_file) if regen: with io.open(expected_file, 'w', encoding='utf-8') as o: o.write(result) expected = load_and_clean_xml(expected_file) assert expected == result class TestParserUtils(object): """ Helper class to represent SPDX Document models as Python types after parsing to be compared to expected data from a JSON file. """ @classmethod def license_to_dict(cls, license): """ Represents spdx.document.License, spdx.document.LicenseConjunction or spdx.document.LicenseDisjunction as a Python dictionary """ CONJ_SEP = re.compile(' AND | and ') DISJ_SEP = re.compile(' OR | or ') license_dict = OrderedDict() if isinstance(license, spdx.document.LicenseConjunction): license_dict['type'] = 'Conjunction' sep_regex = CONJ_SEP elif isinstance(license, spdx.document.LicenseDisjunction): license_dict['type'] = 'Disjunction' sep_regex = DISJ_SEP else: license_dict['type'] = 'Single' license_dict['identifier'] = license.identifier license_dict['name'] = license.full_name return license_dict license_dict['identifier'] = sorted(sep_regex.split(license.identifier)) license_dict['name'] = sorted(sep_regex.split(license.full_name)) return license_dict @classmethod def version_to_dict(cls, version): """ Represents spdx.version.Version as a Python dictionary """ return OrderedDict([ ('major', int(version.major)), ('minor', int(version.minor)) ]) @classmethod def entity_to_dict(cls, entity): """ Represents spdx.creationInfo.Creator subclasses as a dictionary """ entity_dict = OrderedDict(name=entity.name) if isinstance(entity, spdx.creationinfo.Tool): entity_dict['type'] = 'Tool' return entity_dict entity_dict['email'] = entity.email entity_dict['type'] = 'Person' if isinstance(entity, spdx.creationinfo.Organization): entity_dict['type'] = 'Organization' return entity_dict return entity_dict @classmethod def checksum_to_dict(cls, checksum): """ Represents spdx.checksum.Algorithm as a Python dictionary """ return OrderedDict([ ('identifier', checksum.identifier), ('value', checksum.value)]) @classmethod def package_to_dict(cls, package): """ Represents spdx.package.Package as a Python dictionary """ lics_from_files = sorted(package.licenses_from_files, key=lambda lic: lic.identifier) return OrderedDict([ ('id', package.spdx_id), ('name', package.name), ('packageFileName', package.file_name), ('summary', package.summary), ('description', package.description), ('versionInfo', package.version), ('sourceInfo', package.source_info), ('downloadLocation', package.download_location), ('homepage', package.homepage), ('originator', cls.entity_to_dict(package.originator)), ('supplier', cls.entity_to_dict(package.supplier)), ('licenseConcluded', cls.license_to_dict(package.conc_lics)), ('licenseDeclared', cls.license_to_dict(package.license_declared)), ('copyrightText', package.cr_text), ('licenseComment', package.license_comment), ('checksum', cls.checksum_to_dict(package.check_sum)), ('files', cls.files_to_list(sorted(package.files))), ('licenseInfoFromFiles', [cls.license_to_dict(lic) for lic in lics_from_files]), ('verificationCode', OrderedDict([ ('value', package.verif_code), ('excludedFilesNames', sorted(package.verif_exc_files))]) ) ]) @classmethod def files_to_list(cls, files): """ Represents a list of spdx.file.File as a Python list of dictionaries """ files_list = [] for file in files: lics_from_files = sorted(file.licenses_in_file, key=lambda lic: lic.identifier) contributors = sorted(file.contributors, key=lambda c: c.name) file_dict = OrderedDict([ ('id', file.spdx_id), ('name', file.name), ('type', file.type), ('comment', file.comment), ('licenseConcluded', cls.license_to_dict(file.conc_lics)), ('copyrightText', file.copyright), ('licenseComment', file.license_comment), ('notice', file.notice), ('checksum', cls.checksum_to_dict(file.chk_sum)), ('licenseInfoFromFiles', [cls.license_to_dict(lic) for lic in lics_from_files]), ('contributors', [cls.entity_to_dict(contributor) for contributor in contributors]), ('dependencies', sorted(file.dependencies)), ('artifactOfProjectName', file.artifact_of_project_name), ('artifactOfProjectHome', file.artifact_of_project_home), ('artifactOfProjectURI', file.artifact_of_project_uri), ]) files_list.append(file_dict) return files_list @classmethod def ext_document_references_to_list(cls, ext_doc_refs): """ Represents a list of spdx.document.ExternalDocumentRef as a Python list of dictionaries """ ext_doc_refs_list = [] for ext_doc_ref in ext_doc_refs: ext_doc_ref_dict = OrderedDict([ ('externalDocumentId', ext_doc_ref.external_document_id), ('spdxDocumentNamespace', ext_doc_ref.spdx_document_uri), ('checksum', cls.checksum_to_dict(ext_doc_ref.check_sum)), ]) ext_doc_refs_list.append(ext_doc_ref_dict) return ext_doc_refs_list @classmethod def extracted_licenses_to_list(cls, extracted_licenses): """ Represents a list of spdx.document.ExtractedLicense as a Python list of dictionaries """ extracted_licenses_list = [] for extracted_license in extracted_licenses: extracted_license_dict = OrderedDict([ ('name', extracted_license.full_name), ('identifier', extracted_license.identifier), ('text', extracted_license.text), ('comment', extracted_license.comment), ('cross_refs', sorted(extracted_license.cross_ref)), ]) if extracted_license_dict not in extracted_licenses_list: extracted_licenses_list.append(extracted_license_dict) return extracted_licenses_list @classmethod def annotations_to_list(cls, annotations): """ Represents a list of spdx.annotation.Annotation as a Python list of dictionaries """ annotations_list = [] for annotation in annotations: annotation_dict = OrderedDict([ ('id', annotation.spdx_id), ('comment', annotation.comment), ('type', annotation.annotation_type), ('annotator', cls.entity_to_dict(annotation.annotator)), ('date', utils.datetime_iso_format(annotation.annotation_date)), ]) annotations_list.append(annotation_dict) return annotations_list @classmethod def reviews_to_list(cls, reviews): """ Represents a list of spdx.review.Review as a Python list of dictionaries """ reviews_list = [] for review in reviews: review_dict = OrderedDict([ ('comment', review.comment), ('reviewer', cls.entity_to_dict(review.reviewer)), ('date', utils.datetime_iso_format(review.review_date)) ]) reviews_list.append(review_dict) return reviews_list @classmethod def snippets_to_list(cls, snippets): """ Represents a list of spdx.snippet.Snippet as a Python list of dictionaries """ snippets_list = [] for snippet in snippets: lics_from_snippet = sorted(snippet.licenses_in_snippet, key=lambda lic: lic.identifier) snippet_dict = OrderedDict([ ('id', snippet.spdx_id), ('name', snippet.name), ('comment', snippet.comment), ('copyrightText', snippet.copyright), ('licenseComments', snippet.license_comment), ('fileId', snippet.snip_from_file_spdxid), ('licenseConcluded', cls.license_to_dict(snippet.conc_lics)), ('licenseInfoFromSnippet', [cls.license_to_dict(lic) for lic in lics_from_snippet]), ]) snippets_list.append(snippet_dict) return snippets_list @classmethod def to_dict(cls, doc): """ Represents a SPDX Document (spdx.document.Document) as nested Python types """ creators = sorted(doc.creation_info.creators, key=lambda c: c.name) return OrderedDict([ ('id', doc.spdx_id), ('specVersion', cls.version_to_dict(doc.version)), ('namespace', doc.namespace), ('name', doc.name), ('comment', doc.comment), ('dataLicense', cls.license_to_dict(doc.data_license)), ('licenseListVersion', cls.version_to_dict(doc.creation_info.license_list_version)), ('creators', [cls.entity_to_dict(creator) for creator in creators]), ('created', utils.datetime_iso_format(doc.creation_info.created)), ('creatorComment', doc.creation_info.comment), ('package', cls.package_to_dict(doc.package)), ('externalDocumentRefs', cls.ext_document_references_to_list(sorted(doc.ext_document_references))), ('extractedLicenses', cls.extracted_licenses_to_list(sorted(doc.extracted_licenses))), ('annotations', cls.annotations_to_list(sorted(doc.annotations))), ('reviews', cls.reviews_to_list(sorted(doc.reviews))), ('snippets', cls.snippets_to_list(sorted(doc.snippet))), ])