#! /usr/bin/env python
"""JSON-LD context loading and normalization helpers for Blockcerts.

Preloads the Open Badges and Blockcerts JSON-LD contexts from disk so that
canonicalization does not depend on network fetches, and provides pyld
document loaders plus compact/normalize wrappers used for certificate
hashing and verification.
"""
import json
import os
import re
from copy import deepcopy

import requests
import validators
from pyld import jsonld
from pyld.jsonld import JsonLdProcessor

from cert_schema.errors import *

try:
    from urllib.request import urlopen
except ImportError:
    from urllib2 import urlopen

SECURITY_CONTEXT_URL = 'https://w3id.org/security/v1'
OPEN_BADGES_V2_CONTEXT = 'https://openbadgespec.org/v2/context.json'
OPEN_BADGES_V2_CANONICAL_CONTEXT = 'https://w3id.org/openbadges/v2'
BLOCKCERTS_V2_ALPHA_CONTEXT = 'https://w3id.org/blockcerts/schema/2.0-alpha/context.json'
BLOCKCERTS_V2_ALPHA_SCHEMA = 'https://w3id.org/blockcerts/schema/2.0-alpha/schema.json'
BLOCKCERTS_V2_CONTEXT = 'https://w3id.org/blockcerts/schema/2.0/context.json'
BLOCKCERTS_V2_SCHEMA = 'https://w3id.org/blockcerts/schema/2.0/schema.json'
BLOCKCERTS_V2_CANONICAL_CONTEXT = 'https://w3id.org/blockcerts/v2'
BLOCKCERTS_VOCAB = 'https://w3id.org/blockcerts/v2#'

# URDNA2015 over N-Quads is the canonicalization used for certificate hashing.
JSONLD_OPTIONS = {'algorithm': 'URDNA2015', 'format': 'application/nquads'}

# Nonstandard contexts
BLOCKCERTS_V2_ALPHA_CONTEXT_2 = 'https://www.blockcerts.org/schema/2.0-alpha/context.json'
BLOCKCERTS_V2_CONTEXT_2 = 'https://www.blockcerts.org/schema/2.0/context.json'

# Sentinel vocab: fields not mapped by any declared context get prefixed with
# this IRI during expansion, which lets us detect them in the normalized form.
FALLBACK_VOCAB = 'http://fallback.org/'
FALLBACK_CONTEXT = {'@vocab': FALLBACK_VOCAB}

BASE_DIR = os.path.abspath(os.path.dirname(__file__))
JSON_LD_CONTEXT_V1_2 = os.path.join(BASE_DIR, '1.2/context.json')
JSON_LD_CONTEXT_V2_0_ALPHA = os.path.join(BASE_DIR, '2.0-alpha/context.json')
JSON_LD_CONTEXT_V2_0 = os.path.join(BASE_DIR, '2.0/context.json')
OBI_JSON_LD_CONTEXT_V2 = os.path.join(BASE_DIR, '2.0/obi.json')

# Map of context URL -> parsed context document; lets the document loader
# serve known contexts without any network I/O.
PRELOADED_CONTEXTS = {}

with open(OBI_JSON_LD_CONTEXT_V2) as data_file:
    obi_context = json.load(data_file)
PRELOADED_CONTEXTS[OPEN_BADGES_V2_CONTEXT] = obi_context
PRELOADED_CONTEXTS[OPEN_BADGES_V2_CANONICAL_CONTEXT] = obi_context

with open(JSON_LD_CONTEXT_V2_0_ALPHA) as data_file:
    bc_context = json.load(data_file)
PRELOADED_CONTEXTS[BLOCKCERTS_V2_ALPHA_CONTEXT] = bc_context
PRELOADED_CONTEXTS[BLOCKCERTS_V2_ALPHA_CONTEXT_2] = bc_context

with open(JSON_LD_CONTEXT_V2_0) as data_file:
    bc_context = json.load(data_file)
PRELOADED_CONTEXTS[BLOCKCERTS_V2_CONTEXT] = bc_context
PRELOADED_CONTEXTS[BLOCKCERTS_V2_CONTEXT_2] = bc_context
PRELOADED_CONTEXTS[BLOCKCERTS_V2_CANONICAL_CONTEXT] = bc_context


def to_loader_response(data, url):
    """Wrap a fetched document in the response dict shape pyld loaders return."""
    return {
        'contextUrl': None,
        'documentUrl': url,
        'document': data
    }


def load_document(url):
    """Fetch the document at ``url`` over HTTP.

    :param url: URL to retrieve; must pass syntactic URL validation
    :return: response body as text
    :raises InvalidUrlError: if ``url`` fails validation
    """
    if validators.url(url):
        response = requests.get(
            url, headers={'Accept': 'application/ld+json, application/json'}
        )
        return response.text
    raise InvalidUrlError('Could not validate ' + url)


def jsonld_document_loader(url):
    """
    Retrieves JSON-LD at the given URL. Propagates BlockcertValidationError is url is invalid
    or doesn't exist.

    :param url: the URL to retrieve
    :return: JSON-LD at the URL, in pyld loader-response form
    """
    data = load_document(url)
    return to_loader_response(data, url)


def preloaded_context_document_loader(url, override_cache=False):
    """Serve known context documents from the preloaded cache; fall back to HTTP.

    :param url: context URL to resolve
    :param override_cache: currently unused; cache is always consulted first
    :return: pyld loader-response dict
    """
    if url in PRELOADED_CONTEXTS:
        context = PRELOADED_CONTEXTS[url]
        return to_loader_response(context, url)
    return jsonld_document_loader(url)


def compact_with_json_ld_context(input_json, document_loader=preloaded_context_document_loader):
    """Compact ``input_json`` against the bundled v1.2 Blockcerts context.

    :param input_json: parsed JSON-LD document
    :param document_loader: pyld document loader used to resolve remote contexts
    :return: the compacted document
    """
    options = {}
    if document_loader:
        options['documentLoader'] = document_loader
    with open(JSON_LD_CONTEXT_V1_2) as context_f:
        ctx = json.load(context_f)
    compacted = jsonld.compact(input_json, ctx, options=options)
    return compacted


def normalize_jsonld(json_ld_to_normalize, document_loader=preloaded_context_document_loader,
                     detect_unmapped_fields=False):
    """
    Canonicalize the JSON-LD certificate.

    The detect_unmapped_fields parameter is a temporary, incomplete, workaround to detecting
    fields that do not correspond to items in the JSON-LD schemas. It works in the Blockcerts
    context because:
    - Blockcerts doesn't use a default vocab
    - fallback.org is not expected to occur

    Because unmapped fields get dropped during canonicalization, this uses a trick of adding
    {"@vocab": "http://fallback.org/"} to the json ld, which will cause any unmapped fields
    to be prefixed with http://fallback.org/.

    If a @vocab is already there (i.e. an issuer adds this in their extensions), then
    tampering will change the normalized form, hence the hash of the certificate, so we will
    still detect this during verification.

    This issue will be addressed in a first-class manner in the future by the pyld library.

    :param json_ld_to_normalize: parsed JSON-LD document to canonicalize
    :param document_loader: pyld document loader used to resolve contexts
    :param detect_unmapped_fields: if True, raise on fields unknown to the schema
    :return: normalized document as an N-Quads string
    :raises BlockcertValidationError: if unmapped fields are detected
    """
    json_ld = json_ld_to_normalize
    options = deepcopy(JSONLD_OPTIONS)
    if document_loader:
        options['documentLoader'] = document_loader

    if detect_unmapped_fields:
        json_ld = deepcopy(json_ld_to_normalize)
        # Read the context from the deep copy: when '@context' is already a
        # list, JsonLdProcessor.get_values returns that very list, so appending
        # the fallback context to a list taken from the original document would
        # mutate the caller's input.
        prev_context = JsonLdProcessor.get_values(json_ld, '@context')
        add_fallback = True
        for pc in prev_context:
            if isinstance(pc, dict) and '@vocab' in pc:
                # this already has a vocab; unmapped fields will be detected in the hash
                add_fallback = False
                break
        if add_fallback:
            prev_context.append(FALLBACK_CONTEXT)
        json_ld['@context'] = prev_context

    normalized = jsonld.normalize(json_ld, options=options)

    if detect_unmapped_fields and FALLBACK_VOCAB in normalized:
        # Raw string avoids the invalid '\.' escape warning on modern Python;
        # the matched pattern is unchanged.
        unmapped_fields = [m.group(0)
                           for m in re.finditer(r'<http://fallback\.org/(.*)>', normalized)]
        error_string = ', '.join(unmapped_fields)
        raise BlockcertValidationError(
            'There are some fields in the certificate that do not correspond to the expected schema. This has likely been tampered with. Unmapped fields are: ' + error_string)
    return normalized


if __name__ == '__main__':
    options = {}
    document_loader = preloaded_context_document_loader
    options['documentLoader'] = document_loader

    filename = '../examples/2.0/sample_valid-2.0.json'
    with open(filename) as data_f:
        data = json.load(data_f)

    compacted = compact_with_json_ld_context(data, document_loader)
    expanded = jsonld.expand(compacted, options=options)

    options = {'algorithm': 'URDNA2015', 'format': 'application/nquads'}
    if document_loader:
        options['documentLoader'] = document_loader
    normalized = jsonld.normalize(data, options)
    print(json.dumps(expanded, indent=2))