# Python source code of openapi

################################################################################
## OpenAPI Specification Parsing
##
## Parses an OpenAPI spec in YAML or JSON and generates files from it
################################################################################

import jinja2
import json
import requests
import datetime
from urllib.parse import unquote as urldecode
from copy import deepcopy
from ruamel.yaml.comments import CommentedMap as YamlMap
from ruamel.yaml.comments import CommentedSeq as YamlSeq

from dactyl.common import *
from dactyl.http_constants import HTTP_METHODS, HTTP_STATUS_CODES, HTTP_METHODS_WITH_REQ_BODIES

# Suffixes appended to the API slug to build the file names of generated
# pages: the data types table of contents (and each data type page), and the
# methods tables of contents.
DATA_TYPES_SUFFIX = "-data-types"
METHOD_TOC_SUFFIX = "-methods"

# File names of the Jinja templates used to render each kind of page.
TOC_TEMPLATE = "template-openapi_endpoint_toc.md"
TAG_TOC_TEMPLATE = "template-openapi_endpoint_tag_toc.md"
ENDPOINT_TEMPLATE = "template-openapi_endpoint.md"
DATA_TYPES_TOC_TEMPLATE = "template-openapi_data_types_toc.md"
DATA_TYPE_TEMPLATE = "template-openapi_data_type.md"

class ApiDef:
    """
    An OpenAPI definition read from a local file or a URL, together with the
    helpers that render it through Jinja Markdown templates and describe the
    resulting pages to Dactyl.
    """
    # Class-level cache of instances keyed by spec path; from_path() fills it
    # and returns the cached instance on later calls.
    cached_specs = {}
    def __init__(self, spec_path, api_slug=None, extra_fields={},
                template_path=None):
        self.read_swag(spec_path)
        self.clean_up_swag()
        self.deref_swag()

        try:
            self.api_title = self.swag["info"]["title"]
        except IndexError:
            self.api_title = fname.replace(".yml","")+" API (working title)"

        if api_slug is None:
            self.api_slug = slugify(self.api_title)
        else:
            self.api_slug = api_slug

        self.extra_fields = extra_fields
        self.setup_jinja_env(template_path)

    @classmethod
    def from_path(cls, spec_path, api_slug=None, extra_fields={},
                template_path=None):
        """
        Instantiate an ApiDef instance only if we haven't done so already. This
        saves the trouble of fetching & parsing API specs more than once.
        """
        if spec_path in cls.cached_specs.keys():
            return cls.cached_specs[spec_path]

        apidef = cls(spec_path, api_slug=api_slug,
                extra_fields=extra_fields, template_path=template_path)
        cls.cached_specs[spec_path] = apidef
        return apidef

    def read_swag(self, spec_path):
        """
        Load the OpenAPI definition into self.swag.

        A spec_path starting with "http:" or "https:" is fetched over the
        network; anything else is opened as a local UTF-8 file. Raises
        requests.RequestException if the server answers with a status other
        than 200.
        """
        logger.debug("Reading OpenAPI definition from %s"%spec_path)

        is_url = spec_path[:5] == "http:" or spec_path[:6] == "https:"
        if not is_url:
            with open(spec_path, "r", encoding="utf-8") as spec_file:
                self.swag = yaml.load(spec_file)
            return

        response = requests.get(spec_path)
        if response.status_code != 200:
            raise requests.RequestException("Status code for page was not 200")
        self.swag = yaml.load(response.text)


    def setup_jinja_env(self, template_path=None):
        """
        Build self.env, the Jinja environment used to inject OpenAPI data
        into Markdown templates. When template_path is given, templates found
        there take precedence over the ones packaged with this module.
        """
        if template_path is not None:
            logger.debug("OpenAPI spec: preferring templates from %s"%template_path)
            # ChoiceLoader tries its loaders in order, so custom templates
            # come first and the packaged ones act as the fallback.
            candidates = [
                jinja2.FileSystemLoader(template_path),
                jinja2.PackageLoader(__name__),
            ]
            loader = jinja2.ChoiceLoader(candidates)
        else:
            loader = jinja2.PackageLoader(__name__)

        env = jinja2.Environment(loader=loader, extensions=['jinja2.ext.i18n'])
        env.lstrip_blocks = True
        # NOTE(review): Jinja documents trim_blocks and lstrip_blocks;
        # rstrip_blocks does not look like a recognized option -- confirm
        # whether trim_blocks was intended before changing template output.
        env.rstrip_blocks = True
        self.env = env

    @staticmethod
    def dig(parts, context):
        """
        Search a context object for something matching a $ref (recursive)
        """
        key = parts[0].replace("~1", "/").replace("~0", "~") # unescaped
        key = urldecode(key)
        try:
            key = int(key)
        except:
            pass
        if key not in context.keys():
            raise IndexError(key)

        if len(parts) == 1:
            return context[key]
        else:
            return ApiDef.dig(parts[1:], context[key])

    def deref(self, ref):
        """Look through the YAML for a specific reference key, and return
        the value that key represents.
        - Raises IndexError if the key isn't found
            in the YAML.
        - add_title: If true, provide a "title" field when the reference
            resolves to an object that doesn't have a "title". The provided
            "title" value is based on the key that contained the reference
        """
        assert len(ref) > 1 and ref[0] == "#" and ref[1] == "/"
        parts = ref[2:].split("/")
        assert len(parts) > 0

        return self.dig(parts, self.swag)

    def deref_swag(self):
        """
        Walk the OpenAPI specification for $ref objects and resolve them to
        the values they reference. Assumes the entire spec is contained in a
        single file.

        self.swag is replaced by a fresh tree of YamlMap / YamlSeq containers.
        A mapping that has a "$ref" key is replaced entirely by the value it
        references, so any keys next to "$ref" are dropped.

        A reference that leads back to itself (directly or through other
        references) is left in place as an unresolved $ref object at the point
        where the expansion would start repeating, instead of recursing
        without end.
        """

        def deref_yaml(yaml_value, active_refs=()):
            # active_refs is the chain of $ref strings being expanded on the
            # way down to yaml_value; seeing one of them again means a cycle.
            if hasattr(yaml_value, "keys"): # Dictionary-like type
                if "$ref" in yaml_value.keys():
                    ref = yaml_value["$ref"]
                    if ref in active_refs:
                        # Circular reference: stop expanding here
                        return yaml_value
                    # It's a reference; deref it. The referenced object may
                    # contain more references, so resolve those too.
                    reffed_value = self.deref(ref)
                    return deref_yaml(reffed_value, active_refs + (ref,))

                # recurse through each key/value pair looking for refs
                the_copy = YamlMap()
                for k, v in yaml_value.items():
                    the_copy[k] = deref_yaml(v, active_refs)
                return the_copy

            if hasattr(yaml_value, "append"): # List-like type
                # recurse through each item looking for refs
                the_copy = YamlSeq()
                for item in yaml_value:
                    the_copy.append(deref_yaml(item, active_refs))
                return the_copy

            # Probably a basic type; base case: return the value
            return yaml_value

        self.swag = deref_yaml(self.swag)

    def clean_up_swag(self):
        # Give each schema in the "components" a title if it's missing one
        schemas = self.swag.get("components", {}).get("schemas", {})
        for key,schema in schemas.items():
            title = schema.get("title", key)
            schema["title"] = title
            if "example" in schema:
                try:
                    j = json.dumps(schema["example"], indent=4, default=self.json_default)
                    schema["example"] = j
                except Exception as e:
                    logger.debug("%s example isn't json: %s"%(title,j))
        self.deref_swag()

        # Find all tags used in endpoints and add any undefined ones to the
        # top level OpenAPI Object
        taglist = self.swag.get("tags", [])
        for path, method, endpoint in self.endpoint_iter():
            etags = endpoint.get("tags", [])
            for tag in etags:
                if tag not in [t.get("name") for t in taglist]:
                    taglist.append({
                        "name": tag,
                    })
        self.swag["tags"] = taglist


    def render_method_toc(self):
        """
        Render TOC_TEMPLATE with the spec's top-level fields plus the shared
        context. The template also receives "endpoints" (an iterator over all
        endpoints) and "endpoints_by_tag" (the endpoint_iter method itself,
        which takes a tag name).
        """
        template = self.env.get_template(TOC_TEMPLATE)
        context = self.new_context()
        context.update({
            "endpoints": self.endpoint_iter(),
            "endpoints_by_tag": self.endpoint_iter,
        })
        return template.render(self.swag, **context)

    def render_tag_toc(self, tag):
        """
        Render TAG_TOC_TEMPLATE for one tag. The tag is passed to the template
        as "tag", next to "endpoints" (an iterator over all endpoints) and
        "endpoints_by_tag" (the endpoint_iter method itself).
        """
        template = self.env.get_template(TAG_TOC_TEMPLATE)
        context = self.new_context()
        context.update({
            "tag": tag,
            "endpoints": self.endpoint_iter(),
            "endpoints_by_tag": self.endpoint_iter,
        })
        return template.render(self.swag, **context)

    def render_data_types_toc(self):
        """
        Render DATA_TYPES_TOC_TEMPLATE, passing the (title, schema) pairs from
        data_type_iter() to the template as "schemas".
        """
        template = self.env.get_template(DATA_TYPES_TOC_TEMPLATE)
        context = self.new_context()
        context.update({"schemas": self.data_type_iter()})
        return template.render(self.swag, **context)

    def render_data_type(self, key, schema):
        """
        Render DATA_TYPE_TEMPLATE for a single schema. The schema's own fields
        are the template's top-level variables; a schema without a "title"
        is given one (key) first, which modifies the schema in place.
        """
        template = self.env.get_template(DATA_TYPE_TEMPLATE)
        context = self.new_context()
        has_title = "title" in schema.keys()
        if not has_title:
            schema["title"] = key
        return template.render(schema, **context)

    def render_endpoint(self, path, method, endpoint):
        """
        Render ENDPOINT_TEMPLATE for one operation. The endpoint's own fields
        are the template's top-level variables; the method, the path, the
        path and query parameters, and an example request body are added to
        the shared context.
        """
        template = self.env.get_template(ENDPOINT_TEMPLATE)
        context = self.new_context()

        def params_in(location):
            # Parameters of this endpoint whose "in" field equals location
            return [p for p in endpoint.get("parameters",[]) if p["in"]==location]

        #TODO: header & cookie params?? example response body?
        context.update({
            "method": method,
            "path": path,
            "path_params": params_in("path"),
            "query_params": params_in("query"),
            "x_example_request_body": self.get_x_example_request_body(path,method,endpoint),
        })
        return template.render(endpoint, **context)

    def get_x_example_request_body(self, path, method, endpoint):
        """
        Return an example request body for an endpoint as pretty-printed
        JSON, or an empty string when the method takes no request body or no
        example can be found.

        The media types under requestBody.content are tried in order. The
        first one with an "example", or with an "examples" map whose first
        entry has a "value", supplies the result; media types without one
        are skipped. If the example can't be serialized, it is returned
        as-is.
        """
        if method not in HTTP_METHODS_WITH_REQ_BODIES:
            return ""

        # "or {}" also covers keys that are present but null in the spec
        request_body = endpoint.get("requestBody") or {}
        content = request_body.get("content") or {}
        if not content:
            return ""

        for mediatype, content_inner in content.items():
            if content_inner is not None and "example" in content_inner:
                # single example
                ex = content_inner["example"]
            else:
                try:
                    # multiple examples? use the first one
                    ex = list(content_inner["examples"].values())[0]["value"]
                except (IndexError, KeyError, AttributeError, TypeError):
                    logger.debug("Media type %s didn't have an example value"%mediatype)
                    # Another media type may still provide one
                    continue

            try:
                return json.dumps(ex, indent=4, separators=(',', ': '), default=self.json_default)
            except TypeError as e:
                # Imported here because this file has no top-level import of
                # traceback.
                import traceback
                traceback.print_tb(e.__traceback__)
                logger.debug("json dumps failed on example '%s'"%ex)
                return ex

        logger.debug("couldn't find an example value for %s %s"%(method,path))
        return ""

    def get_endpoint_renderer(self, path, method, endpoint):
        return lambda: self.render_endpoint(path, method, endpoint)

    def get_data_type_renderer(self, key, schema):
        return lambda: self.render_data_type(key, schema)

    def get_tag_toc_renderer(self, tag):
        return lambda: self.render_tag_toc(tag)

    def endpoint_iter(self, tag=None):
        paths = self.swag.get("paths", {})
        for path, path_def in paths.items():
            for method in HTTP_METHODS:
                if method in path_def.keys():
                    endpoint = path_def[method]
                    if tag==None or tag in endpoint.get("tags", []) or \
                            (tag=="Uncategorized" and endpoint.get("tags", []) == []):
                        # TODO: Inherit parameters from the path definition itself
                        # Fill in some "sensible defaults" for fields we really want
                        operationId = endpoint.get("operationId", slugify(method+path))
                        endpoint["operationId"] = operationId
                        summary = endpoint.get("summary", operationId)
                        endpoint["summary"] = summary
                        yield (path, method, endpoint)

    def data_type_iter(self):
        schemas = self.swag.get("components", {}).get("schemas", {})
        for key,schema in schemas.items():
            title = schema.get("title", key)
            schema["title"] = title
            yield (title, schema)

    def create_pagelist(self):
        """
        Return an array of pages representing this API, which Dactyl can use
        as it would use a normal list of pages in the config.

        Pages come in this order: the all-methods table of contents; for each
        tag, its table of contents followed by the endpoints whose first tag
        is that tag; the data types table of contents; then one page per
        schema in components.schemas. Every page starts as a deep copy of
        self.extra_fields.

        Endpoints without tags are listed under an "Uncategorized" tag, which
        is added when needed even if the spec's "tags" list doesn't name it.
        """
        pages = []

        # TODO: make all the blurb/category strings template strings that can
        #       be translated and configured

        # add methods table of contents
        toc_page = deepcopy(self.extra_fields)
        toc_page.update({
            "name": "All "+self.api_title+" Methods",
            "__md_generator": self.render_method_toc,
            "html": self.api_slug+METHOD_TOC_SUFFIX+".html",
            "blurb": "List of methods/endpoints available in "+self.api_title,
            "category": "All "+self.api_title+" Methods",
        })
        if "parent" not in toc_page:
            toc_page["parent"] = "index.html"
        pages.append(toc_page)

        # Work on a copy of the tag list so the spec itself isn't modified.
        # The top-level "tags" only lists tags that endpoints actually use,
        # so add "Uncategorized" whenever some endpoint has no tags at all;
        # otherwise those endpoints would get no pages.
        uncategorized = {"name": "Uncategorized", "description": ""}
        tags = list(self.swag.get("tags", [uncategorized]) or [])
        has_untagged = any(not endpoint.get("tags")
                for _path, _method, endpoint in self.endpoint_iter())
        if has_untagged and not any(t.get("name") == "Uncategorized" for t in tags):
            tags.append(uncategorized)

        # add a table of contents per tag
        for tag in tags:
            tag_toc_page = deepcopy(self.extra_fields)
            tag_toc_page.update({
                "name": tag["name"].title()+" Methods",
                "__md_generator": self.get_tag_toc_renderer(tag),
                "html": self.api_slug+"-"+tag["name"]+METHOD_TOC_SUFFIX+".html",
                "blurb": tag.get("description",""),
                "category": tag["name"].title()+" Methods",
                "parent": toc_page["html"],
            })
            pages.append(tag_toc_page)

            # Add endpoints for this tag, except duplicates
            for path, method, endpoint in self.endpoint_iter(tag["name"]):
                # "or" also covers an explicit empty tags list, which would
                # otherwise fail on [0]
                tag0 = (endpoint.get("tags") or ["Uncategorized"])[0]
                if tag0 != tag["name"]:
                    continue # Skip method whose primary tag is not this one
                method_page = deepcopy(self.extra_fields)
                method_page.update({
                    "name": endpoint["summary"],
                    "__md_generator": self.get_endpoint_renderer(path, method, endpoint),
                    "html": self.method_link(path, method, endpoint),
                    "blurb": endpoint.get("description", endpoint["operationId"]+" method"),
                    # NOTE(review): the tag TOC page title-cases the tag in
                    # its category but this one doesn't -- confirm intended.
                    "category": tag0+" Methods",
                    "parent": tag_toc_page["html"],
                })
                pages.append(method_page)

        # add data types table of contents
        data_types_page = deepcopy(self.extra_fields)
        data_types_page.update({
            "name": self.api_title+" Data Types",
            "__md_generator": self.render_data_types_toc,
            "html": self.api_slug+DATA_TYPES_SUFFIX+".html",
            "blurb": "List of all data types defined for "+self.api_title,
            "category": self.api_title+" Data Types",
            "parent": toc_page["html"],
        })
        pages.append(data_types_page)

        # add each data type from the components.schemas list
        for title, schema in self.data_type_iter():
            data_type_page = deepcopy(self.extra_fields)
            data_type_page.update({
                "name": title,
                "__md_generator": self.get_data_type_renderer(title, schema),
                "html": self.type_link(title),
                "blurb": "Definition of "+title+" data type",
                "category": self.api_title+" Data Types",
                "parent": data_types_page["html"],
            })
            pages.append(data_type_page)

        return pages

    def add_metadata(self, target_data):
        """
        Extend the provided target_data dictionary with metadata pulled from
        the spec.
        """
        info = self.swag.get("info", {"title":self.api_title, "version":"0.0.0"})
        target_data["info"] = info


    def new_context(self):
        """
        Build a fresh dictionary of the values and helpers every template
        gets: the API title, the link builders, the HTTP constants, the whole
        spec (as "spec"), a debug logger, slugify, and md_escape.
        """
        return dict(
            api_title=self.api_title,
            type_link=self.type_link,
            method_link=self.method_link,
            HTTP_METHODS=HTTP_METHODS,
            HTTP_STATUS_CODES=HTTP_STATUS_CODES,
            spec=self.swag,
            debug=logger.debug,
            slugify=slugify,
            md_escape=self.md_escape,
        )

    @staticmethod
    def md_escape(text):
        """
        Escape potential Markdown syntax in a string.
        This is meant to be passed to the templates.
        """
        specialchars = "\\`*_{}[]()#+-.!"
        s = ""
        for c in text:
            if c in specialchars:
                s += "\\"
            s += c
        return s

    @staticmethod
    def json_default(o):
        """
        Serializer function for JSON (from YAML)
        """
        if isinstance(o, (datetime.date, datetime.datetime)):
            return o.isoformat()
        else:
            return str(o)

    def type_link(self, title):
        """Return the file name of the page for the data type called title."""
        # TODO: in "md" mode, use ".md" suffix
        pieces = [
            self.api_slug,
            DATA_TYPES_SUFFIX,
            "-",
            slugify(title.lower()),
            ".html",
        ]
        return "".join(pieces)

    def method_link(self, path, method, endpoint):
        """
        Return the file name of the page for an endpoint, built from the API
        slug and the endpoint's "operationId". path and method are accepted
        but not used.
        """
        # Slugify only the operationId; the extension is appended afterwards,
        # the same way type_link() does it.
        return self.api_slug+"-"+slugify(endpoint["operationId"])+".html"