# Python source code of compliance

# -*- coding: utf-8 -*-
import json
import logging
import random
import time
try:
    import urllib.parse as urllib  # For Python 3 compat
except ImportError:
    import urllib
import uuid

import ga4gh.drs.schema
from ga4gh.drs.test import DataRepositoryServiceTest, test_requires

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)


class AbstractComplianceTest(DataRepositoryServiceTest):
    """
    This class implements a number of compliance tests for Object Service
    implementations. It is meant to provide a single, standardized test
    harness to verify that a given DOS implementation acts in a manner
    consistent with the schema.

    Using the test harness is pretty straightforward, and only requires
    implementing a method that can make requests to the service under test
    (:meth:`~AbstractComplianceTest._make_request`). As this class subclasses
    :class:`unittest.TestCase`, all the functions exposed to a subclass
    of :class:`unittest.TestCase` (e.g. :meth:`~unittest.TestCase.setUpClass`)
    are available for use.

    This test suite does not perform any authentication testing. Requests made
    during testing are made with the assumption that they will be properly
    authenticated in :meth:`_make_request` or similar.

    For a service built using Chalice, you would likely be able to write
    something similar to this::

        from ga4gh.drs.test.compliance import AbstractComplianceTest
        from chalice import LocalGateway, Config
        from my_chalice_app import chalice_app

        class TestApp(AbstractComplianceTest):
            @classmethod
            def setUpClass(cls):
                cls.lg = LocalGateway(chalice_app, Config())

            @classmethod
            def _make_request(cls, meth, path, headers=None, body=None):
                headers = headers or {}
                r = cls.lg.handle_request(method=meth, path='/ga4gh/drs/v1' + path,
                                          headers=headers, body=body)
                return r['body'], r['statusCode']

    You would then be able to run the compliance test suite however you
    normally run your tests (e.g. ``nosetests`` or ``python -m unittest discover``).

    :var supports: a list of supported DOS operations. By default, this is
                   the list of all DOS operations, named by the `operationId`
                   key in the schema::

                      supports = ['GetServiceInfo', 'GetBundleVersions',
                                  'CreateBundle', 'ListBundles',
                                  'UpdateObject', 'GetObject', ...]

                   Adding / removing operations from this list will adjust
                   which tests are run. So, doing something like::

                      class Test(AbstractComplianceTest):
                          supports = ['ListObjects']

                   would skip all tests calling UpdateBundle, GetBundle,
                   and any other endpoint that is not ListObjects.
    """
    # Populate :var:`supports` with the `operationId` of each DOS endpoint
    # specified in the schema. A comprehension is used instead of nested
    # ``for`` statements so that, on Python 3, the loop variables are not
    # left behind as stray class attributes.
    supports = [
        operation['operationId']
        for path_item in ga4gh.drs.schema.present_schema()['paths'].values()
        for operation in path_item.values()
    ]

    @classmethod
    def _make_request(cls, meth, path, headers=None, body=None):
        """
        Hook that performs a single HTTP request against the implementation
        under test. Subclasses are expected to override it; this base
        version does nothing but raise :class:`NotImplementedError`.

        ``path`` is passed exactly as it is written in the Object Service
        schema, so an override may have to prefix it with the ``basePath``
        of the service (e.g. ``/ga4gh/drs/v1``) before sending the request.

        :param str meth: HTTP method of the request (GET, PUT, ...)
        :param str path: request path without the hostname (e.g.
                         `/bundles`)
        :param dict headers: headers to send along with the request
        :param body: payload of the request, serialized as JSON
        :rtype: tuple
        :returns: a two-tuple made of the response body (a JSON-formatted
                  string) and the response status code (an int)
        :raises NotImplementedError: always, unless overridden
        """
        raise NotImplementedError()

    @classmethod
    def drs_request(cls, meth, path, headers=None, body=None, expected_status=200):
        """
        Wrapper function around :meth:`AbstractComplianceTest._make_request`.
        Logs the request being made, makes the request with
        :meth:`._make_request`, checks the status code, and performs
        transparent JSON de/serialization.

        It is assumed that any request made through this function is a
        request made to the underlying DOS implementation, so ``path`` is a
        schema path and not a full URL - e.g.,
        ``cls.drs_request('GET', 'https://example.com/')`` should be
        expected to fail.

        The ``headers`` mapping supplied by the caller is never modified: a
        copy is taken before the default ``Content-Type`` is filled in.

        :param str meth: the HTTP method to use in the request (i.e. GET,
                         PUT, etc.)
        :param str path: path to make a request to, sans hostname (e.g.
                         `/bundles`)
        :param dict headers: headers to include with the request
        :param dict body: data to be included in the request body
                          (**not** serialized as JSON). It is passed through
                          :func:`json.dumps` before being handed to
                          :meth:`._make_request`, so ``None`` is sent as the
                          JSON literal ``null``.
        :param int expected_status: expected HTTP status code. If the status
                                    code is not expected, an error will be
                                    raised.
        :rtype: dict
        :returns: the deserialized JSON response body
        :raises AssertionError: if the returned status code differs from
                                ``expected_status``
        :raises ValueError: if the response body is not valid JSON
        """
        logger.debug("%s %s", meth, path)

        # Work on a private copy so that adding the default Content-Type
        # below cannot leak into a dict owned (and maybe reused) by the caller.
        headers = dict(headers) if headers else {}
        # DOS only really speaks JSON, so we can assume that if data is being
        # sent with a request, that data will be JSON
        if body and 'Content-Type' not in headers:
            headers['Content-Type'] = 'application/json'

        response, status = cls._make_request(meth=meth, path=path, headers=headers,
                                             body=json.dumps(body))
        logger.info("%s %s [%s]", meth, path, status)

        # We could use :meth:`assertEqual` here, but then :meth:`drs_request`
        # would have to be an instance method. Keeping it a class method
        # allows one-time server setup to be performed in :meth:`setUpClass`,
        # which must necessarily be a class method; the only thing lost is a
        # slightly prettier error message.
        if status != expected_status:
            raise AssertionError(
                "{meth} {path} returned {status}, expected {expected_status}: {response}".format(
                    meth=meth, path=path, status=status,
                    expected_status=expected_status, response=response))

        # Return the deserialized response body
        return json.loads(response)

    @staticmethod
    def get_query_url(path, **kwargs):
        """
        Builds a URL by URL-encoding ``kwargs`` and appending the result to
        ``path`` after a ``?`` separator, e.g.::

            >>> self.get_query_url('/objects', alias=123)
            '/objects?alias=123'

        :param str path: URL path without query parameters
        :param kwargs: query parameters
        :rtype: str
        """
        query_string = urllib.urlencode(kwargs)
        return '?'.join((path, query_string))

    def get_random_object(self):
        """
        Picks a 'random' data object: a ListObjects request with a page
        size of 100 is made and one entry of the returned ``objects`` list
        is chosen with :func:`random.choice`.

        Because a ListObjects request is issued, test cases relying on this
        helper should declare that operation with :func:`test_requires`.

        Usage::

            obj, url = self.get_random_object()

        :returns: the chosen data object as a dict together with its
                  relative URL (e.g. '/objects/abcdefg-12345') as a string
        :rtype: tuple
        """
        listing_url = self.get_query_url('/objects', page_size=100)
        candidates = self.drs_request('GET', listing_url)['objects']
        chosen = random.choice(candidates)
        return chosen, '/objects/' + chosen['id']

    def get_random_bundle(self):
        """
        Picks a 'random' data bundle: a ListBundles request with a page
        size of 100 is made and one entry of the returned ``bundles`` list
        is chosen with :func:`random.choice`.

        :returns: the chosen data bundle as a dict together with its
                  relative URL (``'/bundles/' + id``) as a string
        :rtype: tuple
        """
        listing_url = self.get_query_url('/bundles', page_size=100)
        candidates = self.drs_request('GET', listing_url)['bundles']
        chosen = random.choice(candidates)
        return chosen, '/bundles/' + chosen['id']

    # # ListObject tests
    @test_requires('ListObjects')
    def test_list_objects_simple(self):
        """
        Smoke test: a plain `GET /objects` must succeed and yield a
        non-empty (truthy) response body.
        """
        self.assertTrue(self.drs_request('GET', '/objects'))

    @test_requires('ListObjects')
    def test_list_objects_by_checksum(self):
        """
        Checks the checksum filter of ListObjects.

        Checksums are assumed to be unique between data objects. A random
        data object is selected and, for each of its checksums, ListObjects
        is queried with that checksum and checksum type. Each query must
        return exactly one result, and that result must carry the id of the
        object we started from.
        """
        expected, _ = self.get_random_object()
        for checksum in expected['checksums']:
            filters = {'checksum': checksum['checksum'],
                       'checksum_type': checksum['type']}
            listing = self.drs_request('GET', self.get_query_url('/objects', **filters))
            matches = listing['objects']
            self.assertEqual(len(matches), 1)
            self.assertEqual(matches[0]['id'], expected['id'])

    @test_requires('ListObjects')
    def test_list_objects_by_alias(self):
        """
        Checks the alias filter of ListObjects.

        The first alias of a randomly selected data object is used as the
        filter of a second ListObjects query; every object returned by that
        query must list the alias among its own aliases.
        """
        reference, _ = self.get_random_object()
        wanted_alias = reference['aliases'][0]
        listing = self.drs_request('GET', self.get_query_url('/objects', alias=wanted_alias))
        for candidate in listing['objects']:
            self.assertIn(wanted_alias, candidate['aliases'])

    @test_requires('ListObjects')
    def test_list_objects_with_nonexist_alias(self):
        """
        Filtering ListObjects by an alias that no object carries must
        produce an empty ``objects`` list.
        """
        # A freshly generated UUID is unlikely to be in use as an alias
        unknown_alias = str(uuid.uuid1())
        query_url = self.get_query_url('/objects', alias=unknown_alias)
        response = self.drs_request('GET', query_url)
        self.assertEqual(len(response['objects']), 0)

    @test_requires('ListObjects')
    def test_list_objects_paging(self):
        """
        Exercises the ``page_size`` and ``page_token`` query parameters of
        ListObjects.
        """
        def list_page(**params):
            # One ListObjects request with the given query parameters
            return self.drs_request('GET', self.get_query_url('/objects', **params))

        # The number of returned objects must follow page_size
        for size in (3, 7):
            self.assertEqual(len(list_page(page_size=size)['objects']), size)

        # Knowing that page_size is honoured, fetch two objects at once and
        # then the same two objects one page at a time: the single-object
        # pages must line up with the two-object page.
        pair = list_page(page_size=2)
        self.assertEqual(len(pair['objects']), 2)
        head = list_page(page_size=1)
        self.assertEqual(len(head['objects']), 1)
        tail = list_page(page_size=1, page_token=head['next_page_token'])
        self.assertEqual(len(tail['objects']), 1)
        self.assertEqual(head['objects'][0], pair['objects'][0])
        self.assertEqual(tail['objects'][0], pair['objects'][1])

    @test_requires('ListObjects')
    def test_list_object_querying(self):
        """
        Checks that ListObjects combines several query parameters properly.
        """
        # ListObjects can be filtered by checksum, URL, and alias. The
        # filters below are taken from one data object whose checksum, URL,
        # and alias are expected to be unique.
        target, _ = self.get_random_object()
        first_checksum = target['checksums'][0]
        baseline = {
            'url': target['urls'][0]['url'],
            'alias': target['aliases'][0],
            'checksum': first_checksum['checksum'],
            'checksum_type': first_checksum['type']
        }

        def query(expected_results, expected_object=None, **kwargs):
            """
            Runs a ListObjects query filtered by the URL, alias and checksum
            of the data object selected above, with any of those filters
            optionally overridden.

            :param int expected_results: number of objects the query is
                                         expected to return
            :param dict expected_object: when exactly one result is
                                         expected, the object that single
                                         result must be equal to
            :param kwargs: query parameters overriding / extending the
                           default filters
            """
            filters = dict(baseline)
            filters.update(kwargs)
            listing = self.drs_request('GET', self.get_query_url('/objects', **filters))
            found = listing['objects']
            self.assertEqual(len(found), expected_results)
            if expected_results == 1 and expected_object:
                self.assertEqual(expected_object, found[0])

        bogus = str(uuid.uuid1())

        # With all three filters taken from the selected object, and those
        # attributes being unique, exactly that object must come back.
        query(expected_results=1, expected_object=target)

        # Replacing any single filter with a value that is unlikely to
        # exist must yield no results, because the criteria are supposed to
        # be ANDed together. A non-empty result suggests the criteria are
        # being ORed instead.
        for overridden in ('url', 'alias', 'checksum'):
            query(expected_results=0, **{overridden: bogus})

    # # GetObject tests
    @test_requires('ListObjects', 'GetObject')
    def test_get_object(self):
        """
        Picks a data object from the ListObjects listing and fetches it
        again by ID.
        """
        listed, object_url = self.get_random_object()
        # The object found through `/objects` must be retrievable, unchanged,
        # through `/objects/{object_id}`
        fetched = self.drs_request('GET', object_url)['object']
        self.assertEqual(listed, fetched)

    @test_requires('ListBundles', 'GetBundle')
    def test_get_bundle(self):
        """
        Picks a data bundle from the ListBundles listing and fetches it
        again by ID.
        """
        listed, bundle_url = self.get_random_bundle()
        # The bundle found through `/bundles` must be retrievable, unchanged,
        # through `/bundles/{bundle_id}`
        fetched = self.drs_request('GET', bundle_url)['bundle']
        self.assertEqual(listed, fetched)

    @test_requires('ListBundles')
    def test_list_bundles_with_nonexist_alias(self):
        """
        Filtering ListBundles by an alias that no bundle carries must
        produce an empty ``bundles`` list.
        """
        # A freshly generated UUID is unlikely to be in use as an alias
        unknown_alias = str(uuid.uuid1())
        query_url = self.get_query_url('/bundles', alias=unknown_alias)
        response = self.drs_request('GET', query_url)
        self.assertEqual(len(response['bundles']), 0)

    @test_requires('ListBundles', 'GetBundle')
    def test_get_nonexistent_bundle(self):
        """
        Verifies that requesting a data bundle that doesn't exist results in HTTP 404

        ListBundles is declared as a requirement because the bundle sent in
        the request body is obtained through :meth:`get_random_bundle`, which
        issues a ListBundles request.
        """
        # Only the bundle itself is needed; its URL is deliberately ignored
        # since the request targets an ID that should not exist.
        bdl, _ = self.get_random_bundle()
        self.drs_request('GET', '/bundles/NonexistentBundle',
                         body={'bundle': bdl}, expected_status=404)

    @test_requires('ListObjects', 'UpdateObject')
    def test_update_nonexistent_object(self):
        """
        Verifies that trying to update a data object that doesn't exist
        returns HTTP 404

        ListObjects is declared as a requirement because the object sent in
        the request body is obtained through :meth:`get_random_object`, which
        issues a ListObjects request.
        """
        # Only the object itself is needed; its URL is deliberately ignored
        # since the request targets an ID that should not exist.
        obj, _ = self.get_random_object()
        self.drs_request('PUT', '/objects/NonexistentObjID', expected_status=404,
                         body={'object': obj, 'object_id': obj['id']})

    @test_requires('ListObjects', 'UpdateObject')
    def test_update_object_with_bad_request(self):
        """
        Verifies that attempting to update a data object with a malformed
        request returns HTTP 400

        The operations exercised are ListObjects (through
        :meth:`get_random_object`) and UpdateObject (the ``PUT`` request);
        no GetObject request is made.
        """
        _, url = self.get_random_object()
        # The body lacks the 'object' key used by the other update tests
        self.drs_request('PUT', url, expected_status=400, body={'abc': ''})

    @test_requires('ListObjects', 'UpdateObject', 'GetObject')
    def test_alias_update(self):
        """
        Adds a freshly generated alias to a data object through UpdateObject
        and checks that the alias is visible through both GetObject and
        ListObjects afterwards.

        NOTE: the alias is not removed from the object once the test is done.
        """
        alias = 'daltest:' + str(uuid.uuid1())
        # Pick a "random" object to run the test against
        obj, url = self.get_random_object()

        # An update that changes nothing must be accepted
        self.drs_request('PUT', url, body={'object': obj})

        # Add the alias, which is unique enough to later look the object up
        # by it, and update again - this time with an actual change.
        obj['aliases'].append(alias)
        update_response = self.drs_request('PUT', url, body={'object': obj})
        updated_id = update_response['object_id']
        self.assertEqual(obj['id'], updated_id)

        time.sleep(2)

        # The object retrieved by its URL must now carry the new alias
        fetched = self.drs_request('GET', url)['object']
        self.assertEqual(updated_id, fetched['id'])
        self.assertIn(alias, fetched['aliases'])

        # The object must also be found through ListObjects when filtering
        # by the new alias. The alias is unique, so even though
        # page_size > 1 only one result is expected.
        list_url = self.get_query_url('/objects', alias=alias, page_size=10)
        listed = self.drs_request('GET', list_url)['objects']
        self.assertEqual(1, len(listed))
        self.assertIn(alias, listed[0]['aliases'])

    @test_requires('ListObjects', 'UpdateObject', 'GetObject')
    def test_full_object_update(self):
        """
        Demonstrates updating multiple fields of a data object at once.
        This incidentally also tests object conversion.

        GetObject is declared as a requirement because the updated object is
        read back with a ``GET`` on its URL to verify the change.
        """
        # First, select a "random" object that we can test
        obj, url = self.get_random_object()

        # Make a new data object that is different from the data object we retrieved
        attributes = {
            # 'name' is an optional field and might not be present
            'name': obj.get('name', '') + 'test-suffix',
            # 'description' is intentionally left untouched,
            # see Biosphere/drs-azul-lambda#87
            'urls': [
                {'url': 'https://cgl.genomics.ucsc.edu/'},
                {'url': 'https://github.com/Biosphere'}
            ]
        }
        obj.update(attributes)

        # Now update the old data object with the new attributes we added
        self.drs_request('PUT', url, body={'object': obj})
        time.sleep(2)  # Give the server some time to catch up

        # Test and see if the update took place
        get_response = self.drs_request('GET', url)['object']
        # We only compare the changed attributes as DOS implementations
        # can update timestamps server-side
        self.assertEqual(get_response['name'], obj['name'])
        self.assertEqual(get_response['urls'], obj['urls'])