python source code of worker

"""
Celery Worker Setup
"""

import abc
import asyncio
import logging
import os
import re
from functools import wraps
import subprocess
from importlib import import_module

import aiohttp

from celery import Celery, Task
from celery.signals import celeryd_init

from kombu import serialization
import simplejson

from ..githubhandler import GitHubAppHandler, GitHubHandler
from ..githandler import install_gpg_key
from ..utils import RepoData, setup_logger
from .config import (
    APP_ID, APP_KEY, CODE_SIGNING_KEY, BOT_NAME, REPODATA_TIMEOUT,
    APP_CLIENT_ID, APP_CLIENT_SECRET
)

logger = logging.getLogger(__name__)  # pylint: disable=invalid-name


class AsyncTask(Task):
    """Task class with support for async tasks

    We override celery.Task with our own version, with some extra
    features and defaults:

    - Since we already use a lot of async stuff elsewhere, it's useful
      to allow the ``run`` method of tasks be ``async``. This Task
      class detects if the method provided is a coroutine and runs it
      inside the asyncio event loop::

          @app.task(bind=True)
          async def mytask(self):
              await self.async_init()
              ...

    - Provide access to a GitHubAppHandler instance that is kept on
      the task object and reused across calls.

      This is a little tedious. Since the task may be spawned some
      time after the webhook that created it was triggered, the tokens
      we got inside the webserver may have timed out. In an attempt to
      avoid wasting API calls to create those tokens continuously, the
      task keeps its own handler and uses it to replace every
      ``GitHubHandler`` found among the positional and keyword
      arguments before the task body runs.

    - Default to ``acks_late = True``. The reason we use Celery at all
      is so that spawned tasks can survive a shutdown of the app.

    """
    #: Our tasks should be re-run if they don't finish
    acks_late = True

    #: Access the Github API
    ghapi: "GitHubHandler" = None

    #: Access Github App API
    ghappapi: "GitHubAppHandler" = None

    #: Stores the async run method when the sync run wrapper is installed
    _async_run = None

    def bind(self, app=None):
        """Intercept binding of task to (celery) app

        Here we take the half-finished generated Task class and
        replace the async run method with a sync run method that
        executes the original method inside the asyncio loop.

        Tasks whose ``run`` is not a coroutine function are passed on
        to ``Task.bind`` untouched.

        Args:
          app: The celery app the task is being bound to.
        """
        if asyncio.iscoroutinefunction(self.run):  # only for async funcs
            @wraps(self.run)
            def sync_run(*args, **kwargs):
                largs = list(args)  # need list so that pre-run can modify
                self.loop.run_until_complete(self.async_pre_run(largs, kwargs))
                return self.loop.run_until_complete(self._async_run(*largs, **kwargs))

            # swap run method with wrapper defined above
            self._async_run, self.run = self.run, sync_run

            # run_until_complete() cannot be used on a loop that is
            # already running; in that case initialization is deferred
            # until a handler is first needed (see _fresh_ghapi).
            if not self.loop.is_running():
                self.loop.run_until_complete(self.async_init())
        super().bind(app)

    async def async_init(self):
        """Init things that need to be run inside the loop

        This happens during binding -> on load, or on first use if
        binding could not run it. Calling it again is harmless: the
        handler is only created while ``ghappapi`` is unset.
        """
        if not self.ghappapi:
            self.ghappapi = GitHubAppHandler(aiohttp.ClientSession(), BOT_NAME,
                                             APP_KEY, APP_ID,
                                             APP_CLIENT_ID, APP_CLIENT_SECRET)

    async def _fresh_ghapi(self, handler):
        """Request a handler from ``ghappapi`` matching **handler**

        The request is made for the ``user``, ``repo`` and
        ``installation`` of the handler that was passed in.
        """
        if not self.ghappapi:
            # bind() skips async_init() when the loop was already running
            await self.async_init()
        return await self.ghappapi.get_github_api(
            False, handler.user, handler.repo, handler.installation)

    async def async_pre_run(self, args, _kwargs):
        """Per-call async initialization

        Replaces, in place, every ``GitHubHandler`` found among the
        task's arguments with one freshly obtained from ``ghappapi``.

        Args:
          args: Mutable list of the positional arguments.
          _kwargs: Dict of the keyword arguments; may be empty or None.
                   (The name is kept for compatibility with existing
                   callers and overrides.)
        """
        for num, arg in enumerate(args):
            if isinstance(arg, GitHubHandler):
                args[num] = await self._fresh_ghapi(arg)

        if _kwargs:
            # Iterate over a snapshot as values are reassigned on the way
            for key, value in list(_kwargs.items()):
                if isinstance(value, GitHubHandler):
                    _kwargs[key] = await self._fresh_ghapi(value)

    @abc.abstractmethod
    def run(self, *_args, **_kwargs):
        """The tasks actual run method. Will be replaced during bind"""

    @property
    def loop(self):
        """Get the async loop - creating a new one if necessary

        If ``asyncio.get_event_loop()`` raises ``RuntimeError``, a new
        loop is created and installed as the current event loop.
        """
        try:
            return asyncio.get_event_loop()
        except RuntimeError:
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            return loop


def custom_dumps(string):
    """Serialize an object to JSON, honoring **for_json** methods

    Despite its name, **string** is the object to be encoded. The work
    is delegated to simplejson with ``for_json=True``.

    Returns:
      The JSON text produced by simplejson.
    """
    encoded = simplejson.dumps(string, for_json=True)
    return encoded


def custom_loads(string):
    """Deserialize JSON text, recreating objects

    JSON objects (dicts) containing both a ``__type__`` and a
    ``__module__`` field are turned into objects by importing the
    module, looking up the type and instantiating it, passing the
    remaining fields (the result dict from ``obj.for_json()``) as
    keyword arguments to ``__init__()``.

    Dicts lacking either marker are returned exactly as parsed.

    Args:
      string: The JSON text to decode.

    Returns:
      The decoded data with marked dicts replaced by instances.

    Raises:
      ImportError: if the module named in ``__module__`` cannot be imported.
      AttributeError: if the module has no attribute named by ``__type__``.
      Exception: whatever the constructor of the recreated type raises.
    """
    def decode(obj):
        # Only treat the dict as a serialized object if *both* markers
        # are present; otherwise hand it back untouched.
        if not (isinstance(obj, dict)
                and '__type__' in obj and '__module__' in obj):
            return obj
        # Work on a copy so the parsed dict is never left half-stripped.
        fields = dict(obj)
        typ = fields.pop('__type__')
        # NOTE(security): module and type names come straight from the
        # message, so this imports and instantiates whatever the sender
        # names. Only accept this content type from a trusted broker.
        mod = import_module(fields.pop('__module__'))
        klass = getattr(mod, typ)
        return klass(**fields)
    return simplejson.loads(string, object_hook=decode)

# Make our JSON flavor known to kombu under the name ``custom_json``.
# This lets us conveniently transfer objects between processes
# without having to resort to pickling.
serialization.register(
    'custom_json',
    encoder=custom_dumps,
    decoder=custom_loads,
    content_type='application/x-bioconda-json',
    content_encoding='utf8',
)


# Create the Celery application. AsyncTask is installed as the
# default task class and the task definitions are loaded from
# the tasks module listed in ``include``.
capp = Celery(  # pylint: disable=invalid-name
    include=['bioconda_utils.bot.tasks'],
    task_cls=AsyncTask,
)


# The configuration is applied at import time (module level) so that
# it is also in effect inside the worker.
# The values follow suggestions from CloudAMQP.
capp.conf.update(
    # --- Broker connection ---
    # The URL of the AMQP broker is taken from the environment
    broker_url=os.environ.get('CLOUDAMQP_URL'),
    broker_connection_timeout=30,
    broker_heartbeat=None,

    # The connection pool size is left at its default. When running
    # on Heroku it should be limited to 2 to avoid running out of free
    # connections on CloudAMQP (two, so that we can still inspect):
    #
    # broker_pool_limit=2,

    # --- Results and events ---
    # NOTE(review): task results are not meant to be fed back, yet an
    # ``rpc://`` result backend is configured - confirm this is intended.
    result_backend='rpc://',
    event_queue_expires=60,

    # --- Worker ---
    worker_concurrency=1,
    worker_prefetch_multiplier=1,

    # --- Serialization ---
    # Send tasks with our custom serializer; plain JSON is accepted too
    task_serializer='custom_json',
    accept_content=['custom_json', 'json'],

    # Late acknowledgement is not set globally here (``task_acks_late``);
    # AsyncTask sets ``acks_late`` on the task class instead.
)


@celeryd_init.connect
def setup_new_celery_process(sender=None, conf=None, **_kwargs):
    """Prepare the process when a celery worker is initialized

    Connected to the ``celeryd_init`` signal. Installs the GPG code
    signing key and applies the configured timeout to ``RepoData``.

    The signal arguments (**sender**, **conf** and any further keyword
    arguments) are accepted but not used.
    """
    install_gpg_key(CODE_SIGNING_KEY)

    repodata = RepoData()
    repodata.set_timeout(REPODATA_TIMEOUT)