python source code of docker

import errno
from fcntl import LOCK_EX, LOCK_NB, LOCK_SH, LOCK_UN
from itertools import takewhile, dropwhile
import logging
import os
import random
import re
import signal
import subprocess
import sys
import threading
import time

try:                  # Prefer system installation of Mesos protos if available
    from mesos_pb2 import *
    from containerizer_pb2 import *
except:
    from deimos.mesos_pb2 import *
    from deimos.containerizer_pb2 import *

import deimos.cgroups
from deimos.cmd import Run
import deimos.config
from deimos.containerizer import *
import deimos.docker
from deimos.err import Err
import deimos.logger
from deimos.logger import log
import deimos.mesos
import deimos.path
from deimos.proto import recordio
from deimos._struct import _Struct
import deimos.state
import deimos.sig


class Docker(Containerizer, _Struct):

    def __init__(self, workdir="/tmp/mesos-sandbox",
                       state_root="/tmp/deimos",
                       shared_dir="fs",
                       optimistic_unpack=True,
                       hooks=deimos.config.Hooks(),
                       container_settings=deimos.config.Containers(),
                       index_settings=deimos.config.DockerIndex()):
        _Struct.__init__(self, workdir=workdir,
                               state_root=state_root,
                               shared_dir=shared_dir,
                               optimistic_unpack=optimistic_unpack,
                               hooks=hooks,
                               container_settings=container_settings,
                               index_settings=index_settings,
                               runner=None,
                               state=None)

    def launch(self, launch_pb, *args):
        log.info(" ".join(args))
        fork = False if "--no-fork" in args else True
        deimos.sig.install(self.log_signal)
        run_options = []
        launchy = deimos.mesos.Launch(launch_pb)
        state = deimos.state.State(self.state_root,
                                   mesos_id=launchy.container_id)
        state.push()
        lk_l = state.lock("launch", LOCK_EX)
        state.executor_id = launchy.executor_id
        state.push()
        state.ids()
        mesos_directory()  # Redundant?
        if launchy.directory:
            os.chdir(launchy.directory)
        # TODO: if launchy.user:
        #           os.seteuid(launchy.user)
        url, options = launchy.container
        options, trailing_argv = split_on(options, "//")
        url, options = self.container_settings.override(url, options)

        true_argv = launchy.argv if trailing_argv is None else trailing_argv

        image = self.determine_image(url, launchy)
        log.info("image  = %s", image)
        run_options += ["--sig-proxy"]
        run_options += ["--rm"]       # This is how we ensure container cleanup
        run_options += ["--cidfile", state.resolve("cid")]

        place_uris(launchy, self.shared_dir, self.optimistic_unpack)
        run_options += ["-w", self.workdir]

        # Docker requires an absolute path to a source filesystem, separated
        # from the bind path in the container with a colon, but the absolute
        # path to the Mesos sandbox might have colons in it (TaskIDs with
        # timestamps can cause this situation). So we create a soft link to it
        # and mount that.
        shared_full = os.path.abspath(self.shared_dir)
        sandbox_symlink = state.sandbox_symlink(shared_full)
        run_options += ["-v", "%s:%s" % (sandbox_symlink, self.workdir)]

        cpus, mems = launchy.cpu_and_mem
        env = launchy.env
        run_options += options

        # We need to wrap the call to Docker in a call to the Mesos executor
        # if no executor is passed as part of the task. We need to pass the
        # MESOS_* environment variables in to the container if we're going to
        # start an executor.
        observer_argv = None
        if launchy.needs_observer:
            # NB: The "@@docker@@" variant is a work around for Mesos's option
            # parser. There is a fix in the pipeline.
            observer_argv = [mesos_executor(), "--override",
                             deimos.path.me(), "observe", state.mesos_id]
            state.lock("observe", LOCK_EX | LOCK_NB)     # Explanation of Locks
            # When the observer is running, we would like its call to
            # observe() to finish before all the wait(); and we'd like the
            # observer to have a chance to report TASK_FINISHED before the
            # calls to wait() report their results (which would result in a
            # TASK_FAILED).
            #
            # For this reason, we take the "observe" lock in launch(), before
            # we call the observer and before releasing the "launch" or "wait"
            # locks.
            #
            # Calls to observe() actually skip locking "observe"; but wait()
            # calls must take this lock. The "observe" lock is held by
            # launch() until the observer executor completes, at which point
            # we can be reasonably sure its status was propagated to the Mesos
            # slave.
        else:
            env += mesos_env() + [("MESOS_DIRECTORY", self.workdir)]

        # Flatten our env list of tuples into dictionary object for Popen
        popen_env = dict(env)

        self.place_dockercfg()

        runner_argv = deimos.docker.run(run_options, image, true_argv,
                                        env=env, ports=launchy.ports,
                                        cpus=cpus, mems=mems)

        log_mesos_env(logging.DEBUG)

        observer = None
        with open("stdout", "w") as o:        # This awkward multi 'with' is a
            with open("stderr", "w") as e:    # concession to 2.6 compatibility
                with open(os.devnull) as devnull:
                    log.info(deimos.cmd.present(runner_argv))

                    onlaunch = self.hooks.onlaunch
                    # test for default configuration (empty list)
                    if onlaunch:
                        # We're going to catch all exceptions because it's not
                        # in scope for Deimos to stack trace on a hook error
                        try:
                            subprocess.Popen(onlaunch, stdin=devnull,
                                             stdout=devnull,
                                             stderr=devnull,
                                             env=popen_env)
                        except Exception as e:
                            log.warning("onlaunch hook failed with %s" % e)

                    self.runner = subprocess.Popen(runner_argv, stdin=devnull,
                                                                stdout=o,
                                                                stderr=e)
                    state.pid(self.runner.pid)
                    state.await_cid()
                    state.push()
                    lk_w = state.lock("wait", LOCK_EX)
                    lk_l.unlock()
                    if fork:
                        pid = os.fork()
                        if pid is not 0:
                            state.ids()
                            log.info("Forking watcher into child...")
                            return
                    state.ids()
                    if observer_argv is not None:
                        log.info(deimos.cmd.present(observer_argv))
                        call = deimos.cmd.in_sh(observer_argv, allstderr=False)
                        # TODO: Collect these leaking file handles.
                        obs_out = open(state.resolve("observer.out"), "w+")
                        obs_err = open(state.resolve("observer.err"), "w+")
                        # If the Mesos executor sees LIBPROCESS_PORT=0 (which
                        # is passed by the slave) there are problems when it
                        # attempts to bind. ("Address already in use").
                        # Purging both LIBPROCESS_* net variables, to be safe.
                        for v in ["LIBPROCESS_PORT", "LIBPROCESS_IP"]:
                            if v in os.environ:
                                del os.environ[v]
                        observer = subprocess.Popen(call, stdin=devnull,
                                                          stdout=obs_out,
                                                          stderr=obs_err,
                                                          close_fds=True)
        data = Run(data=True)(deimos.docker.wait(state.cid()))
        state.exit(data)
        lk_w.unlock()
        for p, arr in [(self.runner, runner_argv), (observer, observer_argv)]:
            if p is None:
                continue
            thread = threading.Thread(target=p.wait)
            thread.start()
            thread.join(10)
            if thread.is_alive():
                log.warning(deimos.cmd.present(arr, "SIGTERM after 10s"))
                p.terminate()
            thread.join(1)
            if thread.is_alive():
                log.warning(deimos.cmd.present(arr, "SIGKILL after 1s"))
                p.kill()
            msg = deimos.cmd.present(arr, p.wait())
            if p.wait() == 0:
                log.info(msg)
            else:
                log.warning(msg)

        with open(os.devnull) as devnull:
            ondestroy = self.hooks.ondestroy
            if ondestroy:
                # Deimos shouldn't care if the hook fails.
                # The hook should implement its own error handling.
                try:
                    subprocess.Popen(ondestroy, stdin=devnull,
                                     stdout=devnull,
                                     stderr=devnull,
                                     env=popen_env)
                except Exception as e:
                    log.warning("ondestroy hook failed with %s" % e)

        return state.exit()

    def update(self, update_pb, *args):
        log.info(" ".join(args))
        log.info("Update is a no-op for Docker...")

    def usage(self, usage_pb, *args):
        log.info(" ".join(args))
        container_id = usage_pb.container_id.value
        state = deimos.state.State(self.state_root, mesos_id=container_id)
        state.await_launch()
        state.ids()
        if state.cid() is None:
            log.info("Container not started?")
            return 0
        if state.exit() is not None:
            log.info("Container is stopped")
            return 0
        cg = deimos.cgroups.CGroups(**deimos.docker.cgroups(state.cid()))
        if len(cg.keys()) == 0:
            log.info("Container has no CGroups...already stopped?")
            return 0
        try:
            recordio.write(ResourceStatistics,
                           timestamp=time.time(),
                           mem_limit_bytes=cg.memory.limit(),
                           cpus_limit=cg.cpu.limit(),
                           # cpus_user_time_secs   = cg.cpuacct.user_time(),
                           # cpus_system_time_secs = cg.cpuacct.system_time(),
                           mem_rss_bytes=cg.memory.rss())
        except AttributeError as e:
            log.error("Missing CGroup!")
            raise e
        return 0

    def observe(self, *args):
        log.info(" ".join(args))
        state = deimos.state.State(self.state_root, mesos_id=args[0])
        self.state = state
        deimos.sig.install(self.stop_docker_and_resume)
        state.await_launch()
        try:  # Take the wait lock to block calls to wait()
            state.lock("wait", LOCK_SH, seconds=None)
        except IOError as e:                       # Allows for signal recovery
            if e.errno != errno.EINTR:
                raise e
            state.lock("wait", LOCK_SH, seconds=1)
        if state.exit() is not None:
            return state.exit()
        raise Err("Wait lock is not held nor is exit file present")

    def wait(self, wait_pb, *args):
        log.info(" ".join(args))
        container_id = wait_pb.container_id.value
        state = deimos.state.State(self.state_root, mesos_id=container_id)
        self.state = state
        deimos.sig.install(self.stop_docker_and_resume)
        state.await_launch()
        try:  # Wait for the observe lock so observe completes first
            state.lock("observe", LOCK_SH, seconds=None)
            state.lock("wait", LOCK_SH, seconds=None)
        except IOError as e:                       # Allows for signal recovery
            if e.errno != errno.EINTR:
                raise e
            state.lock("observe", LOCK_SH, seconds=1)
            state.lock("wait", LOCK_SH, seconds=1)
        termination = (state.exit() if state.exit() is not None else 64) << 8
        recordio.write(Termination,
                       killed=False,
                       message="",
                       status=termination)
        if state.exit() is not None:
            return state.exit()
        raise Err("Wait lock is not held nor is exit file present")

    def destroy(self, destroy_pb, *args):
        log.info(" ".join(args))
        container_id = destroy_pb.container_id.value
        state = deimos.state.State(self.state_root, mesos_id=container_id)
        state.await_launch()
        lk_d = state.lock("destroy", LOCK_EX)
        if state.exit() is None:
            Run()(deimos.docker.stop(state.cid()))
        else:
            log.info("Container is stopped")
        return 0

    def containers(self, *args):
        log.info(" ".join(args))
        data = Run(data=True)(deimos.docker.docker("ps", "--no-trunc", "-q"))
        mesos_ids = []
        for line in data.splitlines():
            cid = line.strip()
            state = deimos.state.State(self.state_root, docker_id=cid)
            if not state.exists():
                continue
            try:
                state.lock("wait", LOCK_SH | LOCK_NB)
            except deimos.flock.Err:     # LOCK_EX held, so launch() is running
                mesos_ids += [state.mesos_container_id()]
        containers = Containers()
        for mesos_id in mesos_ids:
            container = containers.containers.add()
            container.value = mesos_id
        recordio.writeProto(containers)
        return 0

    def log_signal(self, signum):
        pass

    def stop_docker_and_resume(self, signum):
        if self.state is not None and self.state.cid() is not None:
            cid = self.state.cid()
            log.info("Trying to stop Docker container: %s", cid)
            try:
                Run()(deimos.docker.stop(cid))
            except subprocess.CalledProcessError:
                pass
            return deimos.sig.Resume()

    def determine_image(self, url, launchy):
        opts = dict(self.container_settings.image.items(onlyset=True))
        if "default" in opts:
            default = url_to_image(opts["default"])
        else:
            default = self.image_from_system_context(launchy)
        image = url_to_image(url)
        return default if image == "" else image

    def image_from_system_context(self, launchy):
        opts = dict(self.index_settings.items(onlyset=True))
        if "account_libmesos" in opts:
            if not launchy.needs_observer:
                opts["account"] = opts["account_libmesos"]
            del opts["account_libmesos"]
        if "dockercfg" in opts:
            del opts["dockercfg"]
        return deimos.docker.matching_image_for_host(**opts)

    def place_dockercfg(self):
        dockercfg = self.index_settings.dockercfg
        if dockercfg is not None:
            log.info("Copying to .dockercfg: %s" % dockercfg)
            Run()(["cp", dockercfg, ".dockercfg"])


def url_to_image(url):
    pre, image = re.split(r"^docker:///?", url)
    if pre != "":
        raise Err("URL '%s' is not a valid docker:// URL!" % url)
    return image


def split_on(iterable, element):
    preceding = list(takewhile(lambda _: _ != element, iterable))
    following = list(dropwhile(lambda _: _ != element, iterable))
    return preceding, (following[1:] if len(following) > 0 else None)