python source code of Batch

#  Copyright (C) 2014-2015 CEA/DAM/DIF
#
#  This file is part of PCOCC, a tool to easily create and deploy
#  virtual machines using the resource manager of a compute cluster.
#
#  PCOCC is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  PCOCC is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with PCOCC. If not, see <http://www.gnu.org/licenses/>

import os
import pwd
import yaml
import socket
import re
import shutil
import errno
import subprocess
import sys
import random
import datetime
import logging
import jsonschema
import etcd
import etcd.auth
import atexit
import binascii
import stat
import psutil
import signal
import argparse
import uuid
import threading

from ClusterShell.NodeSet import NodeSet, NodeSetException, RangeSet
from abc import ABCMeta, abstractmethod

from .Tbon import  UserCA
from .Config import Config
from .Backports import subprocess_check_output
from .Error import PcoccError, InvalidConfigurationError
from .Misc import fake_signalfd, wait_or_term_child
from .Misc import CHILD_EXIT, datetime_to_epoch, stop_threads



class BatchError(PcoccError):
    """Generic exception for Batch related issues
    """
    def __init__(self, error):
        super(BatchError, self).__init__(error)

class InvalidJobError(BatchError):
    """Exception raised when a specific job cannot be found or doesn't belong to
    the user
    """
    def __init__(self, error):
        super(InvalidJobError, self).__init__('Unable to find job: '
                                              + error)

class NoJobError(BatchError):
    """Exception raised when there is no job specified or implied by
    the current context but one is required to continue
    """
    def __init__(self):
        super(NoJobError, self).__init__('The target job was neither specified '
                                         'nor implied and could not be guessed '
                                         'by name')

class AllocationError(BatchError):
    """Exception raised when an allocation fails
    """
    def __init__(self, error):
        super(AllocationError, self).__init__('Failure in job allocation: '
                                              + error)

class KeyTimeoutError(BatchError):
    """Exception raised after a time out waiting for a key
    """
    def __init__(self, key):
        super(KeyTimeoutError, self).__init__('Timeout waiting for key: ' + key)

class KeyCredentialError(BatchError):
    """Exception raised if the user cannot be authenticated
    """
    def __init__(self, error):
        super(KeyCredentialError, self).__init__(
            'Keystore authentication error: ' + error)

# Schema to validate batch.yaml config file
batch_config_schema = """
type: object
properties:
  type:
    enum:
      - slurm
      - local
  settings:
    type: object
    properties:
      etcd-servers:
        type: array
      etcd-client-port:
        type: integer
      etcd-protocol:
        enum:
          - http
          - https
      etcd-ca-cert:
        type: string
      etcd-auth-type:
        enum:
          - password
          - munge
          - none
      batch-args:
        type: array
    additionalProperties: false
    required:
      - etcd-servers
      - etcd-client-port
      - etcd-protocol
      - etcd-auth-type
required:
  - type
  - settings
"""

class ProcessType(object):
    """Enum class defining the type of process wrt batch management"""
    # Privileged process for node setup
    SETUP = 1
    # Hypervisor process running one VM
    HYPERVISOR = 2
    # Launcher process
    LAUNCHER = 3
    # User process related to a job
    USER = 4
    # Other user process
    OTHER = 5


ETCD_PASSWORD_BYTES = 16


class BatchManager(object):
    __metaclass__ = ABCMeta
    """Manages all interactions with the batch environment"""
    def load(batch_config_file, batchid, batchname, default_batchname,
             proc_type, batchuser):
        """Factory function to initialize a batch manager"""
        try:
            stream = file(batch_config_file, 'r')
            batch_config = yaml.safe_load(stream)
        except yaml.YAMLError as err:
            raise InvalidConfigurationError(str(err))
        except IOError as err:
            raise InvalidConfigurationError(str(err))

        try:
            jsonschema.validate(batch_config,
                                yaml.safe_load(batch_config_schema))
        except jsonschema.exceptions.ValidationError as err:
            raise InvalidConfigurationError(str(err))

        settings = batch_config['settings']

        if batch_config['type'] == 'slurm':
            return SlurmManager(
                batchid, batchname, default_batchname,
                settings, proc_type, batchuser)
        elif batch_config['type'] == 'local':
            return LocalManager(
                batchid, batchname, default_batchname,
                settings, proc_type, batchuser)
        else:
            raise InvalidConfigurationError("Invalid batch manager type")


    def __init__(self, batchid, batchname, default_batchname, settings,
                 proc_type, batchuser):

        self.proc_type = proc_type

        # "abstract" properties
        self.batchid = 0
        self.batchuser = None
        self.nodeset = None
        self.node_rank = None
        self.cluster_state_dir = None
        self.vm_state_dir_prefix = None
        self.pcocc_state_dir = None

    def find_job_by_name(self, user, batchname, host=None):
        """Return a jobid matching a user and batchname

        There must be one and only one job matching the specified criteria

        """
        raise PcoccError("Not implemented")

    @abstractmethod
    def run(self, cluster, run_opt, cmd):
        """Launch the VM tasks"""
        raise PcoccError("Not implemented")

    @abstractmethod
    def alloc(self, cluster, alloc_opt, cmd):
        """Allocate an interactive job"""
        raise PcoccError("Not implemented")

    @abstractmethod
    def _only_in_a_job(self):
        """Called on each node at the resource deletion step"""
        raise PcoccError("Not implemented")

    def batch(self, cluster, alloc_opt, cmd):
        """Allocate a batch job"""
        raise PcoccError("Not implemented")

    def init_node(self):
        """Called on each node at the init step"""
        pass

    def create_resources(self):
        """Called on each node at the resource creation step"""
        pass

    def delete_resources(self, force=False):
        """Called on each node at the resource deletion step"""
        pass

    @abstractmethod
    def vm_count(self):
        """Returns the number of VMs in the cluster"""
        pass

    @property
    def task_rank(self):
        """Returns the rank of the current process in the SLURM job

        This is only valid for hypervisor processes

        """
        raise PcoccError("Not implemented")

    @property
    def coreset(self):
        """Returns the list of cores allocated to the current task

        Only valid for hypervisor processes

        """
        self._only_in_a_job()
        # Assume we've been bound to our cores by the batch manager
        taskset = subprocess_check_output(['hwloc-bind', '--get']).strip()
        coreset = subprocess_check_output(['hwloc-calc', '--intersect', 'Core',
                                           taskset]).strip()

        return RangeSet(coreset)

    @property
    def num_cores(self):
        """Returns the number of cores allocated per task

        Only valid for hypervisor processes

        """
        raise PcoccError("Not implemented")

    @property
    def cluster_definition(self):
        """Returns the cluster definition passed to the spank plugin

        This is only valid for node setup processes

        """
        raise PcoccError("Not implemented")

    def get_host_rank(self, rank):
        """Returns rank of the host where the specified task rank runs"""
        raise PcoccError("Not implemented")

    def get_rank_on_host(self, rank):
        """Returns the relative rank of the specified task rank on its host

        """
        raise PcoccError("Not implemented")

    def get_rank_host(self, rank):
        """Returns the hostname where the specified task rank runs"""
        self._only_in_a_job()
        return self.nodeset[self.get_host_rank(rank)]

    def is_rank_local(self, rank):
        """ True if the specified process rank is allocated on the current node
        """
        self._only_in_a_job()
        return self.node_rank == self.get_host_rank(rank)

    @property
    def num_nodes(self):
        """Returns the number of host nodes in the job"""
        self._only_in_a_job()
        return len(self.nodeset)

    @property
    def _in_a_job(self):
        return self.batchid != 0

    @property
    def ca_cert(self):
        """Returns a CA certificate for authenticating the user (CLI and hypervisor agents)

        """
        raise PcoccError("Not implemented")

    @property
    def client_cert(self):
        """Returns a Certificate for authenticating the user (clientss)

        """
        raise PcoccError("Not implemented")

    load = staticmethod(load)


def _retry_on_cred_expiry(func):
    """Wraps etcd call to automtically regenerate expired credentials"""
    def _wrapped_func(*args, **kwargs):
        while True:
            try:
                return func(*args, **kwargs)
            except etcd.EtcdException as e:
                args[0]._try_renew_credential(e) # pylint: disable=W0212
    return _wrapped_func

class EtcdManager(BatchManager):
    """Common class for batch managers based on etcd"""
    def __init__(self, batchid, batchname, default_batchname, settings,
                 proc_type, batchuser):

        super(EtcdManager, self).__init__(
            batchid, batchname, default_batchname, settings,
            proc_type, batchuser)

        # Load settings
        self._etcd_servers = settings['etcd-servers']
        self._etcd_ca_cert = settings.get('etcd-ca-cert', None)
        self._etcd_client_port = settings['etcd-client-port']
        self._etcd_protocol = settings['etcd-protocol']
        self._etcd_auth_type = settings['etcd-auth-type']
        if self._etcd_auth_type == 'password':
            self._etcd_password = None
        self._ca_cert = None

    def _init_vm_dir(self):
        self._only_in_a_job()
        try:
            os.mkdir(self._get_vm_state_dir(self.task_rank), 0o700)
        except OSError as e:
            raise PcoccError('Failed to create temporary directory for '
                             'VM data: ' + str(e))

        atexit.register(self._clean_vm_dir)

    def _init_cluster_dir(self):
        self._only_in_a_job()
        if not os.path.exists(self.cluster_state_dir):
            os.makedirs(self.cluster_state_dir)
            atexit.register(self._clean_cluster_dir)

    def _clean_cluster_dir(self):
        self._only_in_a_job()
        if os.path.exists(self.cluster_state_dir):
            shutil.rmtree(self.cluster_state_dir)

    def _clean_vm_dir(self):
        self._only_in_a_job()
        if os.path.exists(self._get_vm_state_dir(self.task_rank)):
            shutil.rmtree(self._get_vm_state_dir(self.task_rank))

    def get_cluster_state_path(self, name):
        """ Return path to store cluster state file """
        return os.path.join(self.cluster_state_dir, name)

    def _get_vm_state_dir(self, rank):
        return '%s_%d' % (self.vm_state_dir_prefix, rank)

    def get_vm_state_path(self, rank, name):
        """ Return path to store vm state file """
        return '%s/%s' % (self._get_vm_state_dir(rank), name)

    def _only_in_a_job(self):
        if not self._in_a_job:
            raise NoJobError()

    def _is_user_key_type(self, key_type):
        return key_type.endswith('user')

    def _is_cluster_key_type(self, key_type):
        return key_type.startswith('cluster')

    def infer_user_and_alloc_id(self, user, batchid, key_type):
        ruser = user
        rid = batchid
        if ruser is None and self._is_user_key_type(key_type):
            ruser = self.batchuser
        if rid is None and self._is_cluster_key_type(key_type):
            self._only_in_a_job()
            rid = self.batchid
        return ruser, rid

    def read_key(self, key_type, key,
                 blocking=False, timeout=0,
                 user=None,
                 batchid=None):
        """Reads a key from keystore

        Returns None if the key doesn't exist except if blocking is
        True, then we block until the key is set or the timeout
        expires.

        """
        user, batchid = self.infer_user_and_alloc_id(user, batchid, key_type)

        val, index = self.read_key_index(key_type, key, user=user, batchid=batchid)
        if val or not blocking:
            return val

        while not val:
            val, index = self.wait_key_index(key_type, key, index,
                                             timeout=timeout,
                                             user=user,
                                             batchid=batchid)

        return val.value




    @_retry_on_cred_expiry
    def read_key_index(self, key_type, key,
                       realindex=False,
                       user=None,
                       batchid=None):
        """Reads a key and its modification index from keystore

        By default, return an index suitable for watches, for updates
        use realindex=True.

        Returns None if the key doesn't exist.

        """
        user, batchid = self.infer_user_and_alloc_id(user, batchid, key_type)

        key_path = self.get_key_path(key_type, key,
                                     user, batchid)

        try:
            ret = self.keyval_client.read(key_path)
        except etcd.EtcdKeyNotFound as e:
            return None, e.payload['index']

        if realindex:
            return ret.value, ret.modifiedIndex
        else:
            return ret.value, max(ret.modifiedIndex,
                                  ret.etcd_index)

    def read_dir(self, key_type, key, user=None, batchid=None):
        """Reads a directory from keystore

        Returns None if the directory doesn't exist. Otherwise,
        returns the full directory content (as returned by the etcd
        lib)

        """
        user, batchid = self.infer_user_and_alloc_id(user, batchid, key_type)
        val, _ = self.read_dir_index(key_type, key)
        return val

    @_retry_on_cred_expiry
    def read_dir_index(self, key_type, key, user=None, batchid=None):
        """Reads a directory from keystore

        Returns None if the directory doesn't exist. Otherwise,
        returns the full directory content (as returned by the etcd
        lib) and associated modification index

        """
        user, batchid = self.infer_user_and_alloc_id(user, batchid, key_type)

        key_path = self.get_key_path(key_type, key, user, batchid)
        try:
            val = self.keyval_client.read(key_path, recurse = True)
        except etcd.EtcdKeyNotFound as e:
            return None, e.payload['index']

        return val, max(val.modifiedIndex,
                        val.etcd_index)

    @_retry_on_cred_expiry
    def write_ttl(self, key_type, key, value, ttl):
        """Write a single key with a ttl"""
        key_path = self.get_key_path(key_type, key)
        self.keyval_client.write(key_path, value, ttl=ttl)

    @_retry_on_cred_expiry
    def write_key(self, key_type, key, value):
        """Write a single key"""
        key_path = self.get_key_path(key_type, key)
        return self.keyval_client.write(key_path, value)

    @_retry_on_cred_expiry
    def write_key_index(self, key_type, key, value, index):
        """Write a single key using compare and swap on the index"""
        key_path = self.get_key_path(key_type, key)
        return self.keyval_client.write(key_path, value,
                                        prevIndex=index)

    @_retry_on_cred_expiry
    def write_key_new(self, key_type, key, value):
        """Write a single key if it didnt exist"""
        key_path = self.get_key_path(key_type, key)
        return self.keyval_client.write(key_path, value,
                                        prevExist=False)

    @_retry_on_cred_expiry
    def atom_update_key(self, key_type, key, func, *args, **kwargs):
        """Wrap a function to atomically update a key

        Read the current value of the key and pass it to the wrapped
        function which returns the updated value. Then, try to
        update the value with compare and swap and restart the whole
        process if there was a race.

        """
        while True:
            try:
                value, index = self.read_key_index(key_type, key,
                                                   realindex=True)
                nargs = args + (value,)
                new_value, ret = func(*nargs, **kwargs)

                logging.debug(
                    "Trying atomic update \"%s\" for \"%s\" ",
                        str(value).strip(),
                        str(new_value).strip())

                if value is None:
                    if new_value is None:
                        return ret
                    else:
                        self.write_key_new(key_type, key, new_value)
                else:
                    self.write_key_index(key_type, key, new_value,
                                         index)

                return ret
            except ( etcd.EtcdCompareFailed,
                     etcd.EtcdKeyNotFound,
                     etcd.EtcdAlreadyExist ):
                logging.debug("Retrying atomic update")

    @_retry_on_cred_expiry
    def make_dir(self, key_type, key):
        """Create a directory"""
        key_path = self.get_key_path(key_type, key)
        self.keyval_client.write(key_path, False, dir = True)

    @_retry_on_cred_expiry
    def delete_key(self, key_type, key):
        """Delete a key

        This fails for directories
        """
        key_path = self.get_key_path(key_type, key)
        self.keyval_client.delete(key_path, recursive = False, dir = False)

    @_retry_on_cred_expiry
    def delete_dir(self, key_type, key):
        """Delete a directory

        Also succeeds for keys
        """
        key_path = self.get_key_path(key_type, key)
        try:
            self.keyval_client.delete(key_path, recursive = True, dir = True)
        except etcd.EtcdNotDir:
            self.delete_key(self, key_type, key)

    @_retry_on_cred_expiry
    def wait_key_index(self, key_type, key, index,
                       timeout = 0,
                       user=None,
                       batchid=None):
        """Wait until a key is updated from the specified index"""
        user, batchid = self.infer_user_and_alloc_id(user, batchid, key_type)

        key_path = self.get_key_path(key_type, key, user, batchid)

        while True:
            try:
                ret = self.keyval_client.watch(key_path, recursive = True,
                                         index = index + 1, timeout = timeout)
                return ret, max(ret.modifiedIndex,
                              ret.etcd_index)
            except etcd.EtcdWatchTimedOut:
                logging.info("Timeout while waiting for key " + key_path)
                raise KeyTimeoutError(key_path)
            except etcd.EtcdEventIndexCleared as e:
                return None, e.payload['index']
            except etcd.EtcdClusterIdChanged:
                return None, e.payload['index']

    @_retry_on_cred_expiry
    def wait_child_count(self, key_type, key, count):
        """Wait until a directory has the specified number of elements"""
        while True:
            ret, last_index  = self.read_dir_index(key_type, key)
            if ret:
                num_complete = len([child for child in ret.children])
            else:
                num_complete = 0

            if num_complete == count:
                return ret

            self.wait_key_index(key_type, key, last_index, timeout=30)

    def get_key_path(self, key_type, key,
                     user=None,
                     batchid=None):
        """Returns the path of a key

        Global keys are global to the whole physical cluster whereas
        cluster keys are per virtual cluster/job.

        User keys are writable by the user whereas standard keys may
        only be written as root.

        """
        user, batchid = self.infer_user_and_alloc_id(user, batchid, key_type)

        if key_type == 'global':
            return '/pcocc/global/{0}'.format(key)
        if key_type == 'global/user':
            return '/pcocc/global/users/{0}/{1}'.format(user, key)
        elif key_type == 'cluster':
            return '/pcocc/cluster/{0}/{1}'.format(batchid, key)
        elif key_type == 'cluster/user':
            return '/pcocc/cluster/users/{0}/{1}/{2}'.format(user,
                                                             batchid,
                                                             key)
        else:
            raise KeyError(key_type)

    @property
    def keyval_client(self):
        try:
            return self._keyval_client
        except AttributeError:
            hosts_tuple = [ (host, self._etcd_client_port) for
                            host in self._etcd_servers ]
            random.shuffle(hosts_tuple)
            hosts_tuple = tuple(hosts_tuple)
            logging.debug('Starting etcd client')
            self._keyval_client = etcd.Client(
                host=hosts_tuple,
                ca_cert=self._etcd_ca_cert,
                protocol=self._etcd_protocol,
                allow_reconnect=True,
                read_timeout=10,
                username=self._get_keyval_username(),
                password=self._get_keyval_credential())

            logging.info('Started etcd client')
            self._last_cred_renew = datetime.datetime.utcnow()

            return self._keyval_client

    def _try_renew_credential(self, e):
        # Expired credential status
        if (hasattr(e.payload, "get") and (
                e.payload.get("errorCode", 0) == 110 or
                e.payload.get("error_code", 0) == 110 or
                e.payload.get("status", 0) == 401 )):

            delta = datetime.datetime.utcnow() - self._last_cred_renew

            if delta > datetime.timedelta(seconds=15):
                logging.debug('Renewing etcd credentials')
                self._last_cred_renew = datetime.datetime.utcnow()
                self._keyval_client.password = self._get_keyval_credential()
                return
            else:
                raise KeyCredentialError('access denied')

        raise e

    def _get_keyval_credential(self):
        if self._etcd_auth_type == 'munge':
            return subprocess_check_output(['/usr/bin/munge', '-n'])
        elif self._etcd_auth_type == 'password':
            if self._etcd_password is None:
                self._init_password()
            return self._etcd_password
        elif self._etcd_auth_type == 'none':
            return None

    def _get_keyval_username(self):
        if self._etcd_auth_type == 'none':
            return None
        else:
            return pwd.getpwuid(os.getuid()).pw_name

    def _init_password(self):
        bad_perms = (stat.S_IRGRP |
                     stat.S_IWGRP |
                     stat.S_IROTH |
                     stat.S_IWOTH)

        if os.getuid() == 0:
            try:
                pwd_path = os.path.join(Config().conf_dir,
                                 'etcd-password')

                st = os.stat(pwd_path)
                if st.st_mode & bad_perms:
                    logging.warning('Loose permissions on password file ' +
                                    pwd_path)
                self._etcd_password = open(
                    os.path.join(Config().conf_dir,
                                 'etcd-password')).read().strip()
            except:
                raise KeyCredentialError('unable to read password file')
        else:
            pwd_path = os.path.join(self.pcocc_state_dir,
                                    '.etcd-password')
            try:
                st = os.stat(pwd_path)
                if st.st_mode & bad_perms:
                    logging.warning('Loose permissions on password file ' +
                                    pwd_path)

                self._etcd_password = open(pwd_path).read().strip()
                if len(self._etcd_password) != 2 * ETCD_PASSWORD_BYTES:
                    raise KeyCredentialError(
                        'password file {0} is invalid, '
                        'please delete it and allocate '
                        'a new virual cluster'.format(pwd_path))
                else:
                    return
            except (OSError, IOError) as e:
                # Only generate a password if the file is missing
                # Hypervisor processes should not have to do this
                # as it should have been done beforehand
                if (e.errno != errno.ENOENT or
                    self.proc_type == ProcessType.HYPERVISOR):
                    raise KeyCredentialError('unable to read password file')

            logging.info('Password is not set, generating a new one')

            # Try to generate a password
            try:
                os.mkdir(self.pcocc_state_dir)
            except OSError as e:
                logging.debug(str(e))

            try:
                self._etcd_password = binascii.b2a_hex(
                    os.urandom(ETCD_PASSWORD_BYTES))
                f = os.open(pwd_path,
                            os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o400)
                os.write(f, self._etcd_password)
                os.close(f)
            except OSError as e:
                raise KeyCredentialError('unable to generate password: ' + str(e))

    def init_cluster_keys(self):
        if self._etcd_auth_type != 'none':
            role = '{0}-pcocc'.format(self.batchuser)
            logging.info('Initializing etcd role %s', role)
            u = etcd.auth.EtcdRole(self.keyval_client, role)
            u.grant('/pcocc/cluster/*', 'R')
            u.grant('/pcocc/global/public/*', 'R')
            u.grant('/pcocc/cluster/users/{0}/*'.format(self.batchuser), 'RW')
            u.grant('/pcocc/global/users/{0}/*'.format(self.batchuser), 'RW')
            u.write()

            logging.info('Initializing etcd user %s', self.batchuser)

            u = etcd.auth.EtcdUser(self.keyval_client, self.batchuser)
            try:
                u.read()
            except etcd.EtcdKeyNotFound:
                pass
            u.roles = list(u.roles) + [role]
            if self._etcd_auth_type == 'password':
                requested_cred = os.environ.get('SPANK_PCOCC_REQUEST_CRED', '')
                if (len(requested_cred) == 2 * ETCD_PASSWORD_BYTES
                    and u.password != requested_cred):
                    logging.info('Updating password with ' + requested_cred)
                    u.password = requested_cred

            u.write()
            self.make_dir('cluster/user', '')


    def cleanup_cluster_keys(self):
        try:
            logging.debug('Setting self-destruct on cluster etcd keystore')
            self.keyval_client.write(self.get_key_path('cluster', ''),
                                     None, dir=True, prevExist=True, ttl=600)
            self.keyval_client.write(self.get_key_path('cluster/user', ''),
                                     None, dir=True, prevExist=True, ttl=600)
        except:
            logging.warning('Failed to cleanup cluster etcd keystore')

    def populate_env(self):
        """ Populate environment variables with batch related info to propagate """
        os.putenv('PCOCC_JOB_ID', str(self.batchid))

    @property
    def ca_cert(self):
        """Returns a CA certificate for authenticating the user

        """
        if not self._ca_cert:
            self._ca_cert = UserCA.load_yaml(Config().batch.read_key('cluster/user', 'ca_cert',
                                                                     blocking=True))
        return self._ca_cert

    @property
    def client_cert(self):
        return self.ca_cert

# Schema to validate the global pkey state in the key/value store
local_job_allocation_schema = """
type: object
properties:
  jobs:
    type: object
    patternProperties:
      "^([0-9]+)+$":
        type: object
        properties:
          batchname:
            type: string
          coreset:
            type: string
          definition:
            type: string
          uuid:
            type: string
          host:
            type: string
          user:
            type: string
          start:
            type: integer
        required:
          - batchname
          - definition
          - uuid
          - host
          - user
          - start
        additionalProperties: no
  next_batchid:
    type: integer
additionalProperties: no
"""
class LocalManager(EtcdManager):
    def __init__(self, batchid, batchname, default_batchname, settings,
                 proc_type, batchuser):

        super(LocalManager, self).__init__(
            batchid, batchname, default_batchname, settings,
            proc_type, batchuser)

        # Find the uid: if we are executed as a management plugin via
        # sudo the uid will be set as an env var, otherwise we can use
        # the current user unless a specific user is requested for
        # commands which support it
        if batchuser and self.proc_type == ProcessType.USER:
            self.batchuser = batchuser
        else:
            try:
                self.batchuser = os.environ['SUDO_USER']
            except KeyError:
                self.batchuser = pwd.getpwuid(os.getuid()).pw_name

        self.pcocc_state_dir = os.path.join(os.path.expanduser('~/.pcocc'))

        # Find the job id.
        # Look in order at the specified job id, job name, environment variable,
        # and default job name
        self.batchid = 0
        if batchid:
            self.batchid = batchid
        elif batchname:
            self.batchid = self.find_job_by_name(self.batchuser, batchname)
        elif not batchuser:
            try:
                self.batchid = int(os.environ['PCOCC_LOCAL_JOB_ID'])
            except KeyError:
                if default_batchname:
                    try:
                        self.batchid = self.find_job_by_name(self.batchuser,
                                                             default_batchname)
                    except InvalidJobError:
                        pass


        if self.batchid == 0 or self.proc_type == ProcessType.OTHER:
            # Not related to a job, no need to initialize job state
            return


        job_record = self._get_job_record(self.batchid)
        self.nodeset = NodeSet(job_record['host'])
        # Only on node per job in local mode
        self.node_rank = 0

        # Define working directories
        self.node_state_dir = '/tmp/.pcocc_%s_node' % (self.batchid)
        self.vm_state_dir_prefix = '/tmp/.pcocc_%s_vm' % (self.batchid)
        self.cluster_state_dir = os.path.join(self.pcocc_state_dir,
                                              'job_%s' % (self.batchid))


        if self.proc_type == ProcessType.HYPERVISOR:
            self._init_vm_dir()

        if self.proc_type == ProcessType.LAUNCHER:
            self._init_cluster_dir()

    def alloc(self, cluster, alloc_opt, cmd):
        if self._in_a_job:
            raise AllocationError("already in a job")

        if self._etcd_auth_type == 'password':
            os.environ['SPANK_PCOCC_REQUEST_CRED'] = self._get_keyval_credential()

        os.environ['PCOCC_LOCAL_CLUSTER_DEFINITION'] = cluster.resource_definition

        parser = argparse.ArgumentParser()
        parser.add_argument('-c','--cpus-per-vm', type=int, default='1',
                            dest='ncpus')
        parser.add_argument('-J','--job-name', type=str, default='pcocc',
                            dest='jobname')
        parser.add_argument('--core-set', type=str, default='',
                            dest='coreset')
        parser.add_argument('-n', '--nvms', type=int, default=1,
                            dest='nvms')
        parser.add_argument('-m', '--mem-per-core', type=int, default='1000',
                            dest='mpc')

        alloc_opt = parser.parse_args(alloc_opt)

        if (alloc_opt.mpc < 1):
            raise AllocationError('invalid mem-per-core: {0}'.format(
                alloc_opt.mpc))
        os.environ['PCOCC_LOCAL_MEM_PER_CPU'] = str(alloc_opt.mpc)

        if (alloc_opt.ncpus < 1):
            raise AllocationError('invalid cpus-per-vm: {0}'.format(
                alloc_opt.ncpus))
        os.environ['PCOCC_LOCAL_CPUS_PER_VM'] = str(alloc_opt.ncpus)

        if not re.match(r'[a-zA-Z_]\w*', alloc_opt.jobname):
            raise AllocationError('invalid job-name: {0}'.format(
                alloc_opt.jobname))
        os.environ['PCOCC_LOCAL_JOB_NAME'] = alloc_opt.jobname

        if alloc_opt.coreset:
            try:
                _ = RangeSet(alloc_opt.coreset)
            except:
                raise AllocationError('invalid core-set: {0}'.format(
                    alloc_opt.coreset))
            os.environ['PCOCC_LOCAL_CORE_SET'] = alloc_opt.coreset

        self._req_uuid = uuid.uuid4()
        os.environ['PCOCC_LOCAL_JOB_UUID'] = str(self._req_uuid)

        # TODO: Only one VM in local mode for now
        os.environ['PCOCC_LOCAL_PROCID'] = '0'
        if (alloc_opt.nvms != 1):
            raise AllocationError('local batch manager only '
                                  'supports 1 VM per cluster')

        jobid_in_use = None
        try:
            jobid_in_use = self.find_job_by_name(self.batchuser,
                                                 alloc_opt.jobname,
                                                 socket.gethostname().split('.')[0])
        except:
            pass

        if not jobid_in_use is None:
            logging.warning('Job name %s is already in use by %s',
                    alloc_opt.jobname,
                    jobid_in_use)

        self._run_pid = 0
        self._shutdown = False

        # Make sure we don't get spuriously interrupted
        # once we start allocating host resources
        signal.signal(signal.SIGINT, signal.SIG_IGN)
        term_sigfd = fake_signalfd([signal.SIGTERM, signal.SIGABRT])

        subprocess.check_call(['sudo',
                               'pcocc'] + Config().verbose_opt +
                               ['internal', 'setup', 'init'])
        atexit.register(self._run_resource_cleanup)

        subprocess.check_call(['sudo',
                               'pcocc'] + Config().verbose_opt +
                               ['internal', 'setup', 'create'])

        self.batchid = self._uuid_to_batchid(self.batchuser, self._req_uuid)
        os.environ['PCOCC_LOCAL_JOB_ID'] = str(self.batchid )

        heartbeat = threading.Thread(None, self._hearbeat_thread)
        heartbeat.start()

        # We expect to be given 60s to shutdown so give 50s to
        # our child process
        r, _, s = wait_or_term_child(subprocess.Popen(cmd),
                                     signal.SIGTERM, term_sigfd, 50)
        stop_threads.set()
        if s == CHILD_EXIT.KILL:
            raise PcoccError('VM launcher did not acknowlege VM shutdown ' +
                             'request after SIGTERM was received')
        return r

    def _hearbeat_thread(self):
        while not stop_threads.wait(30):
            self._update_heartbeat()

    def _run_resource_cleanup(self):
        os.environ['PCOCC_LOCAL_JOB_ID'] = str(self._uuid_to_batchid(self.batchuser,
                                                                       self._req_uuid))
        subprocess.call(['sudo', 'pcocc'] + Config().verbose_opt +
                         ['internal', 'setup', 'delete'])

    def run(self, cluster, run_opt, cmd):
        """Launch the VM tasks"""
        return subprocess.Popen(cmd)

    @property
    def task_rank(self):
        self._only_in_a_job()
        return int(os.environ['PCOCC_LOCAL_PROCID'])

    @property
    def cluster_definition(self):
        return os.environ.get('PCOCC_LOCAL_CLUSTER_DEFINITION')

    @property
    def mem_per_core(self):
        """Returns the amount of memory allocated per core in MB"""
        self._only_in_a_job()
        return int(os.environ['PCOCC_LOCAL_MEM_PER_CPU'])

    @property
    def num_cores(self):
        """Returns the number of cores allocated per task

        Only valid for hypervisor processes

        """
        self._only_in_a_job()
        return int(os.environ['PCOCC_LOCAL_CPUS_PER_VM'])

    def get_host_rank(self, rank):
        self._only_in_a_job()
        return 0

    def get_rank_on_host(self, rank):
        self._only_in_a_job()
        return rank

    def _cpuset_cluster(self, batchid=None):
        if batchid is None:
            batchid = self.batchid

        return os.path.join(self._cpuset_base(), 'pcocc', str(batchid))

    def _cpuset_base(self):
        cpuset_base = subprocess.check_output(['lssubsys', '-m', 'cpuset'])
        cpuset_base = cpuset_base.split(' ')[1].strip()
        return cpuset_base

    def _validate_jobname(self, batchname):
        if not re.match('[a-zA-Z0-9_-]+', batchname):
            raise InvalidJobError('Invalid characters in job name {0}'.format(
                batchname))

    def _job_allocation_key(self):
        return 'public/batch-local/job_allocation_state'

    def _cleanup_orphan_jobs(self):
        """Cleanup jobs which were not properly deleted
        """
        job_alloc_state = self.read_key('global',
                                        self._job_allocation_key())
        job_alloc_state = self._validate_job_state(job_alloc_state)

        for batchid, job in job_alloc_state['jobs'].iteritems():
            if job['host'] == socket.gethostname().split('.')[0]:
                f = None
                try:
                    f = open(os.path.join(self._cpuset_cluster(int(batchid)), 'tasks'))
                except IOError:
                    pids = ''

                if f:
                    pids = f.read().splitlines()
                    f.close()

                if not pids:
                    logging.warning('Trying to clean orphan job %s', batchid)
                    subprocess.call(['pcocc'] + Config().verbose_opt +
                                     ['internal', 'setup', 'delete', '-j', batchid, '--nolock'])

    def _list_alive_jobs(self):
        path = self.get_key_path('global/user', 'batch-local/heartbeat')
        d = self.read_dir('global/user', 'batch-local/heartbeat')
        if d is None:
            return []

        batchids = []
        for child in d.children:
            if child.key == path:
                continue
            else:
                try:
                    batchids.append(int(os.path.split(child.key)[-1]))
                except:
                    logging.warning('Invalid heartbeat entry for '
                                    'user %s: %s',
                                        self.batchuser,
                                        child.key
                                    )
        return batchids

    def _update_heartbeat(self, ttl=60):
        _ = self.write_ttl('global/user',
                           'batch-local/heartbeat/{0}'.format(self.batchid),
                           '',
                           ttl)

    def list_all_jobs(self, include_expired=False, details=False):
        """List all jobs in the cluster

        Returns a list of the batchids of all jobs in the cluster

        """
        job_alloc_state = self.read_key('global',
                                        self._job_allocation_key())
        job_alloc_state = self._validate_job_state(job_alloc_state)


        user_live_batchids = self._list_alive_jobs()

        jobs = {}
        for batchid, job in job_alloc_state['jobs'].iteritems():
            batchid = int(batchid)
            if (include_expired or
                job['user'] != self.batchuser or
                datetime_to_epoch(datetime.datetime.utcnow()) - job['start'] < 5 or
                batchid in user_live_batchids):

                jobs[batchid] = job
            else:
                logging.warning('list_all_jobs: ignoring stale job %d on %s ',
                    batchid, job['host'])

        if details:
            return jobs
        else:
            return list(jobs.keys())

    def get_job_details(self, user=None):
        """Gather details about pcocc jobs
        """
        ret = []
        for batchid, job in self.list_all_jobs(details=True).iteritems():
            if user and job['user'] != user:
                continue

            elapsed = datetime_to_epoch(datetime.datetime.utcnow()) - job['start']
            entry = {'batchid':    str(batchid),
                     'user':       job['user'],
                     'exectime':   str(datetime.timedelta(seconds=elapsed)),
                     'timelimit':  'N/A',
                     'partition':  'N/A',
                     'node_count':  job['host'],
                     'state':      job['host'],
                     'jobname':    job['batchname']}


            ret.append(entry)

        return ret


    def find_job_by_name(self, user, batchname,
                         host=None):

        job_alloc_state = self.read_key('global',
                                        self._job_allocation_key())
        job_alloc_state = self._validate_job_state(job_alloc_state)

        batchids = []
        hosts = []
        for batchid, job in job_alloc_state['jobs'].iteritems():
            if (job['user'] == user and job['batchname'] == batchname):
                if host and job['host'] == host:
                    return int(batchid)
                elif not host:
                    if job['host'] == socket.gethostname().split('.')[0]:
                        return int(batchid)
                    else:
                        batchids.append(int(batchid))
                        hosts.append(job['host'])


        if not batchids:
            raise InvalidJobError('no valid match for name '+ batchname)

        if len(batchids) > 1:
            raise InvalidJobError('name {0} is ambiguous (exists on {1})'.format(
                batchname, ', '.join(hosts)))

        return batchids[0]

    def _get_job_record(self, batchid):
        job_alloc_state = self.read_key('global',
                                        self._job_allocation_key())
        job_alloc_state = self._validate_job_state(job_alloc_state)

        try:
            return job_alloc_state['jobs'][str(batchid)]
        except KeyError:
            raise InvalidJobError('no job record for batchid ' + str(batchid))

    def _do_alloc_job(self, user, batchname, uuid, definition, job_alloc_state):
        """Helper to allocate a jobname"""
        job_alloc_state = self._validate_job_state(job_alloc_state)

        try:
            batchid = self._uuid_to_batchid(user, uuid, job_alloc_state)
        except AllocationError:
            batchid = -1

        if batchid != -1:
            raise AllocationError(
                'uuid {0} already in use by job {1} on host {2}'.format(
                    uuid, batchid, job_alloc_state['jobs'][str(batchid)]['host']))


        try:
            batchid = self.find_job_by_name(user, batchname,
                                            socket.gethostname().split('.')[0])
        except InvalidJobError:
            batchid = -1

        if batchid != -1:
            raise AllocationError(
                'Jobname {0} already in use by job {1} on host {2}'.format(
                    batchname, batchid, job_alloc_state['jobs'][str(batchid)]['host']))


        batchid = job_alloc_state['next_batchid']
        job_alloc_state['next_batchid'] = batchid + 1

        job_alloc_state['jobs'][str(batchid)] = {
            'batchname': batchname,
            'definition': definition,
            'uuid': str(uuid),
            'user': user,
            'host': socket.gethostname().split('.')[0],
            'start': datetime_to_epoch(datetime.datetime.utcnow())
        }

        job_alloc_state = self._validate_job_state(job_alloc_state)
        return yaml.dump(job_alloc_state), batchid

    def _do_free_job(self, user, uuid, job_alloc_state):
        """Helper to allocate a jobname"""
        job_alloc_state = self._validate_job_state(job_alloc_state)

        batchid = self._uuid_to_batchid(user, uuid, job_alloc_state)
        job_record = job_alloc_state['jobs'].pop(str(batchid))

        job_alloc_state = self._validate_job_state(job_alloc_state)
        return yaml.dump(job_alloc_state), job_record

    def _validate_job_state(self, state):
        if state is None:
            job_alloc_state = {'jobs': {}, 'next_batchid': 1}
        elif isinstance(state, dict):
            job_alloc_state = state
        else:
            job_alloc_state = yaml.safe_load(state)

        schema = yaml.safe_load(local_job_allocation_schema)
        jsonschema.validate(job_alloc_state, schema)

        return job_alloc_state

    def _uuid_to_batchid(self, user, uuid, job_alloc_state=None):
        if job_alloc_state is None:
            job_alloc_state = self.read_key('global',
                                            self._job_allocation_key())

        job_alloc_state = self._validate_job_state(job_alloc_state)

        try:
            for batchid, job in job_alloc_state['jobs'].iteritems():
                if job['uuid'] == str(uuid):
                    return int(batchid)
        except KeyError:
            pass

        raise AllocationError('Unable to find job with uuid {0}'.format(uuid))

    def vm_count(self):
        return 1

    def init_node(self):
        self._cleanup_orphan_jobs()

    def create_resources(self):
        req_jobname = os.getenv('PCOCC_LOCAL_JOB_NAME', None)
        req_uuid = os.getenv('PCOCC_LOCAL_JOB_UUID', None)
        caller_pid = psutil.Process(os.getppid()).ppid()

        if not req_jobname:
            raise AllocationError('Job name was not specified')
        self._validate_jobname(req_jobname)

        if not req_uuid:
            raise AllocationError('Job uuid was not specified')

        try:
            req_uuid = uuid.UUID(req_uuid)
        except Exception:
            raise AllocationError('Invalid uuid')

        self.batchid = self.atom_update_key(
            'global',
            self._job_allocation_key(),
            self._do_alloc_job,
            self.batchuser,
            req_jobname,
            req_uuid,
            self.cluster_definition)
        self._update_heartbeat()


        # Create cpuset cgroup and move caller into it
        try:
            with open(os.path.join(
                    self._cpuset_base(), 'cgroup.clone_children'), 'w') as f:
                f.write('1')

            os.makedirs(self._cpuset_cluster())
        except OSError as e:
            if e.errno == errno.EEXIST:
                pass
            else:
                raise BatchError(
                    'Unable to set requested cpuset: ' + str(e))

        with open(os.path.join(self._cpuset_cluster(), 'tasks'), 'w') as f:
            f.write(str(caller_pid))

        try:
            cores = os.environ.get('PCOCC_LOCAL_CORE_SET', None)
            if cores:
                cores = RangeSet(cores)
                pus = subprocess_check_output(['hwloc-calc',
                                               '--intersect', 'PU',
                                               'cores:{0}'.format(cores)]).strip()

                with open(os.path.join(self._cpuset_cluster(),
                                       'cpuset.cpus'), 'w') as f:
                    f.write(str(pus))
        except Exception as e:
            raise BatchError('Unable to set requested cpuset: ' + str(e))

        self.node_rank=0
        self.nodeset=NodeSet(socket.gethostname().split('.')[0])

    def delete_resources(self, force=False):
        if not self.batchid:
            raise AllocationError('Job id was not specified')

        job_record = self._get_job_record(self.batchid)
        if self.batchuser != 'root' and job_record['user'] != self.batchuser:
            raise AllocationError('Wrong user for job {0}'.format(self.batchid))

        remote = False
        if job_record['host'] != socket.gethostname().split('.')[0]:
            if force:
                remote = True
            else:
                raise AllocationError('Wrong host for job {0}'.format(self.batchid))

        if not remote:
            caller_pid = psutil.Process(os.getppid()).ppid()
            try:
                f = open(os.path.join(self._cpuset_cluster(), 'tasks'))
            except IOError:
                logging.warning('No cpuset for job %s', self.batchid)
                f = None

            if f:
                pids = f.read().splitlines()

                # Only the allocation process is allowed to delete resources
                # while there are still active processes
                if pids and (str(caller_pid) not in pids) and not force:
                    raise BatchError('There are still running processes for job '
                                     '{0} ({1})'.format(
                            self.batchid, ' '.join(pids)))

                for pid in pids:
                    pid = int(pid)
                    try:
                        if ((psutil.Process(pid).username() == self.batchuser) and
                            pid != caller_pid and pid != os.getppid()
                            and pid != os.getpid()):
                            os.kill(pid, signal.SIGKILL)
                    except psutil.NoSuchProcess:
                        pass
                    except OSError:
                        pass

                f.close()

        try:
            job_record = self.atom_update_key(
                'global',
                self._job_allocation_key(),
                self._do_free_job,
                self.batchuser,
                job_record['uuid'])

            self._update_heartbeat(0)
        except:
            logging.error('No allocation record to delete '
                          'matching job %s for user %s',
                          self.batchid,
                          self.batchuser)
        if remote:
            logging.warning('Exiting without performing host resource cleanup for '
                            'forced remote job deletion')
            sys.exit(1)

        # Allow recovering from the jobid if the allocation process
        # died without calling resource deletion
        if os.environ.get('PCOCC_LOCAL_CLUSTER_DEFINITION', None) is None:
            os.environ['PCOCC_LOCAL_CLUSTER_DEFINITION'] = job_record['definition']



class SlurmManager(EtcdManager):
    def __init__(self, batchid, batchname, default_batchname, settings,
                 proc_type, batchuser):

        super(SlurmManager, self).__init__(
            batchid, batchname, default_batchname, settings,
            proc_type, batchuser)

        # Parse batch settings
        self._batch_args = settings.get('batch-args', [])

        # At init time we get all the necessery info about the job state
        # from the batch scheduler
        self._rank_map = []

        if batchuser and self.proc_type == ProcessType.USER:
            # For some CLI invocations, we allow targeting arbitrary users
            self.batchuser = batchuser
        else:
            # Find the uid: if we are executed as a management plugin,
            # it has to be provided as an evironment variable by
            # SLURM. Otherwise, we default to the current user.
            try:
                uid = int(os.environ['SLURM_JOB_UID'])
            except KeyError:
                if self.proc_type == ProcessType.SETUP:
                    raise
                # Assume the user is the caller
                uid = os.getuid()

            self.batchuser = pwd.getpwuid(uid).pw_name

        # Find the job id.
        # Look in order at the specified job id, job name, environment variable,
        # and default job name
        self.batchid = 0
        if batchid:
            self.batchid = batchid
        elif batchname:
            self.batchid = self.find_job_by_name(self.batchuser, batchname)
        elif not batchuser:
            try:
                self.batchid = int(os.environ['SLURM_JOB_ID'])
            except KeyError:
                if default_batchname:
                    try:
                        self.batchid = self.find_job_by_name(self.batchuser,
                                                             default_batchname)
                    except InvalidJobError:
                        pass

        self.pcocc_state_dir = os.path.join(os.path.expanduser('~/.pcocc'))

        if self.batchid == 0 or self.proc_type == ProcessType.OTHER:
            # Not related to a job, no need to initialize job state
            return

        # If we are inside the allocation we can get the nodelist from an
        # environment variable. Otherwise, we'll have to query it with squeue
        if ('SLURM_NODELIST' in os.environ and
            self.batchid == int(os.environ['SLURM_JOB_ID'])):
            self.nodeset = NodeSet(os.environ['SLURM_NODELIST'])
        else:
            try:
                self.nodeset = NodeSet(
                    subprocess_check_output(['squeue', '-j',
                                             str(self.batchid),
                                             '-u', self.batchuser,
                                             '-h', '-o', '%N']))
            except subprocess.CalledProcessError:
                raise InvalidJobError('no valid match for id '+ str(self.batchid))

            except NodeSetException:
                raise InvalidJobError('no valid match for id '+ str(self.batchid))

        # Define working directories
        self.node_state_dir = '/tmp/.pcocc_%s_node' % (self.batchid)
        self.vm_state_dir_prefix = '/tmp/.pcocc_%s_vm' % (self.batchid)
        self.cluster_state_dir = os.path.join(self.pcocc_state_dir,
                                              'job_%s' % (self.batchid))

        # Compute the rank of our node among the allocated nodes
        # FIXME: This assumes the slurm nodeset is based on host names
        hostname = socket.gethostname().split('.')[0]
        for i, node in enumerate(self.nodeset):
            if node == hostname:
                self.node_rank = i
                break
        else:
            self.node_rank = -1

        # For now, we let the batch manager handle VM placement and do not allow
        # the user to set this at the cluster definition level.
        # We compute the vm rank to host mapping at resource allocation time
        # which we store for later pcocc commands to use
        if (self.proc_type == ProcessType.SETUP or
            self.proc_type == ProcessType.LAUNCHER or
            self.proc_type == ProcessType.HYPERVISOR):
            self._build_rank_map()
        else:
            self._load_rank_map()

        if self.proc_type == ProcessType.HYPERVISOR:
            self._init_vm_dir()

        if self.proc_type == ProcessType.LAUNCHER:
            self._init_cluster_dir()

    def find_job_by_name(self, user, batchname, host=None):
        """Return a jobid matching a user and batchname

        There must be one and only one job matching the specified criteria

        """

        cmd = [ 'squeue' , '-n', batchname, '-u', user,
                '-h', '-o', '%i' ]
        if host:
            cmd += ['-w', host]

        try:
            batchid = subprocess_check_output(cmd)
        except subprocess.CalledProcessError:
            raise InvalidJobError('no valid match for name '+ batchname)

        if not batchid:
            raise InvalidJobError('no valid match for name '+ batchname)

        try:
            return int(batchid)
        except ValueError:
            raise InvalidJobError('name %s is ambiguous' % batchname)

    def list_all_jobs(self):
        """List all jobs in the cluster
        Returns a list of the batchids of all jobs in the cluster
        (including non pcocc jobs)
        """
        try:
            joblist = subprocess_check_output(['squeue', '-ho',
                                               '%A']).split()
            return [ int(j) for j in joblist ]
        except subprocess.CalledProcessError as err:
            raise BatchError('Unable to retrieve SLURM job list: ' + str(err))

    def get_job_details(self, user=None):
        """Gather details about pcocc jobs
        """

        candidates = []
        try:
            d = self.keyval_client.read('/pcocc/cluster/')
        except etcd.EtcdKeyNotFound:
            # No pcocc jobs, no need to go further
            return []

        for child in d.children:
            try:
                candidates.append(int(os.path.split(child.key)[-1]))
            except ValueError:
                pass

        if user:
            user_filter = ['-u', user]
        else:
            user_filter = []

        try:
            joblist = subprocess_check_output(['squeue'] + user_filter +
                                              ['-ho','%A %u %M %l %P %D %t %j']).split("\n")
        except subprocess.CalledProcessError as err:
            raise BatchError('Unable to retrieve SLURM job list: ' + str(err))

        ret = []

        for line in joblist:
            if not line:
                continue
            entry = line.split()
            try:
                parsed_entry = {'batchid':    int(entry[0]),
                                'user':       entry[1],
                                'exectime':   entry[2],
                                'timelimit':  entry[3],
                                'partition':  entry[4],
                                'node_count': int(entry[5]),
                                'state':      entry[6],
                                'jobname':    ' '.join(entry[7:])}
            except (IndexError, ValueError):
                logging.error('Skipping unexpected slurm output: %s-', line)
                continue

            if user and parsed_entry['user'] != user:
                continue

            if parsed_entry['batchid'] not in candidates:
                continue

            ret.append(parsed_entry)

        return ret

    def list_user_jobs(self, user):
        all_jobs = self.list_all_jobs()
        return [ j for j in all_jobs if j["user"] == user ]


    def _build_rank_map(self, tasks_per_node=None):
        self._only_in_a_job()
        node_index = 0

        assert(not self._rank_map)

        if not tasks_per_node:
            tasks_per_node = os.environ['SLURM_TASKS_PER_NODE']

        for node_def in tasks_per_node.split(','):
            match = re.search(r'(\d+)\(x(\d+)\)', node_def)
            if match:
                ntasks = int(match.group(1))
                nnodes = int(match.group(2))
            else:
                ntasks = int(node_def)
                nnodes = 1

            for _ in xrange(nnodes):
                for _ in xrange(ntasks):
                    self._rank_map.append(node_index)
                node_index += 1

        if (self.proc_type == ProcessType.SETUP and
            self.node_rank == 0):
            self.write_key('cluster', 'rank_map',
                           yaml.dump(self._rank_map))

    def _load_rank_map(self):
        if self._rank_map:
            raise BatchError("Rank map was already loaded")

        data = self.read_key(
            'cluster', 'rank_map', blocking=True)
        if not data:
            raise BatchError("Unable to load rank map")

        self._rank_map = yaml.safe_load(data)

    def vm_count(self):
        return len(self._rank_map)

    def run(self, cluster, run_opt, cmd):
        """Launch the VM tasks"""
        return subprocess.Popen(['srun'] + run_opt +
                                ['--vm', cluster.resource_definition] +
                                cmd)

    def alloc(self, cluster, alloc_opt, cmd):
        """Allocate an interactive job"""
        if self._in_a_job:
            raise AllocationError("already in a job")
        try:
            if self._etcd_auth_type == 'password':
                os.environ['PCOCC_REQUEST_CRED'] = self._get_keyval_credential()
            #Make sure user owned keys and cert are generated
            os.environ['SLURM_DISTRIBUTION'] = 'block:block'
            ret = subprocess.call(['salloc'] + self._batch_args + alloc_opt + cmd)
        except KeyboardInterrupt:
            raise AllocationError("interrupted")

        return ret

    def batch(self, cluster, alloc_opt, cmd):
        """Allocate a batch job"""
        try:
            if self._etcd_auth_type == 'password':
                os.environ['PCOCC_REQUEST_CRED'] = self._get_keyval_credential()
            os.environ['SLURM_DISTRIBUTION'] = 'block:block'
            subprocess.check_call(['sbatch'] +
                                  ['-J', 'pcocc','--signal', '15'] +
                                  self._batch_args +
                                  alloc_opt +
                                  [cmd])
        except subprocess.CalledProcessError as err:
            raise AllocationError(str(err))

    @property
    def task_rank(self):
        """Returns the rank of the current process in the SLURM job

        This is only valid for hypervisor processes

        """
        self._only_in_a_job()
        return int(os.environ['SLURM_PROCID'])

    @property
    def cluster_definition(self):
        """Returns the cluster definition passed to the spank plugin

        This is only valid for node setup processes

        """
        self._only_in_a_job()
        return os.environ['SPANK_PCOCC_SETUP']

    @property
    def num_nodes(self):
        """Returns the number of host nodes in the job"""
        self._only_in_a_job()
        return len(self.nodeset)

    @property
    def _in_a_job(self):
        return self.batchid != 0

    @property
    def mem_per_core(self):
        """Returns the amount of memory allocated per core in MB"""
        self._only_in_a_job()

        raw_output = subprocess_check_output(
            ['scontrol', 'show', 'jobid=%d' % (self.batchid)])

        # First, assume the memory was specified on a per cpu basis:
        match = re.search(r'MinMemoryCPU=(\d+)M', raw_output)
        if match:
            return int(match.group(1))

        match = re.search(r'MinMemoryCPU=(\d+)G', raw_output)
        if match:
            return int(match.group(1)) * 1024

        # Else, try a per node basis:
        match = re.search(r'MinMemoryNode=(\d+)M', raw_output)
        if match:
            return int(match.group(1)) // self.num_cores

        match = re.search(r'MinMemoryNode=(\d+)G', raw_output)
        if match:
            return int(match.group(1)) * 1024 // self.num_cores

        raise BatchError("Failed to read memory per core")

    @property
    def num_cores(self):
        """Returns the number of cores allocated per task

        Only valid for hypervisor processes

        """
        self._only_in_a_job()

        try:
            return int(os.environ['SLURM_CPUS_PER_TASK'])
        except KeyError:
            # The variable isn't defined when not
            # provided explicitely
            return 1

    @property
    def coreset(self):
        """Returns the list of cores allocated to the current task

        Only valid for hypervisor processes
-
        """
        self._only_in_a_job()
        # Assume we've been bound to our cores by SLURM
        taskset = subprocess_check_output(['hwloc-bind', '--get']).strip()
        coreset = subprocess_check_output(['hwloc-calc', '--intersect', 'Core',
                                           taskset]).strip()

        return RangeSet(coreset)

    def get_host_rank(self, rank):
        """Returns rank of the host where the specified task rank runs"""
        self._only_in_a_job()
        return self._rank_map[rank]

    def get_rank_on_host(self, rank):
        """Returns the relative rank of the specified task rank on its host

        """
        self._only_in_a_job()
        host_rank = self._rank_map[rank]
        rank_on_host = 0

        while ( (rank - rank_on_host >= 0) and
                (self._rank_map[rank - rank_on_host] == host_rank) ):
            rank_on_host = rank_on_host + 1

        return rank_on_host - 1

    def populate_env(self):
        """ Populate environment variables with batch related info to propagate """
        os.putenv('PCOCC_JOB_ID', str(self.batchid))
        os.putenv('PCOCC_JOB_NAME', os.environ.get('SLURM_JOB_NAME', ''))