python source code of safelife

"""
Logging utilities for recording SafeLife episodes and episode statistics.

This module contains a number of classes to make logging in SafeLife easier.
The `SafeLifeLogger` class does the bulk of the actual logging work: it
maintains handles and writes to test and training logs, writes data to
tensorboard, and records agent trajectories as movies and data archives.

There are two main functions that `SafeLifeLogger`, and, more generally, the
`BaseLogger` base class, implement. The `log_episode()` function logs
statistics for a single SafeLife episode, and is generally called by instances
of the `SafeLifeLogWrapper` class. The `log_scalars()` function logs arbitrary
scalar statistics to tensorboard. This can be used from within training
algorithms to monitor training progress (loss, value functions, etc.).
There is also a `cumulative_stats` attribute that contains the total number of
training episodes and steps taken, which can be helpful for setting
hyperparameter training schedules in the training algorithm or for setting a
curriculum for the environment itself.

The `RemoteSafeLifeLogger` class has the same interface, but it's suitable
for use in multiprocessing environments that use Ray. The actual logging work
is delegated to a remote actor with `RemoteSafeLifeLogger` instances holding on
to references to that actor. Importantly, this means that `RemoteSafeLifeLogger`
instances can be copied within or between processes without competing for
access to a single open log or tensorboard file.

Finally, the `SafeLifeLogWrapper` class can wrap `SafeLifeEnv` environment
instances to automatically log episodes upon completion. With this wrapper in
place, the training algorithms themselves don't actually need to log any extra
episode statistics; they just need to run episodes in the environment.
"""

import os
import time
import json
import textwrap
import logging
import logging.config
from datetime import datetime

import gym
import numpy as np

try:
    import ray
    ray_remote = ray.remote
except ImportError:
    ray = None
    def ray_remote(func): return func

from .helper_utils import load_kwargs
from .side_effects import side_effect_score
from .render_text import cell_name
from .render_graphics import render_file

logger = logging.getLogger(__name__)


class StreamingJSONWriter(object):
    """
    Serialize streaming data to JSON.

    This class holds onto an open file reference to which it carefully
    appends new JSON data. Individual entries are input in a list, and
    after every entry the list is closed so that it remains valid JSON.
    When a new item is added, the file cursor is moved backwards to overwrite
    the list closing bracket.
    """
    def __init__(self, filename, encoder=json.JSONEncoder):
        if os.path.exists(filename):
            self.file = open(filename, 'r+')
            self.delimeter = ','
        else:
            self.file = open(filename, 'w')
            self.delimeter = '['
        self.encoder = encoder

    def dump(self, obj):
        """
        Dump a JSON-serializable object to file.
        """
        data = json.dumps(obj, cls=self.encoder)
        close_str = "\n]\n"
        self.file.seek(max(self.file.seek(0, os.SEEK_END) - len(close_str), 0))
        self.file.write("%s\n    %s%s" % (self.delimeter, data, close_str))
        self.file.flush()
        self.delimeter = ','

    def close(self):
        self.file.close()


class BaseLogger(object):
    """
    Defines the interface for SafeLife loggers, both local and remote.
    """
    def __init__(self, logdir):
        self.logdir = logdir
        self.cumulative_stats = {
            'training_episodes': 0,
            'training_steps': 0,
            'testing_episodes': 0,
        }

    def log_episode(self, game, info={}, history=None, training=True):
        raise NotImplementedError

    def log_scalars(self, data, global_step=None, tag=None):
        raise NotImplementedError


class SafeLifeLogger(BaseLogger):
    """
    Logs episode statistics for SafeLife.

    Attributes
    ----------
    logdir : str
        Directory to save log data.
    cumulative_stats : dict
        Cumulative statistics for the training run. Includes
        ``training_steps``, ``training_episodes``, and ``testing_epsodes``.
        Note that this dictionary gets updated in place, so it can easily be
        passed to other functions to do e.g. hyperparameter annealing.
    training_video_name : str
        Format string for the training video files.
    testing_video_name : str
        Format string for the testing video files.
    training_video_interval : int
        Interval at which to save training videos. If 1, every episode is saved.
    testing_video_interval : int
        Interval at which to save testing videos.
    record_side_effects : bool
        If true (default), side effects are calculated at the end of each
        episode.
    summary_writer : tensorboardX.SummaryWriter
        Writes data to tensorboard. The SafeLifeLogger will attempt to create
        a new summary writer for the log directory if one is not supplied.
    """

    logdir = None
    cumulative_stats = None
    summary_writer = None

    training_video_name = "training-e{training_episodes}-s{training_steps}"
    testing_video_name = "testing-s{training_steps}-{level_name}"
    training_video_interval = 100
    testing_video_interval = 1

    training_log = "training-log.json"
    testing_log = "testing-log.json"

    record_side_effects = True

    _testing_log = None
    _training_log = None
    summary_writer = None

    console_training_msg = textwrap.dedent("""
        Training episode completed.
            level name: {level_name}
            episode #{training_episodes};  training steps = {training_steps}
            clock: {time}
            length: {length}
            reward: {reward} / {reward_possible} (exit cutoff = {reward_needed})
    """[1:-1])
    console_testing_msg = textwrap.dedent("""
        Testing episode completed.
            level name: {level_name}
            clock: {time}
            length: {length}
            reward: {reward} / {reward_possible} (exit cutoff = {reward_needed})
    """[1:-1])

    def __init__(self, logdir, **kwargs):
        load_kwargs(self, kwargs)

        self.logdir = logdir
        self.cumulative_stats = {
            'training_episodes': 0,
            'training_steps': 0,
            'testing_episodes': 0,
        }
        self._has_init = False

    def init_logdir(self):
        if not self._has_init and self.logdir:
            if self.testing_log:
                self._testing_log = StreamingJSONWriter(
                    os.path.join(self.logdir, self.testing_log))
            if self.training_log:
                self._training_log = StreamingJSONWriter(
                    os.path.join(self.logdir, self.training_log))
            if self.summary_writer is None:
                try:
                    from tensorboardX import SummaryWriter
                    self.summary_writer = SummaryWriter(self.logdir)
                except ImportError:
                    logger.error(
                        "Could not import tensorboardX. "
                        "SafeLifeLogger will not write data to tensorboard.")
        self._has_init = True

    def log_episode(self, game, info={}, history=None, training=True):
        """
        Log an episode. Outputs (potentially) to file, tensorboard, and video.

        Parameters
        ----------
        game : SafeLifeGame
        info : dict
            Episode data to log. Assumed to contain 'reward' and 'length' keys,
            as is returned by the ``SafeLifeEnv.step()`` function.
        history : dict
            Trajectory of the episode. Should contain keys 'board', 'goals',
            and 'orientations'.
        training : bool
            Whether to log output as a training or testing episode.
        """
        self.init_logdir()  # init if needed

        if training:
            self.cumulative_stats['training_episodes'] += 1
            self.cumulative_stats['training_steps'] += info.get('length', 0)
            num_episodes = self.cumulative_stats['training_episodes']
            history_name = (
                self.training_video_interval > 0 and
                (num_episodes - 1) % self.training_video_interval == 0 and
                self.training_video_name
            )
            console_msg = self.console_training_msg
        else:
            self.cumulative_stats['testing_episodes'] += 1
            num_episodes = self.cumulative_stats['testing_episodes']
            history_name = (
                self.testing_video_interval > 0 and
                (num_episodes - 1) % self.testing_video_interval == 0 and
                self.testing_video_name
            )
            console_msg = self.console_testing_msg

        # First, log to screen.
        log_data = info.copy()
        log_data.setdefault('reward', 0)
        log_data.setdefault('length', 0)
        log_data['level_name'] = game.title
        log_data['reward'] = float(log_data['reward'])
        log_data['reward_possible'] = float(game.initial_available_points)
        log_data['reward_needed'] = game.required_points()
        log_data['time'] = datetime.now().isoformat()
        logger.info(console_msg.format(**log_data, **self.cumulative_stats))

        # Then log to file.
        if self.record_side_effects:
            log_data['side_effects'] = side_effects = {
                cell_name(cell): effect
                for cell, effect in side_effect_score(game).items()
            }
        if training and self._training_log is not None:
            self._training_log.dump(log_data)
        elif self._testing_log is not None:
            self._testing_log.dump(log_data)

        # Log to tensorboard.
        tb_data = info.copy()
        # Use a normalized reward
        tb_data['reward_frac'] = (
            log_data['reward'] / max(log_data['reward_possible'], 1))
        tb_data.pop('reward')
        if training:
            tb_data['total_episodes'] = self.cumulative_stats['training_episodes']
            tb_data['reward_frac_needed'] = game.min_performance
        if self.record_side_effects and 'life-green' in side_effects:
            amount, total = side_effects['life-green']
            tb_data['side_effects'] = amount / max(total, 1)
        tag = "training_runs" if training else "testing_runs"
        self.log_scalars(tb_data, tag=tag)

        # Finally, save a recording of the trajectory.
        if history is not None and self.logdir is not None and history_name:
            history_name = history_name.format(**log_data, **self.cumulative_stats)
            history_name = os.path.join(self.logdir, history_name) + '.npz'
            if not os.path.exists(history_name):
                np.savez_compressed(history_name, **history)
                render_file(history_name, movie_format="mp4")

    def log_scalars(self, data, global_step=None, tag=None):
        """
        Log scalar values to tensorboard.

        Parameters
        ----------
        data : dict
            Dictionary of key/value pairs to log to tensorboard.
        tag : str or None

        """
        self.init_logdir()  # init if needed

        if not self.summary_writer:
            return
        tag = "" if tag is None else tag + '/'
        if global_step is None:
            global_step = self.cumulative_stats['training_steps']
        for key, val in data.items():
            if np.isreal(val) and np.isscalar(val):
                self.summary_writer.add_scalar(tag + key, val, global_step)
        self.summary_writer.flush()


class RemoteSafeLifeLogger(BaseLogger):
    """
    Maintains a local interface to a remote logging object using ray.

    The remote logging object is a ray Actor that does lightweight wrapping
    of a SafeLifeLogger instance. This means that the same RemoteSafeLifeLogger
    can be copied to different processes while maintaining a link to the same
    actor, retrieving the same global state, and writing to the same open files.

    Note that the ``cumulative_stats`` in the local copy will generally lag
    what is available on the remote copy. It is only updated whenever an
    episode is logged, and even then it is updated asynchronously.

    Parameters
    ----------
    logdir : str
        The directory in which to log everything.
    config_dict : dict
        A dictionary of options to pass to ``logging.config.dictConfig``
        in the standard python logging library. Note that unlike standard
        python multiprocessing, ray remote actors do not inherit the current
        processing logging configuration, so this needs to be reset.
    """
    max_backlog = 50
    update_interval = 0.01

    @ray_remote
    class SafeLifeLoggingActor(object):
        def __init__(self, logger, config_dict):
            self.logger = logger
            logger.init_logdir()
            if config_dict is not None:
                logging.config.dictConfig(config_dict)

        def log_episode(self, game, info, history, training):
            self.logger.log_episode(game, info, history, training)
            return self.logger.cumulative_stats

        def log_scalars(self, data, step, tag):
            self.logger.log_scalars(data, step, tag)

        def update_stats(self, cstats):
            self.logger.cumulative_stats = cstats

    def __init__(self, logdir, config_dict=None, **kwargs):
        if ray is None:
            raise ImportError("No module named 'ray'.")
        logger = SafeLifeLogger(logdir, **kwargs)
        self.logdir = logdir
        self.actor = self.SafeLifeLoggingActor.remote(logger, config_dict)
        self._cstats = logger.cumulative_stats.copy()

        # _promises stores references to remote updates to cumulative_stats
        # that will be received in response to having sent a log item. There
        # is no point exposing this state because there is in general no way
        # to get up-to-date statistics to any thread, and therefore no benefit
        # from knowing whether you're waiting for an update.
        self._promises = []

        self._last_update = time.time()

    @property
    def cumulative_stats(self):
        next_update = self._last_update + self.update_interval
        if self._promises and time.time() > next_update:
            timeout = 0 if len(self._promises) < self.max_backlog else None
            ready, self._promises = ray.wait(
                self._promises, len(self._promises), timeout=timeout)
            if ready:
                self._cstats = ray.get(ready[-1])
            self._last_update = time.time()
        return self._cstats

    @cumulative_stats.setter
    def cumulative_stats(self, stats):
        self._cstats = stats.copy()
        self.actor.update_stats.remote(stats)

    def log_episode(self, game, info, history=None, training=True):
        self._promises.append(
            self.actor.log_episode.remote(game, info, history, training))

    def log_scalars(self, data, step=None, tag=None):
        self.actor.log_scalars.remote(data, step, tag)


class SafeLifeLogWrapper(gym.Wrapper):
    """
    Records episode data and (optionally) full agent trajectories.

    Parameters
    ----------
    logger : SafeLifeLogger
        The logger performs the actual writing to disk.
        It should be an instance of SafeLifeLogger, or any other class that
        implements a ``log_episode()`` function.
    record_history : bool
        If True (default), the full agent trajectory is sent to the logger
        along with the game state and episode info dict.
    is_training : bool
        Flag passed along to the logger. Training and testing environments
        get logged somewhat differently.
    """

    logger = None
    record_history = True
    is_training = True

    def __init__(self, env, **kwargs):
        super().__init__(env)
        load_kwargs(self, kwargs)

    def step(self, action):
        observation, reward, done, info = self.env.step(action)

        if self.record_history and not self._did_log_episode:
            game = self.env.game
            self._episode_history['board'].append(game.board)
            self._episode_history['goals'].append(game.goals)
            self._episode_history['orientation'].append(game.orientation)

        if done and not self._did_log_episode and self.logger is not None:
            self._did_log_episode = True
            self.logger.log_episode(
                game, info.get('episode', {}),
                self._episode_history if self.record_history else None,
                self.is_training)

        return observation, reward, done, info

    def reset(self):
        observation = self.env.reset()

        self._did_log_episode = False
        self._episode_history = {
            'board': [],
            'goals': [],
            'orientation': []
        }

        return observation