# Copyright (c) 2019 Alibaba Inc. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== from __future__ import absolute_import from __future__ import print_function from __future__ import division import logging from collections import OrderedDict import numpy as np import re, sys, os import gym.spaces import tensorflow as tf from easy_rl.utils.utils import prod import easy_rl.utils.hooks logger = logging.getLogger(__name__) class Executor(object): """class that handles the runtime for Agent classes Maintain the session obj and its related issues Attributes: observation_space (obj): gym.spaces.Space object for specifying the shapes and types of input observation(s). action_space (obj): gym.spaces.Space object for specifying the shapes and types of the action space. _is_single_channel (bool): indicates whether the observation is single-channel. ob_ph_spec (obj): parse observation_space to guide `Model` object for building placeholders. flattened_ob_shape (int): the shape of each (flattened) observation. action_ph_spec (tuple): the type (refers to the action distribution) and dims of action space. do_summary (bool): control whether to run summary ops at each `session.run()`. session (obj): a TensorFlow MonitoredTrainingSession object. """ def __init__(self, observation_space, action_space): """Construct an Executor object Create TensorFlow placeholders as the input nodes of Model classes first and set up the session later. Arguments: observation_space (gym.spaces.space obj): specify the shapes and types of observed states. action_space (gym.spaces.space obj): specify the shapes and types of actions. """ self.observation_space = observation_space self.action_space = action_space self._prepare_ph_spec() def _prepare_ph_spec(self): """Build the TensorFlow placeholders according to the `observation_space`. Forbid multi-channel observations where any individual channel is recursively defined as a multi-channel observation. `_prepare_ph_spec()` can easily handle recursively defined observations, but they introduce unnecessary complexity to the TensorFlow FIFOqueues used for data exchanging in distributed setting. """ if isinstance(self.observation_space, gym.spaces.Tuple): self._is_single_channel = False self.ob_ph_spec = list() for sp in self.observation_space.spaces: assert type(sp) not in [ gym.spaces.Tuple, gym.spaces.Dict ], "forbidden type {}".format(self.observation_space) self.ob_ph_spec.append(self._basic_space_to_ph_spec(sp)) self.flattened_ob_shape = (np.sum( [s[1][1] for s in self.ob_ph_spec]), ) elif isinstance(self.observation_space, gym.spaces.Dict): self._is_single_channel = False self.ob_ph_spec = OrderedDict() for sp_name, sp in self.observation_space.spaces.items(): assert type(sp) not in [ gym.spaces.Tuple, gym.spaces.Dict ], "forbidden type {}".format(self.observation_space) self.ob_ph_spec[sp_name] = self._basic_space_to_ph_spec(sp) self.flattened_ob_shape = (np.sum( [s[1][1] for s in self.ob_ph_spec.values()]), ) else: self._is_single_channel = True self.ob_ph_spec = self._basic_space_to_ph_spec( self.observation_space) self.flattened_ob_shape = self.ob_ph_spec[1][1:] if isinstance(self.action_space, gym.spaces.Discrete): self.action_ph_spec = (self.action_space.n, "Categorical") elif isinstance(self.action_space, gym.spaces.Box): self.action_ph_spec = (prod(self.action_space.shape), "DiagGaussian") else: raise ValueError("specified an unsupported action space {}".format( self.action_space)) def _basic_space_to_ph_spec(self, sp): """Translate a gym space object to a tuple to specify data type and shape. Arguments: sp (obj): basic space object of gym interface. Returns: a tuple used for building TensorFlow placeholders where the first element specifies `dtype` and the second one specifies `shape`. """ # (jones.wz) TO DO: handle gym Atari input if isinstance(sp, gym.spaces.Box): if len(sp.shape) == 3: return (tf.uint8, (None, ) + sp.shape) return (tf.float32, (None, prod(sp.shape))) elif isinstance(sp, gym.spaces.Discrete): return (tf.int32, (None, sp.n)) elif isinstance(sp, gym.spaces.MultiDiscrete): return (tf.int32, (None, prod(sp.shape))) elif isinstance(sp, gym.spaces.MultiBinary): return (tf.int32, (None, prod(sp.shape))) else: raise TypeError( "specified an unsupported space type {}".format(sp)) def flatten_obs(self, obs): """reshape the multi-channel observations into a flattern array for efficient communication in distributed training. Arguments: obs (obj): dict or list of numpy array for multi-channel observations. Returns: flattened_obs (tensor): a flattened array. """ if isinstance(self.ob_ph_spec, list): assert len(obs) == len( self.observation_space ), "{} spaces for obs but {} inputs found".format( len(self.observation_space), len(obs)) flattened_array = np.concatenate( [np.asarray(elm).astype(np.float32) for elm in obs], axis=1) elif isinstance(self.ob_ph_spec, OrderedDict): array_list = [] for name in self.ob_ph_spec.keys(): array_list.append(np.asarray(obs[name]).astype(np.float32)) flattened_array = np.concatenate(array_list, axis=1) else: flattened_array = obs return flattened_array def reshape_flattened_obs(self, flattened_obs): """recovery the nested structure of flattened_obs Arguments: flattened_obs (obj): flattened array. Returns: the original nested struct of input obs. """ tf2np_dtype = { tf.float32: np.float32, tf.float64: np.float64, tf.bool: np.bool, tf.int32: np.int32, tf.int64: np.int64, tf.int8: np.int8 } if isinstance(self.ob_ph_spec, list): restore_obs = [] cur_idx = 0 for ph_dtype, ph_shape in self.ob_ph_spec: np_type = tf2np_dtype.get(ph_dtype, np.float32) restore_obs.append( np.asarray(flattened_obs[:, cur_idx:cur_idx + ph_shape[1]]).astype(np_type)) cur_idx += ph_shape[1] elif isinstance(self.ob_ph_spec, OrderedDict): restore_obs = {} cur_idx = 0 for name, ph_tuple in self.ob_ph_spec.items(): ph_dtype, ph_shape = ph_tuple np_type = tf2np_dtype.get(ph_dtype, np.float32) restore_obs[name] = np.asarray( flattened_obs[:, cur_idx:cur_idx + ph_shape[1]]).astype(np_type) else: restore_obs = flattened_obs return restore_obs def feed_observations(self, placeholder, obs, train_size=None, offset=0): """Feed observations to their placeholders pair the data of each channel with their corresponding placeholder. Arguments: placeholder (obj): either a TF placeholder (ph), a list of phs, or a dict of phs. obs (obj): nested dict or list of numpy array of observations. train_size (int): size of train data for one optimization. offset (int): offset of current sub train data. Returns: feed_dict (dict): a dict mapping each ph to the corresponding numpy array. """ feed_dict = dict() if isinstance(placeholder, list): assert len(placeholder) == len( obs), "{} placeholders but {} input supplied".format( len(placeholder), len(obs)) for ph, ob in zip(placeholder, obs): feed_dict[ph] = ob[offset:offset + train_size] if train_size else ob elif isinstance(placeholder, dict): assert len(placeholder) == len( obs), "{} placeholders but {} input supplied".format( len(placeholder), len(self.observation_space)) for ch_name in placeholder.keys(): ph = placeholder[ch_name] feed_dict[ph] = obs[ch_name][ offset:offset + train_size] if train_size else obs[ch_name] else: feed_dict[placeholder] = obs[offset:offset + train_size] if train_size else obs return feed_dict def _restore_one_channel(self, sp, data, start_index): """Extract the data of one channel from the flattened data. Arguments: sp (obj): basic `gym.spaces.space` object. data (obj): a numpy array of flattened observations. start_index (int): indicating the starting index of this channel. Returns: selected_data (obj): a numpy array of this channel's data. dim (int): the dimensionality of this channel's data. """ if isinstance(sp, gym.spaces.Box): dtype = np.float32 dim = prod(sp.shape) elif isinstance(sp, gym.spaces.Discrete): dtype = np.int32 dim = sp.n elif isinstance(sp, gym.spaces.MultiDiscrete): dtype = np.int32 dim = prod(sp.shape) elif isinstance(sp, gym.spaces.MultiBinary): dtype = np.int32 dim = prod(sp.shape) selected_data = np.asarray( data[:, start_index:start_index + dim]).astype(dtype) return selected_data, dim def setup(self, master, is_chief, global_step, ckpt_dir, summary_ops, global_vars=None, local_vars=None, save_var_list=None, save_steps=None, job_name="worker", task_index=0, async_mode=True): """ Arguments: master (obj): specify the target of TF session. is_chief (bool): indicating whether this process is a chief worker. global_step (obj): the global_step var in the binded graph. ckpt_dir (str): specify the checkpoint directory of TF session. summary_ops (dict): a dict of TF summary operators. global_vars (list): global variables. local_vars (list): local variables. save_var_list (list): list of saveable variables. save_steps: (int): every save_steps to save checkpoint. export_dir (list): path to export SavedModel. job_name (str): job_name in distributed mode. task_index (int): task_index in distributed mode. async_mode (bool): indicating whether this is an asynchronous task. """ if global_vars is not None: logger.info("in executor:") for v in global_vars: logger.info("{}".format(v)) init_op = tf.variables_initializer(global_vars) else: # single-machine init_op = tf.global_variables_initializer() if local_vars is None: local_init_op = None ready_op = tf.report_uninitialized_variables(global_vars) else: pair_global_vars, pair_local_vars = self.get_variable_pairs( global_vars, local_vars) for gv, lv in zip(pair_global_vars, pair_local_vars): logger.info("{}, {}".format(gv, lv)) local_init_op = tf.group(*([ tf.assign(local_var, global_var) for local_var, global_var in zip(pair_local_vars, pair_global_vars) ])) ready_op = tf.report_uninitialized_variables(global_vars + list(pair_local_vars)) ready_for_local_init_op = tf.report_uninitialized_variables( global_vars) # create tensorflow saver object self.saver = tf.train.Saver( var_list=global_vars if save_var_list is None else save_var_list, reshape=False, sharded=False, max_to_keep=10, keep_checkpoint_every_n_hours=10000.0, name=None, restore_sequentially=False, saver_def=None, builder=None, defer_build=False, allow_empty=True, write_version=tf.train.SaverDef.V2, pad_step_number=False, save_relative_paths=True) # handle restore variables from checkpoint def init_fn(scaffold, session): if ckpt_dir: file = tf.train.latest_checkpoint( checkpoint_dir=ckpt_dir, latest_filename=None) if file is not None: logger.info('begin to restore model from {}'.format(file)) scaffold.saver.restore(sess=session, save_path=file) self.scaffold = tf.train.Scaffold( init_op=init_op, init_feed_dict=None, init_fn=init_fn, ready_op=ready_op, ready_for_local_init_op=ready_for_local_init_op, local_init_op=local_init_op, summary_op=None, saver=self.saver, copy_from_scaffold=None) self.do_summary = False for flag, summary_op_list in summary_ops.items(): if len(summary_op_list) > 0: summary_ops[flag] = tf.summary.merge(summary_op_list) else: summary_ops[flag] = None if ckpt_dir: actor_summary_dir = os.path.join(ckpt_dir, "actor_summary") summary_dir = os.path.join(ckpt_dir, "worker_summary") summary_hook = easy_rl.utils.hooks.UpdateSummarySaverHook( self, global_step, job_name, task_index, save_steps=(save_steps or 100), output_dir=actor_summary_dir if job_name == "actor" else summary_dir, summary_op=summary_ops) saver_hook = tf.train.CheckpointSaverHook( checkpoint_dir=ckpt_dir, save_steps=(save_steps or 300), scaffold=self.scaffold, checkpoint_basename='model.ckpt') chief_only_hooks = [saver_hook] hooks = [summary_hook] else: chief_only_hooks = [] hooks = [] # filter devices for asynchronous training if async_mode: if job_name == "learner": device_filters = [ '/job:ps', '/job:memory', '/job:{job_name}/task:{task_index}'.format( job_name=job_name, task_index=task_index) ] else: device_filters = None config_proto = tf.ConfigProto(device_filters=device_filters) else: config_proto = None self.session = tf.train.MonitoredTrainingSession( master=master, is_chief=is_chief, checkpoint_dir=None, scaffold=self.scaffold, chief_only_hooks=chief_only_hooks, hooks=hooks, save_summaries_steps=None, save_summaries_secs=None, config=config_proto) def run(self, fetches, feed_dict, options=None, run_metadata=None): """Execution Run the given `fetches` with the given `feed_dict` by `self.session`. Arguments: fetches: a list of (nested) tensor-like objects. feed_dict (dict): keys are tensors (usually placeholders), values are numpy array. Returns: the fetched values of each op. """ return self.session.run( fetches, feed_dict=feed_dict, options=options, run_metadata=run_metadata) def restore_action_shape_if_needed(self, actions): """ Arguments: actions (obj): 2-dimensional numpy array of shape (batch_size, flattened_action_shape). Returns: the reshaped actions. """ if isinstance(self.action_space, gym.spaces.Box): return np.reshape( actions, [actions.shape[0]] + list(self.action_space.shape)) return actions def get_variable_pairs(self, global_vars, local_vars): local_global_pairs = [] name_to_vars = {} for var in global_vars: name = var.name if re.search("/", name): m = re.match("([^/]*)(/.*)", name) scope_name = m.group(1) name = m.group(2) if scope_name in name: name = name.lstrip('/' + scope_name) name_to_vars[name] = var else: name_to_vars[name] = var for var in local_vars: name = var.name if re.search("/", name): m = re.match("([^/]*)(/.*)", name) scope_name = m.group(1) name = m.group(2) if scope_name in name: name = name.lstrip('/' + scope_name) if name in name_to_vars: local_global_pairs.append((name_to_vars[name], var)) return zip(*local_global_pairs) def unsafe_unfinalize(self): self.session.graph._unsafe_unfinalize() def finalize(self): self.session.graph.finalize()