import copy
from typing import Dict, Generator, Iterable, List, Optional, Set, Tuple, Union

import numpy

from fragile.core.utils import (
    float_type,
    hash_numpy,
    hash_type,
    Scalar,
    similiar_chunks_indexes,
    StateDict,
)


class States:
    """
    Handle several arrays that contain the data associated with the walkers
    of a :class:`Swarm`.

    Each array is stored as an attribute of the instance and holds the value
    of that attribute for all the walkers. The class behaves like a dictionary
    of arrays with some extra functionality that makes cloning the walkers'
    data easier. The arrays created from ``state_dict`` have an extra first
    dimension equal to the number of walkers.

    In order to define the tensors, a ``state_dict`` dictionary needs to be
    specified using the following structure::

        state_dict = {"name_1": {"size": tuple([1]),
                                 "dtype": numpy.float32,
                                 },
                      }

    Where ``size`` is a tuple indicating the shape of the desired tensor
    (without the walkers dimension). The created arrays are accessible as the
    ``name_1`` attribute of the instance, or by indexing it with
    ``states["name_1"]``.

    If ``size`` is not defined the attribute will be considered a vector of
    length ``batch_size``.

    Args:
        batch_size: The number of items in the first dimension of the tensors.
        state_dict: Dictionary defining the attributes of the tensors.
        **kwargs: Data can be directly specified as keyword arguments.

    """

    def __init__(self, batch_size: int, state_dict: Optional[StateDict] = None, **kwargs):
        """
        Initialize a :class:`States`.

        Args:
            batch_size: The number of items in the first dimension of the tensors.
            state_dict: Dictionary defining the attributes of the tensors.
            **kwargs: The name-tensor pairs can also be specified as kwargs.
                They take precedence over the arrays created from ``state_dict``.

        """
        attr_dict = self.params_to_arrays(state_dict, batch_size) if state_dict is not None else {}
        attr_dict.update(kwargs)
        # Only the names registered here (or later through __setitem__) are
        # reported by keys(), vals() and items().
        self._names = list(attr_dict.keys())
        self._attr_dict = attr_dict
        # update() stores a deep copy of every value as an instance attribute.
        self.update(**self._attr_dict)
        self._batch_size = batch_size

    def __len__(self):
        """Length is equal to n_walkers."""
        return self._batch_size

    def __getitem__(
        self, item: Union[str, int, numpy.integer]
    ) -> Union[numpy.ndarray, List[numpy.ndarray], "States"]:
        """
        Query an attribute of the class as if it was a dictionary.

        Args:
            item: Name of the attribute to be selected, or the integer index
                of a walker.

        Returns:
            The attribute named ``item`` when it is a string. When ``item`` is
            an integer, a new instance with batch size one containing the data
            of the walker at that index.

        Raises:
            TypeError: If ``item`` names a non existing attribute, or if it is
                neither a string nor an integer.

        """
        if isinstance(item, str):
            try:
                return getattr(self, item)
            except AttributeError as err:
                # TypeError is kept (instead of KeyError) for backwards compatibility.
                raise TypeError(
                    "Tried to get a non existing attribute with key {}".format(item)
                ) from err
        elif isinstance(item, (int, numpy.integer)):
            return self._ix(item)
        else:
            raise TypeError(
                "item must be an instance of str, got {} of type {} instead".format(
                    item, type(item)
                )
            )

    def _ix(self, index: int):
        """Return a new instance of batch size one with the data at ``index``."""
        # TODO(guillemdb): Allow slicing
        data = {
            k: numpy.array([v[index]]) if isinstance(v, numpy.ndarray) else v
            for k, v in self.items()
        }
        return self.__class__(batch_size=1, **data)

    def __setitem__(self, key, value: Union[Tuple, List, numpy.ndarray]):
        """
        Allow the class to set its attributes as if it was a dict.

        Args:
            key: Attribute to be set. It is registered as a stored name if it
                was not already present.
            value: Value of the target attribute.

        Returns:
            None.

        """
        if key not in self._names:
            self._names.append(key)
        self.update(**{key: value})

    def __repr__(self):
        """Return the class name, the number of walkers, and the type and shape of the data."""
        header = "{} with {} walkers\n".format(self.__class__.__name__, self.n)
        lines = [
            "{}: {} {}\n".format(name, type(val), getattr(val, "shape", None))
            for name, val in self.items()
        ]
        return header + "".join(lines)

    def __hash__(self) -> int:
        """Hash the stored values. Arrays are hashed using ``hash_numpy``."""
        return hash(
            tuple(hash_numpy(x) if isinstance(x, numpy.ndarray) else hash(x) for x in self.vals())
        )

    def group_hash(self, name: str) -> int:
        """Return a unique id for a given attribute."""
        val = getattr(self, name)
        return hash_numpy(val) if isinstance(val, numpy.ndarray) else hash(val)

    def hash_values(self, name: str) -> List[int]:
        """Return a unique id for each walker attribute."""
        values = getattr(self, name)
        return [hash_numpy(val) if isinstance(val, numpy.ndarray) else hash(val) for val in values]

    @staticmethod
    def merge_states(states: Iterable["States"]) -> "States":
        """
        Combine different states containing the same kind of data into a single
        :class:`State` with batch size equal to the sum of all the state batch
        sizes.

        Args:
            states: Iterable returning :class:`States` with the same attributes.
                It must contain at least one element.

        Returns:
            :class:`States` containing the combined data of the input values.
            It is an instance of the class of the first element of ``states``.

        Raises:
            ValueError: If the way to concatenate an attribute cannot be inferred
                from its shape and the length of the state that contains it.

        """
        # Materialize the input: it is indexed and iterated several times, which
        # would fail (or silently exhaust it) for generators and other iterators.
        states = list(states)

        def merge_one_name(states_list, name):
            vals = []
            is_scalar_vector = True
            for state in states_list:
                data = state[name]
                # Attributes that are not numpy arrays are not stacked. The value
                # found in the first state is returned as is.
                if not isinstance(data, numpy.ndarray):
                    return data
                state_len = len(state)
                n_dims = len(data.shape)
                if n_dims == 0 and state_len == 1:
                    # Name is scalar vector. Data is scalar value. Transform to array first
                    value = numpy.array([data]).flatten()
                elif n_dims == 1 and state_len == 1:
                    if data.shape[0] == 1:
                        # Name is scalar vector. Data already transformed to an array
                        value = data
                    else:
                        # Name is a matrix of vectors. Data needs an additional dimension
                        is_scalar_vector = False
                        value = numpy.array([data])
                elif n_dims == 1 and state_len > 1:
                    # Name is a scalar vector. Data already is a one dimensional array
                    value = data
                elif n_dims > 1 and state_len >= 1:
                    # Name is a matrix of vectors. Data has the correct shape
                    is_scalar_vector = False
                    value = data
                else:
                    raise ValueError(
                        "Could not infer data concatenation for attribute %s  with shape %s"
                        % (name, data.shape)
                    )
                vals.append(value)
            if is_scalar_vector:
                return numpy.concatenate(vals)
            return numpy.vstack(vals)

        # Assumes all states have the same names.
        data = {name: merge_one_name(states, name) for name in states[0]._names}
        batch_size = sum(s.n for s in states)
        return states[0].__class__(batch_size=batch_size, **data)

    @property
    def n(self) -> int:
        """Return the batch_size of the vectors, which is equivalent to the number of walkers."""
        return self._batch_size

    def get(self, key: str, default=None):
        """
        Get an attribute by key and return the default value if it does not exist.

        Args:
            key: Attribute to be recovered.
            default: Value returned in case the attribute is not part of state.

        Returns:
            Target attribute if found in the instance, otherwise returns the
            default value.

        """
        if key not in self.keys():
            return default
        return self[key]

    def keys(self) -> Generator:
        """Return a generator for the attribute names of the stored data."""
        return (name for name in self._names if not name.startswith("_"))

    def vals(self) -> Generator:
        """Return a generator for the values of the stored data."""
        return (self[name] for name in self._names if not name.startswith("_"))

    def items(self) -> Generator:
        """Return a generator for the attribute names and the values of the stored data."""
        return ((name, self[name]) for name in self._names if not name.startswith("_"))

    def itervals(self):
        """
        Iterate the states attributes by walker.

        Yields:
            One tuple per walker containing the values of all the stored
            attributes for that walker. Attributes that are not numpy arrays
            are yielded unchanged for every walker, like :meth:`iteritems` does.

        """
        # The former `return self.vals()` shortcut for n <= 1 was a no-op inside a
        # generator function: it only stopped the iteration, so instances with a
        # single walker yielded nothing.
        for i in range(self.n):
            yield tuple(v[i] if isinstance(v, numpy.ndarray) else v for v in self.vals())

    def iteritems(self):
        """
        Iterate the states attributes by walker.

        Yields:
            One pair per walker: a tuple containing the names of the attributes,
            and a tuple with the values that correspond to that walker.
            Attributes that are not numpy arrays are yielded unchanged.

        """
        for i in range(self.n):
            values = (v[i] if isinstance(v, numpy.ndarray) else v for v in self.vals())
            # keys() applies the same filter as vals(), so names and values stay aligned.
            yield tuple(self.keys()), tuple(values)

    def split_states(self, n_chunks: int) -> Generator["States", None, None]:
        """
        Return a generator of states, where each one contains the data of one
        of the chunks of walkers defined by ``similiar_chunks_indexes``.

        Array attributes are sliced for every chunk. Attributes that are not
        numpy arrays are passed unchanged to every chunk.

        Args:
            n_chunks: Number of chunks requested to ``similiar_chunks_indexes``.

        Yields:
            Instances of the same class containing the data of one chunk.

        """

        def get_chunck_size(state, start, end):
            # The batch size of the chunk is the length of the first sliced array.
            for name in state._names:
                attr = state[name]
                if isinstance(attr, numpy.ndarray):
                    return len(attr[start:end])
            # No array available to measure: fall back to the nominal chunk size.
            return int(numpy.ceil(self.n / n_chunks))

        for start, end in similiar_chunks_indexes(self.n, n_chunks):
            chunk_size = get_chunck_size(self, start, end)
            data = {
                k: val[start:end] if isinstance(val, numpy.ndarray) else val
                for k, val in self.items()
            }
            yield self.__class__(batch_size=chunk_size, **data)

    def update(self, other: Optional["States"] = None, **kwargs):
        """
        Modify the data stored in the States instance.

        Existing attributes will be updated, and new attributes will be created
        if needed. The values are always deep-copied before being stored.

        Args:
            other: State class that will be copied upon update.
            **kwargs: It is possible to specify the update as key value attributes,
                where key is the name of the attribute to be updated, and value
                is the new value for the attribute. They are applied after ``other``.

        """

        def update_or_set_attributes(attrs: Union[dict, States]):
            for name, val in attrs.items():
                try:
                    # Write in place when the current value supports slice assignment
                    # with the new data; the current object (and dtype, for arrays)
                    # is kept.
                    getattr(self, name)[:] = copy.deepcopy(val)
                except (AttributeError, TypeError, KeyError, ValueError):
                    # Missing attribute, or a current value that cannot be assigned
                    # in place with the new data: replace the attribute.
                    setattr(self, name, copy.deepcopy(val))

        if other is not None:
            update_or_set_attributes(other)
        if kwargs:
            update_or_set_attributes(kwargs)

    def clone(
        self, will_clone: numpy.ndarray, compas_ix: numpy.ndarray, ignore: Optional[Set[str]] = None,
    ):
        """
        Clone all the stored data according to the provided arrays.

        Only the attributes that are numpy arrays are cloned.

        Args:
            will_clone: Array of shape (n_walkers,) of booleans indicating the
                index of the walkers that will clone to a random companion.
            compas_ix: Array of integers of shape (n_walkers,). Contains the
                indexes of the walkers that will be copied.
            ignore: set containing the names of the attributes that will not be
                cloned.

        """
        ignore = set() if ignore is None else ignore
        for name in self.keys():
            if isinstance(self[name], numpy.ndarray) and name not in ignore:
                self[name][will_clone] = self[name][compas_ix][will_clone]

    def get_params_dict(self) -> StateDict:
        """
        Return a dictionary describing the data stored in the :class:`States`.

        Every instance attribute that is a numpy array is described by its
        ``shape`` and ``dtype``.
        """
        return {
            k: {"shape": v.shape, "dtype": v.dtype}
            for k, v in self.__dict__.items()
            if isinstance(v, numpy.ndarray)
        }

    def copy(self) -> "States":
        """
        Create a copy of the current instance.

        Returns:
            A :class:`States` (the base class, also when called on a subclass)
            containing copies of the stored data.

        """
        # Values without a `.copy()` method (None, python scalars...) used to raise
        # AttributeError here. They are deep-copied instead.
        param_dict = {
            str(name): val.copy() if isinstance(val, numpy.ndarray) else copy.deepcopy(val)
            for name, val in self.items()
        }
        return States(batch_size=self.n, **param_dict)

    @staticmethod
    def params_to_arrays(param_dict: StateDict, n_walkers: int) -> Dict[str, numpy.ndarray]:
        """
        Create a dictionary containing the arrays specified by param_dict.

        ``param_dict`` is not modified. The ``size`` and ``shape`` entries of
        each value are only used to compute the shape of the array, and the
        remaining entries are passed as keyword arguments to ``numpy.zeros``.

        Args:
            param_dict: Dictionary defining the attributes of the tensors.
            n_walkers: Number items in the first dimension of the data tensors.

        Returns:
            Dictionary with the same keys as param_dict, containing arrays
            specified by `param_dict` values.

        """
        tensor_dict = {}
        for key, val in param_dict.items():
            # Shape already includes the number of walkers. Remove walkers axis to create size.
            shape = val.get("shape")
            if shape is not None and len(shape) > 1:
                val_size = shape[1:]
            else:
                val_size = val.get("size")
            # Create appropriate shapes with current state's number of walkers.
            if val_size is None:
                sizes = n_walkers
            else:
                if isinstance(val_size, (int, numpy.integer)):
                    val_size = (val_size,)
                sizes = (n_walkers,) + tuple(val_size)
            # Filter instead of deleting keys: the nested dictionaries belong to the
            # caller, who may reuse them to create other instances.
            zeros_kwargs = {k: v for k, v in val.items() if k not in ("size", "shape")}
            tensor_dict[key] = numpy.zeros(shape=sizes, **zeros_kwargs)
        return tensor_dict


class StatesEnv(States):
    """
    Keeps track of the data structures used by the :class:`Environment`.

    Attributes:
        states: This data tracks the internal state of the Environment simulation,
            and they are only used to save and restore its state.
        observs: This is the data that corresponds to the observations of the
            current :class:`Environment` state. The observations are used
            for calculating distances.
        rewards: This vector contains the rewards associated with each observation.
        oobs: Stands for **Out Of Bounds**. It is a vector of booleans that
            represents and arbitrary boundary condition. If a value is ``True``
            the corresponding states will be treated as being outside the
            :class:`Environment` domain. The states considered out of bounds
            will be avoided by the sampling algorithms.
        terminals: Vector of booleans representing the successful termination
            of an environment. A ``True`` value indicates that the
            :class:`Environment` has successfully reached a terminal
            state that is not out of bounds.

    """

    def __init__(self, batch_size: int, state_dict: Optional[StateDict] = None, **kwargs):
        """
        Initialise a :class:`StatesEnv`.

        Args:
            batch_size: The number of items in the first dimension of the tensors.
            state_dict: Dictionary defining the attributes of the tensors. Its
                entries override the default ones returned by :meth:`get_params_dict`.
            **kwargs: The name-tensor pairs can also be specified as kwargs.

        """
        self.observs = None
        self.states = None
        self.rewards = None
        self.oobs = None
        self.terminals = None
        updated_dict = self.get_params_dict()
        if state_dict is not None:
            updated_dict.update(state_dict)
        super().__init__(state_dict=updated_dict, batch_size=batch_size, **kwargs)

    def get_params_dict(self) -> StateDict:
        """
        Return a dictionary describing the data stored in the :class:`StatesEnv`.

        The default definitions are overridden by the description of the arrays
        that are currently stored in the instance.
        """
        params = {
            "states": {"dtype": numpy.int64},
            "observs": {"dtype": numpy.float32},
            "rewards": {"dtype": numpy.float32},
            "oobs": {"dtype": numpy.bool_},
            "terminals": {"dtype": numpy.bool_},
        }
        state_dict = super().get_params_dict()
        params.update(state_dict)
        return params


class StatesModel(States):
    """
    Keeps track of the data structures used by the :class:`Model`.

    Attributes:
        actions: Represents the actions that will be sampled by a :class:`Model`.

    """

    def __init__(self, batch_size: int, state_dict: Optional[StateDict] = None, **kwargs):
        """
        Initialise a :class:`StatesModel`.

        Args:
            batch_size: The number of items in the first dimension of the tensors.
            state_dict: Dictionary defining the attributes of the tensors. Its
                entries override the default ones returned by :meth:`get_params_dict`.
            **kwargs: The name-tensor pairs can also be specified as kwargs.

        """
        self.actions = None
        updated_dict = self.get_params_dict()
        if state_dict is not None:
            updated_dict.update(state_dict)
        super().__init__(state_dict=updated_dict, batch_size=batch_size, **kwargs)

    def get_params_dict(self) -> StateDict:
        """
        Return the parameter dictionary with the attributes common to all Models.

        The default definitions are overridden by the description of the arrays
        that are currently stored in the instance.
        """
        params = {
            "actions": {"dtype": numpy.float32},
        }
        state_dict = super().get_params_dict()
        params.update(state_dict)
        return params


class StatesWalkers(States):
    """
    Keeps track of the data structures used by the :class:`Walkers`.

    Attributes:
        id_walkers: Array of of integers that uniquely identify a given state.
            They are obtained by hashing the states.
        compas_clone: Array of integers containing the index of the walkers
            selected as companions for cloning.
        processed_rewards: Array of normalized rewards. It contains positive
            values with an average of 1. Values greater than one
            correspond to rewards above the average, and values
            lower than one correspond to rewards below the average.
        virtual_rewards: Array containing the virtual rewards assigned to each walker.
        cum_rewards: Array of rewards used to compute the virtual_reward. This
            value can accumulate the rewards provided by the
            :class:`Environment` during an algorithm run.
        distances: Array containing the similarity metric of each walker used
            to compute the virtual reward.
        clone_probs: Array containing the probability that a walker clones to
            its companion during the cloning phase.
        will_clone: Boolean array. A ``True`` value indicates that the
            corresponding walker will clone to its companion.
        in_bounds: Boolean array. A `True` value indicates that a walker is
            in the domain defined by the :class:`Environment`.
        best_state: State of the walker with the best ``cum_reward`` found
            during the algorithm run.
        best_obs: Observation corresponding to the ``best_state``.
        best_reward: Best ``cum_reward`` found during the algorithm run.
        best_id: Integer representing the hash of the ``best_state``.

    """

    def __init__(self, batch_size: int, state_dict: Optional[StateDict] = None, **kwargs):
        """
        Initialize a :class:`StatesWalkers`.

        Args:
            batch_size: Number of walkers that the class will be tracking.
            state_dict: Dictionary defining the attributes of the tensors. Its
                entries override the default ones returned by :meth:`get_params_dict`.
            kwargs: attributes that will not be set as numpy.ndarrays

        """
        self.will_clone = None
        self.compas_clone = None
        self.processed_rewards = None
        self.cum_rewards = None
        self.virtual_rewards = None
        self.distances = None
        self.clone_probs = None
        self.in_bounds = None
        self.id_walkers = None
        # This is only to allow __repr__. Should be overridden after reset
        self.best_id = None
        self.best_obs = None
        self.best_state = None
        self.best_reward = -numpy.inf
        self.best_time = 0
        self.times = None
        updated_dict = self.get_params_dict()
        if state_dict is not None:
            updated_dict.update(state_dict)
        super().__init__(state_dict=updated_dict, batch_size=batch_size, **kwargs)

    def get_params_dict(self) -> StateDict:
        """
        Return a dictionary containing the param_dict to build an instance
        of States that can handle all the data generated by the :class:`Walkers`.

        The default definitions are overridden by the description of the arrays
        that are currently stored in the instance.
        """
        params = {
            "id_walkers": {"dtype": hash_type},
            "compas_clone": {"dtype": numpy.int64},
            "times": {"dtype": numpy.int64},
            "processed_rewards": {"dtype": float_type},
            "virtual_rewards": {"dtype": float_type},
            "cum_rewards": {"dtype": float_type},
            "distances": {"dtype": float_type},
            "clone_probs": {"dtype": float_type},
            "will_clone": {"dtype": numpy.bool_},
            "in_bounds": {"dtype": numpy.bool_},
        }
        state_dict = super().get_params_dict()
        params.update(state_dict)
        return params

    def clone(self, **kwargs) -> Tuple[numpy.ndarray, numpy.ndarray]:
        """
        Clone ``cum_rewards``, ``id_walkers``, ``virtual_rewards`` and ``times``.

        The cloning is defined by the ``will_clone`` and ``compas_clone``
        attributes of the instance. The keyword arguments are ignored.

        Returns:
            Tuple containing the ``will_clone`` and ``compas_clone`` arrays
            that were used for cloning.

        """
        clone, compas = self.will_clone, self.compas_clone
        self.cum_rewards[clone] = copy.deepcopy(self.cum_rewards[compas][clone])
        self.id_walkers[clone] = copy.deepcopy(self.id_walkers[compas][clone])
        self.virtual_rewards[clone] = copy.deepcopy(self.virtual_rewards[compas][clone])
        self.times[clone] = copy.deepcopy(self.times[compas][clone])
        return clone, compas

    def reset(self):
        """Clear the internal data of the class."""
        params = self.get_params_dict()
        # Stored names that are not described by the param dict are set to None.
        other_attrs = [name for name in self.keys() if name not in params]
        for attr in other_attrs:
            setattr(self, attr, None)
        self.update(
            id_walkers=numpy.zeros(self.n, dtype=hash_type),
            compas_dist=numpy.arange(self.n),
            compas_clone=numpy.arange(self.n),
            times=numpy.zeros(self.n, dtype=numpy.int64),
            processed_rewards=numpy.zeros(self.n, dtype=float_type),
            cum_rewards=numpy.zeros(self.n, dtype=float_type),
            virtual_rewards=numpy.ones(self.n, dtype=float_type),
            distances=numpy.zeros(self.n, dtype=float_type),
            clone_probs=numpy.zeros(self.n, dtype=float_type),
            will_clone=numpy.zeros(self.n, dtype=numpy.bool_),
            in_bounds=numpy.ones(self.n, dtype=numpy.bool_),
        )

    def _ix(self, index: int):
        """
        Return a new instance of batch size one with the data at ``index``.

        Attributes whose name contains ``"best"`` are not indexed.
        """
        # TODO(guillemdb): Allow slicing
        data = {
            k: numpy.array([v[index]]) if isinstance(v, numpy.ndarray) and "best" not in k else v
            for k, v in self.items()
        }
        return self.__class__(batch_size=1, **data)


class OneWalker(States):
    """
    Represent one walker.

    This class is used for initializing a :class:`Swarm` to a given state without having to
    explicitly define the :class:`StatesEnv`, :class:`StatesModel` and :class:`StatesWalkers`.

    """

    def __init__(
        self,
        state: numpy.ndarray,
        observ: numpy.ndarray,
        reward: Scalar,
        id_walker=None,
        time=0,
        state_dict: Optional[StateDict] = None,
        **kwargs
    ):
        """
        Initialize a :class:`OneWalker`.

        Args:
            state: Non batched numpy array defining the state of the walker.
            observ: Non batched numpy array defining the observation of the walker.
            reward: Scalar value representing the reward of the walker.
            id_walker: Hash of the provided State. If None it will be calculated when
                the :class:`OneWalker` is initialized.
            state_dict: External :class:`StateDict` that overrides the default values.
                The entries for ``observs`` and ``states`` are ignored, and only the
                entries for names that are already defined by default are used.
            time: Time step of the current walker. Measures the length of the path
                followed by the walker.
            **kwargs: Additional data needed to define the walker. Its structure
                needs to be defined in the provided ``state_dict``. These attributes
                will be assigned to the :class:`EnvStates` of the :class:`Swarm`.

        Raises:
            ValueError: If ``state_dict`` is provided and any of the keyword
                arguments is not defined in it.

        """
        self.id_walkers = None
        self.rewards = None
        self.observs = None
        self.states = None
        self.times = None
        # The definition of the arrays is inferred from the provided data.
        self._observs_size = observ.shape
        self._observs_dtype = observ.dtype
        self._states_size = state.shape
        self._states_dtype = state.dtype
        self._times_dtype = numpy.int64
        self._rewards_dtype = type(reward)
        # Accept external definition of param_dict values
        walkers_dict = self.get_params_dict()
        if state_dict is not None:
            for k, v in state_dict.items():
                if k in ["observs", "states"]:  # These two are parsed from the provided opts
                    continue
                if k in walkers_dict:
                    walkers_dict[k] = v
        super().__init__(batch_size=1, state_dict=walkers_dict)
        # Keyword arguments must be defined in state_dict
        if state_dict is not None:
            for k in kwargs:
                if k not in state_dict:
                    raise ValueError(
                        "The provided attributes must be defined in state_dict."
                        "param_dict: %s\n kwargs: %s" % (state_dict, kwargs)
                    )
        self.observs[:] = copy.deepcopy(observ)
        self.states[:] = copy.deepcopy(state)
        self.rewards[:] = copy.deepcopy(reward)
        self.times[:] = copy.deepcopy(time)
        self.id_walkers[:] = (
            copy.deepcopy(id_walker) if id_walker is not None else hash_numpy(state)
        )
        self.update(**kwargs)

    def __repr__(self):
        """Return the reward, time, observation, state and id of the walker."""
        # The values are formatted inside the context so the print options are applied.
        with numpy.printoptions(linewidth=100, threshold=200, edgeitems=9):
            string = (
                "reward: %s\n"
                "time: %s\n"
                "observ: %s\n"
                "state: %s\n"
                "id: %s"
                % (
                    self.rewards[0],
                    self.times[0],
                    self.observs[0].flatten(),
                    self.states[0].flatten(),
                    self.id_walkers[0],
                )
            )
        return string

    def get_params_dict(self) -> StateDict:
        """
        Return the param_dict that defines the arrays of a :class:`OneWalker`.

        The sizes and data types are the ones inferred from the data provided
        when the instance was initialized.
        """
        params = {
            "id_walkers": {"dtype": hash_type},
            "rewards": {"dtype": self._rewards_dtype},
            "observs": {"dtype": self._observs_dtype, "size": self._observs_size},
            "states": {"dtype": self._states_dtype, "size": self._states_size},
            "times": {"dtype": self._times_dtype},
        }
        return params