from __future__ import print_function from itertools import chain try: import itertools.zip as zip except ImportError: pass from builtins import range import numpy as np from scipy.sparse import csr_matrix """ authors: Clement Gehring contact: gehring@csail.mit.edu date: May 2015 """ ################## VARIOUS INTERFACES ####################################### """ Generic map from a state (as an array), to a feature vector (as an array) """ class Projector(object): def __init__(self): pass """ Project a vector (or matrix of row vectors) to a corresponding feature vector. It should handle 1-D arrays and 2-D arrays. """ def __call__(self, state): raise NotImplementedError("Subclasses should implement this!") @property def size(self): raise NotImplementedError("Subclasses should implement this!") """ Generic map from a state-action pair (as two arrays), to a feature vector (as an array) """ class StateActionProjector(object): def __init__(self): pass """ Project two vectors (or two matrices of row vectors) to a corresponding feature vector. It should handle 1-D arrays and 2-D arrays for both arguments and any combination of these cases, i.e., one state but several actions and vice-versa. """ def __call__(self, state, action): raise NotImplementedError("Subclasses should implement this!") @property def size(self): raise NotImplementedError("Subclasses should implement this!") """ Hashing an array of indices to a single index. Mostly used for tile coding. """ class Hashing(object): def __init__(self, **kargs): pass """ Hash several indices (typically, one per dimension) onto one index (typically, index of a tile). This could be a simple cartesian-product, i.e., unique index for every combination, or some sort of randomized hash function, e.g., UNH. Must be able to deal with 2D arrays of indices. """ def __call__(self, indices): raise NotImplementedError("Subclasses should implement this!") ################## HELPER CLASSES FOR STATE PROJECTOR TO ##################### ################## STATE-ACTION PROJECTOR CONVERSION ##################### """ Simple method that ensures both arrays are 2D and that they share the same number of rows (for when only one row was given for one argument but not the other). """ def tile_and_adjust_state_action(state, action): # make everything 2-D if state.ndim == 1: state = state.reshape((1, -1)) if action.ndim == 1: action = action.reshape((1, -1)) # if one is 2D and but not the other, tile such that the dimensions # match. if state.shape[0] == 1 and action.shape[0] > 1: state = np.tile(state, (action.shape[0], 1)) elif action.shape[0] == 1 and state.shape[0] > 1: action = np.tile(action, (state.shape[0], 1)) return state, action """ Simple state-action projector that simply treats the actions as extra dimensions and feeds it to a projector. """ class ConcatStateAction(StateActionProjector): def __init__(self, projector): self.projector = projector def __call__(self, state, action): state, action = tile_and_adjust_state_action(state, action) sa = np.hstack((state, action)) return self.projector(sa) @property def size(self): return self.projector.size """ Simple state-action projector that simply ignores the action. Mainly to be used when only a value function is needed. """ class RemoveAction(StateActionProjector): def __init__(self, projector): self.projector = projector def __call__(self, state, action): state, action = tile_and_adjust_state_action(state, action) return self.projector(state) @property def size(self): return self.projector.size """ Create a tabular actions representaton with a state projector. The output vectors are padded with zeros such that the total dimension is n*num_actions, where n is the output dimension of the projector. If action i is given, then the whole vector is zero with the exception of columns n*i to n*(i+1), where the projected state will be encoded. The resulting output can be either dense or sparse. """ class TabularAction(StateActionProjector): def __init__(self, projector, num_actions, sparse=True): self.phi = projector self.__size = self.phi.size * num_actions self.num_actions = num_actions self.sparse = sparse def __call__(self, state, action): state, action = tile_and_adjust_state_action(state, action) phi_s = self.phi(state) phi_s = csr_matrix(phi_s) # this step assumes that, if sparse, each row has the same number of # non zero elements. action = np.tile(action, (1, phi_s.indptr[1] - phi_s.indptr[0])).reshape(-1).astype('int') phi_sa = csr_matrix((phi_s.data, phi_s.indices + action * self.phi.size, phi_s.indptr), shape=(phi_s.shape[0], self.size)) if not self.sparse: phi_sa = phi_sa.toarray() return phi_sa @property def size(self): return self.__size ################## HELPER CLASS FOR INDEX TO VECTOR CONVERSION ################ """ Projector converting a projector, which generate indices, to a projector generating sparse vectors (as a csr matrix) """ class IndexToBinarySparse(Projector): """ Constructor to go from a projector generating indices to a projector generating sparse vectors (as a csr matrix) index_projector: the projector generating indices, it needs to be be able to handle 2-D arrays """ def __init__(self, index_projector, normalize=False): super(IndexToBinarySparse, self).__init__() self.index_projector = index_projector self.normalize = normalize if normalize: self.entry_value = 1.0 / np.sqrt(self.index_projector.nonzeros) else: self.entry_value = 1.0 def __call__(self, state): # generate indices for a single (or several) binary sparse vector(s). indices = self.index_projector(state) if indices.ndim == 1: indices = indices.reshape((1, -1)) # set value of all active features vals = np.empty(indices.size) vals[:] = self.entry_value # create row pointers, this assumes each row has the same number # of non-zero entries row_ptr = np.arange(0, indices.size + 1, indices.shape[1]) # flatten the column indices generate ealier col_ind = indices.flatten() return csr_matrix((vals, col_ind, row_ptr), shape=(indices.shape[0], self.index_projector.size)) @property def size(self): return self.index_projector.size @property def xTxNorm(self): return 1.0 if self.normalize else self.index_projector.nonzeros """ Projector converting a projector, which generate indices, to a projector generating dense vectors (as an array) """ class IndexToDense(Projector): """ Constructor to go from a projector generating indices to a projector generating dense vectors (as an array) index_projector: the projector generating indices, it needs to be be able to handle 2-D arrays """ def __init__(self, index_projector, normalize=False): super(IndexToDense, self).__init__() self.index_projector = index_projector self.normalize = normalize if normalize: self.entry_value = 1.0 / np.sqrt(self.index_projector.nonzeros) else: self.entry_value = 1.0 def __call__(self, state): # generate indices for a single (or several) binary vectors indices = self.index_projector(state) if indices.ndim == 1: indices = indices.reshape((1, -1)) # allocate dense array output = np.zeros((indices.shape[0], self.size)) # create row indices row_ind = np.tile(np.arange(indices.shape[0]).reshape((-1, 1)), (1, indices.shape[1])).flatten() # set values for all active features output[row_ind, indices.flatten()] = self.entry_value # squeeze out useless dimensions, if any return output.squeeze() @property def size(self): return self.index_projector.size @property def xTxNorm(self): return 1.0 if self.normalize else self.index_projector.nonzeros """ Projector concatenating several projectors into the same representation by concatenating their outputs. """ class ConcatProjector(Projector): def __init__(self, projectors): super(ConcatProjector, self).__init__() self.phis = projectors def __call__(self, state): return np.hstack((phi(state) for phi in self.phis)) @property def size(self): return sum([phi.size for phi in self.phis]) class SparseRPTilecoding(IndexToBinarySparse): def __init__(self, index_projector, random_proj, normalize=False, output_dense=True): super(SparseRPTilecoding, self).__init__(index_projector, normalize=normalize) self.random_proj = random_proj self.output_dense = output_dense def __call__(self, X): phi = super(self.__class__, self).__call__(X) pphi = self.random_proj.dot(phi.T).T if self.output_dense: pphi = pphi.todense() return pphi @property def size(self): return self.random_proj.shape[0] ################## TILE CODING KERNEL FUNCTION ############################### class DenseKernel(IndexToBinarySparse): def __call__(self, X1, X2=None): phi1 = super(self.__class__, self).__call__(X1) if X2 is None: return phi1.dot(phi1.T).todense() else: phi2 = super(self.__class__, self).__call__(X2) return phi1.dot(phi2.T).todense() class DenseL1Kernel(object): def __init__(self, z, k, scale=1.0): self.z = z.copy() self.all_cat = np.unique(self.z) self.n_cat = self.all_cat.shape[0] self.k = k * 1.0 self.scale = scale * 1.0 def __call__(self, X1, X2=None): if X2 is None: if X1.ndim == 1: return self.scale K = 0 for a in self.all_cat: active = self.z == a K += computeKmm(X1[:, active], self.k[active], self.scale / self.n_cat) return K else: X1 = X1[None, :] if X1.ndim == 1 else X1 X2 = X2[None, :] if X2.ndim == 1 else X2 K = 0 for a in self.all_cat: active = self.z == a K += computeKnm(X1[:, active], X2[:, active], self.k[active], self.scale / self.n_cat) return K @property def xTxNorm(self): return self.scale # L1 kernel helpers: def computeKmm(Xbar, k, scale): Xbar = Xbar * k Qbar = (Xbar ** 2).sum(axis=1)[:, None] l2dist = Qbar + Qbar.T - 2 * (Xbar.dot(Xbar.T)) l2dist[l2dist < 0] = 0 ret = scale * np.exp(-np.sqrt(l2dist)) return ret def computeKnm(X, Xbar, k, scale): X = X * k Xbar = Xbar * k Q = (X ** 2).sum(axis=1)[:, None] Qbar = (Xbar ** 2).sum(axis=1)[:, None] l2dist = Q + Qbar.T - 2 * (X.dot(Xbar.T)) l2dist[l2dist < 0] = 0 ret = scale * np.exp(-np.sqrt(l2dist)) return ret class SparseKernel(IndexToBinarySparse): def __call__(self, X1, X2=None): phi1 = super(self.__class__, self).__call__(X1) if X2 is None: return phi1.dot(phi1.T) else: phi2 = super(self.__class__, self).__call__(X2) return phi1.dot(phi2.T) ################## TILE CODING IMPLEMENTATION ################################ """ Represents a series of layer of tile coding. This is equivalent to a single discretization of the input space. """ class Tiling(object): """ Constructor for a set of tilings. input_index: array (or list) of the input indices to be considered. This allows to specify on which inputs are the tilings defined. ntiles: integer, or array of integers, specifying how many uniform divisions for each dimension (or all dimensions if a single integer is given). Each layer in this set will have the same number of divisions in each dimension. ntilings: The number of individual layers in this set of tilings. state_range: range of each dimension offset: (optional) the offsets between each layer in this set of tilings. By default, each layer is uniformly offset from each other. Shape: (#dimensions, ntilings), i.e. for each dimension you have to specify the offset of each tiling. For dimension d, the offset should be negative and > -1/ntiles[d]. So if you want random offsets for one dimension, you could use something like this: -1.0/ntiles[d] * np.random.random_sample(size=ntilings) hashing: (optional) map from the individual bin index for each dimension to a tile index. By default, this is assumed to be a cartesian product, i.e., each combination if mapped to a unique index. This is equivalent to laying a grid over all input dimensions at once. Alternatively, this could map could be defined by a random hash funciton (e.g., UNH). """ def __init__(self, input_index, ntiles, ntilings, state_range, rnd_stream, offset=None, hashing=None): self.hashing = hashing if isinstance(ntiles, int): ntiles = np.array([ntiles] * len(input_index), dtype='int') else: ntiles = np.array(ntiles) self.state_range = [state_range[0][input_index].copy().astype(float)[None, :, None], state_range[1][input_index].copy().astype(float)[None, :, None]] if ntiles.ndim > 1: ntiles = ntiles[None, :, :] else: ntiles = ntiles[None, :, None] self.state_range[0] = self.state_range[0] - (self.state_range[1] - self.state_range[0]) / (ntiles - 1) self.offset = offset if offset is None: self.offset = np.empty((ntiles.shape[1], ntilings)) for i in range(ntiles.shape[0]): self.offset[i, :] = -rnd_stream.random_sample(ntilings) / ntiles[0, i] if self.hashing == None: self.hashing = IdentityHash(ntiles) self.input_index = np.array(input_index, dtype='int') self.size = ntilings * (self.hashing.memory) self.index_offset = (self.hashing.memory * np.arange(ntilings)).astype('int') self.ntiles = ntiles def __call__(self, state): return self.getIndices(state) def getIndices(self, state): if state.ndim == 1: state = state.reshape((1, -1))[:, :, None] else: state = state[:, :, None] nstate = (state[:, self.input_index, :] - self.state_range[0]) / (self.state_range[1] - self.state_range[0]) indicies = ((self.offset[None, :, :] + nstate) * self.ntiles).astype(np.int) return self.hashing(indicies) + self.index_offset[None, :] @property def ntilings(self): return self.offset.shape[1] """ Full tile coding implementation. This represents a projector, from states to features. """ class TileCoding(Projector): """ Constructor for a tile coding projector. The constructor builds several sets of individual tilings based on the input arguments. input_indicies: a list of arrays of indices. Each array of indicies specifies which input dimensions are considered by each set of tilings. There will be as many sets of tilings as there are array of indices. e.g., input_indices = [ [0,1], [1,2] ] will generate two sets of tilings, one defined on the first and second dimension, and the other on the second and third dimension. ntiles: list of a mix of integers or array of integers. This specifies the how fine is the discretization in each set of tilings. There should be an element (either integer or array of integers) for each set of tilings. If a set of tilings is given an integer, each dimensions are discretized in that many bins. If a set of tilings is given an array, it should be of the same size as the number of input dimensions it uses. In this case, it will discretize each dimensions in as many bins as the corresponding integer in the given array. e.g., ntiles = [ 4, [2,6] ] will generate two sets of tilings where the first discretizes all its input dimensions in 4 bins, and the second discretizes its first input dimension in 2 bins and its second, in 6 bins. ntilings: array (or list) of integers corresponding to how many individual layers are in each set of tilings. In this implementation, individual layers in the same set are uniformly offset from each other. hashing: either None, or list of hashing functions. This specifies the hashing function to be used by each set of tilings. It is assumed that each individual layer part of the same set share the same hash funciton. If None is given, then each set of tilings will use a cartesian product, i.e., each combination of indices is mapped to a unique tile. This is equivalent to laying a n-d grid on the input dimensions. state_range: range of each dimension offsets: (optional) the offsets between the layers for each set of tilings. By default, all layers are uniformly offset from each other. If you provide a list of lists of offsets (which is recommended), this must hold: len(offsets) == len(input_indices). Each item in offsets is passed to the constructor of Tiling, so see there for further documentation. bias_term: (optional) boolean specifying whether to add an extra bias term which is always on. By default, a bias_term is added. """ def __init__(self, input_indices, ntiles, ntilings, hashing, state_range, rnd_stream=None, offsets=None, bias_term=True): super(TileCoding, self).__init__() if hashing == None: hashing = [None] * len(ntilings) if offsets is None: offsets = [None] * len(input_indices) if offsets is None and rnd_stream is None: raise Exception( 'Either offsets for each tiling or a random stream (numpy) needs to be given in the constructor') self.state_range = np.array(state_range) self.tilings = [Tiling(in_index, nt, t, self.state_range, rnd_stream, offset=o, hashing=h) for in_index, nt, t, h, o in zip(input_indices, ntiles, ntilings, hashing, offsets)] self.__size = sum(map(lambda x: x.size, self.tilings)) self.bias_term = bias_term self.index_offset = np.zeros(len(ntilings), dtype='int') self.index_offset[1:] = np.cumsum(map(lambda x: x.size, self.tilings[:-1])) self.index_offset = np.hstack([np.array([off] * t, dtype='int') for off, t in zip(self.index_offset, ntilings)]) if bias_term: self.index_offset = np.hstack((self.index_offset, np.array(self.__size, dtype='int'))) self.__size += 1 self.__size = int(self.__size) """ Map a state vector, or several state vectors, to its corresponding tile indices. """ def __call__(self, state): if state.ndim == 1: state = state.reshape((1, -1)) # add bias term if needed, concatenate set of indices of all # the sets of tilings. if self.bias_term: indices = np.hstack(chain((t(state) for t in self.tilings), [np.zeros((state.shape[0], 1), dtype='int')])) \ + self.index_offset else: indices = np.hstack((t(state) for t in self.tilings)) \ + self.index_offset return indices.squeeze() @property def size(self): return self.__size @property def nonzeros(self): return np.sum([t.ntilings for t in self.tilings]) + (1 if self.bias_term else 0) class UNH(Hashing): # constants were taken from rlpark's implementation. increment = 470 def __init__(self, memory, rnd_stream): super(UNH, self).__init__() self.rndseq = np.zeros(16384, dtype='int') self.memory = int(memory) for i in range(4): self.rndseq = self.rndseq << 8 | rnd_stream.random_integers(np.iinfo('int16').min, np.iinfo('int16').max, 16384) & 0xff def __call__(self, indices): rnd_seq = self.rndseq a = self.increment * np.arange(indices.shape[1]) index = indices + a[None, :, None] index = index - (index.astype(np.int) / rnd_seq.size) * rnd_seq.size hashed_index = (np.sum(rnd_seq[index], axis=1)).astype(np.int) return (hashed_index - (hashed_index / self.memory).astype(np.int) * self.memory).astype('int') class IdentityHash(Hashing): def __init__(self, dims, wrap=False): super(IdentityHash, self).__init__() self.memory = np.prod(dims) self.dims = dims.astype('int') self.wrap = wrap self.dim_offset = np.cumprod(np.vstack((np.ones((self.dims.shape[2], 1)), self.dims[0, :0:-1, :])), axis=0).astype('int')[None, ::-1, :] def __call__(self, indices): if self.wrap: indices = np.remainder(indices, self.dims) else: indices = np.clip(indices, 0, self.dims - 1) return np.sum(indices * self.dim_offset, axis=1) ################## END OF TILE CODING IMPLEMENTATION ######################### ################## RBF IMPLEMENTATION ######################################## class RBFCoding(Projector): """ Constructor for an RBF encoding. stddev: scaling of the dimensions when computing the distance. Each dimension needs a scale. If only a 1-D array is given, all RBFs are assumed to have the same scaling, otherwise, it is assumed that there is a row specifying the scale for each RBF. c: centers of the RBFs. The number of rows corresponds to the number of RBFs. The number of column should be equal to the input dimension. Each row is a center for a RBF. normalized: Boolean to decided whether the RBFs should be normalized. bias_term: Boolean to decided whether the output should be augmented with a constant 1.0 bias term. """ def __init__(self, widths, centers, normalized=False, bias_term=True, **params): super(RBFCoding, self).__init__() # the centers of the rbfs self.c = centers.T[None, :, :] # the widths of the rbfs, each rbf can have different widths if widths.ndim == 1: self.w = widths[None, :, None] else: self.w = widths.T[None, :, :] # should the output of the rbfs sum to one self.normalized = normalized # include a bias term (always equal to one) self.bias_term = bias_term # size of the encoded vectors self.__size = centers.shape[0] # if bias term is included, increment the size if bias_term: self.__size += 1 def __call__(self, state): # if only on 1-D array given, reshape to a compatible shape if state.ndim == 1: state = state.reshape((1, -1)) # allocate and set bias term if needed last_index = self.size output = np.empty((state.shape[0], self.size)) if self.bias_term: last_index -= 1 output[:, -1] = 1.0 # compute squared weighted distance distance dsqr = -(((state[:, :, None] - self.c) / self.w) ** 2).sum(axis=1) if self.normalized: # compute the normalized rbfs from the distances e_x = np.exp(dsqr - dsqr.min(axis=1)[:, None]) output[:, :last_index] = e_x / e_x.sum(axis=1)[:, None] else: # compute the rbfs from the distances output[:, :last_index] = np.exp(dsqr) # return encoded input, squeeze out extra dimensions (in the case # only on input row was given) return output.squeeze() @property def size(self): return self.__size """ Method to generate grids of points (typically for RBF coding). state_range: range of each dimension num_centers: An integer or an array (or list) of integers which corresponds the number of points to distribute on each dimensions. If a single integer is given, all dimensions will have the same number of points. """ def grid_of_points(state_range, num_centers): if isinstance(num_centers, int): num_centers = [num_centers] * state_range[0].shape[0] points = [np.linspace(start, stop, num, endpoint=True) for start, stop, num in zip(state_range[0], state_range[1], num_centers)] points = np.meshgrid(*points) points = np.concatenate([p.reshape((-1, 1)) for p in points], axis=1) return points ################## END OF RBF IMPLEMENTATION ##################################