import tensorflow as tf

from rltf.tf_utils import tf_inv


class BLR(tf.layers.Layer):
  """Bayesian Linear Regression Layer

  By default:
    - Uses tf.float64 for internal variables since higher precision is required
      for inverting matrices
    - Inverts the precision matrix using the Woodbury formula - more stable and efficient

  NOTE:
    - Internal variables are not captured by `tf.trainable_variables()` and by optimizers
      - This is because `self.trainable_variables` returns an empty list
      - The fact that variables are set as trainable at instantiation does not affect this
    - Internal variables are captured when selecting variables from a given scope
      - This means that if an agent and a target network with identical structures contain
        BLR layers, the BLR variables will get updated, if all variables within the
        relevant scopes are captured
    - For training the variables, one needs to run the op returned by `self.train()`
  """

  def __init__(self, tau, sigma_e, mode="mean", w_dim=None, bias=False, dtype=tf.float64, name=None):
    super().__init__(trainable=False, dtype=dtype, name=name)

    assert mode in ["mean", "ts"]

    self.sigma  = sigma_e
    self.beta   = 1.0 / self.sigma**2
    self.tau    = tau
    self.bias   = bias
    self.w_dim  = w_dim
    self.mode   = mode

    # Custom TF Tensors and Ops
    self.w_mu     = None
    self.w_Sigma  = None
    self.w_Lambda = None
    self.w        = None    # Sampled value for w when using Thompson Sampling
    self.reset_op = None    # Reset all trainable variables to their initial values

    self.input_spec = tf.layers.InputSpec(min_ndim=2, max_ndim=2)

  def build(self, input_shape):
    input_shape = tf.TensorShape(input_shape)
    if input_shape[-1].value is None:
      raise ValueError("The last dimension of the inputs to `BLR` should be defined. Found `None`.")

    if self.w_dim is None:
      self.w_dim = input_shape[-1].value
      self.input_spec = tf.layers.InputSpec(min_ndim=2, max_ndim=2, axes={-1: self.w_dim})
    else:
      assert self.w_dim == input_shape[-1].value

    I           = tf.eye(self.w_dim, dtype=self.dtype)
    mu_init     = tf.zeros([self.w_dim, 1], dtype=self.dtype)
    Sigma_init  = 1.0 / self.tau * I
    Lambda_init = self.tau * I

    self.w_mu     = self.add_variable("w_mu",     shape=[self.w_dim, 1],
                                      # initializer=lambda *args, **kwargs: mu_init,
                                      initializer=tf.zeros_initializer, trainable=True)
    self.w_Sigma  = self.add_variable("w_Sigma",  shape=[self.w_dim, self.w_dim],
                                      initializer=lambda *args, **kwargs: Sigma_init, trainable=True)
    self.w_Lambda = self.add_variable("w_Lambda", shape=[self.w_dim, self.w_dim],
                                      initializer=lambda *args, **kwargs: Lambda_init, trainable=True)
    self.w        = self.add_variable("w",        shape=[self.w_dim, 1],
                                      # initializer=lambda *args, **kwargs: mu_init,
                                      initializer=tf.zeros_initializer, trainable=False)

    # Build the reset op
    self.reset_op = self._tf_update_params(mu_init, Sigma_init, Lambda_init)

    # Add debug histograms
    # tf.summary.histogram("debug/BLR/w_mu",     self.w_mu)
    # tf.summary.histogram("debug/BLR/w_Sigma",  self.w_Sigma)
    # tf.summary.histogram("debug/BLR/w_Lambda", self.w_Lambda)
    # tf.summary.histogram("debug/BLR/w_ts",     self.w)

    self.built = True

  def call(self, inputs, **kwargs):
    """Compute the posterior predictive distribution

    Args:
      inputs: tf.Tensor, `shape=[None, D]`. The feature matrix X
    Returns:
      List of `tf.Tensor`s:
        mu:  tf.Tensor, `shape=[None, 1]`. The mean at each test point
        var: tf.Tensor, `shape=[None, 1]`.
          The variance at each test point
    """
    X = self._cast_input(inputs)

    # Thompson Sampling Output
    if self.mode == "ts":
      mu = tf.matmul(X, self.w)
    # Bayesian Regression Output
    else:
      mu = tf.matmul(X, self.w_mu)

    # var ends up being diag(sigma**2 + matmul(matmul(X, w_Sigma), X.T))
    var = self.sigma**2 + tf.reduce_sum(tf.matmul(X, self.w_Sigma) * X, axis=-1, keepdims=True)
    # std = tf.sqrt(var)

    outputs = [mu, var]
    outputs = [self._cast_output(t) for t in outputs]
    return outputs

  def train(self, X, y):
    """Compute the weight posterior of Bayesian Linear Regression

    Args:
      X: tf.Tensor, `shape=[None, D]`. The feature matrix
      y: tf.Tensor, `shape=[None, 1]`. The correct outputs
    Returns:
      tf.Op which performs the update operation
    """
    X = self._cast_input(X)
    y = self._cast_input(y)

    # Compute the posterior precision matrix: Lambda_new = Lambda + beta * X^T X
    w_Lambda = self.w_Lambda + self.beta * tf.matmul(X, X, transpose_a=True)

    # Compute the posterior covariance matrix via the Woodbury identity
    X_norm  = 1.0 / self.sigma * X
    w_Sigma = tf_inv.woodburry_inverse(self.w_Sigma, tf.transpose(X_norm), X_norm)

    # Track the inversion error: w_Lambda @ w_Sigma should be close to the identity
    error = tf.losses.mean_squared_error(tf.matmul(w_Lambda, w_Sigma), tf.eye(self.w_dim))
    tf.summary.scalar("debug/BLR/inv_error", error)

    # Compute the posterior mean: mu_new = Sigma_new * (beta * X^T y + Lambda * mu)
    w_mu = tf.matmul(w_Sigma, self.beta * tf.matmul(X, y, transpose_a=True) + tf.matmul(self.w_Lambda, self.w_mu))

    return self._tf_update_params(w_mu, w_Sigma, w_Lambda)

  def resample_w(self, cholesky=False):
    sample = tf.random_normal(shape=self.w_mu.shape, dtype=self.dtype)

    # Compute A s.t. A A^T = w_Sigma. Note that SVD and Cholesky give different A
    if cholesky:
      # Use Cholesky
      A = tf.cholesky(self.w_Sigma)
    else:
      # Use SVD
      S, U, _ = tf.svd(self.w_Sigma)
      A = tf.matmul(U, tf.diag(tf.sqrt(S)))

    w = self.w_mu + tf.matmul(A, sample)
    return tf.assign(self.w, w, name="resample_w")

  @property
  def reset(self):
    return self.reset_op

  @property
  def trainable_weights(self):
    return self._trainable_weights or []

  def _tf_update_params(self, w_mu, w_Sigma, w_Lambda):
    """
    Returns:
      tf.Op which performs an update on all weight parameters
    """
    mu_op     = tf.assign(self.w_mu,     w_mu)
    Sigma_op  = tf.assign(self.w_Sigma,  w_Sigma)
    Lambda_op = tf.assign(self.w_Lambda, w_Lambda)
    return tf.group(mu_op, Sigma_op, Lambda_op)

  def _cast_input(self, x):
    if self.dtype == tf.float64 and x.dtype.base_dtype != tf.float64:
      x = tf.cast(x, self.dtype)
    return x

  def _cast_output(self, x):
    if x.dtype.base_dtype != tf.float32:
      x = tf.cast(x, tf.float32)
    return x
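

# Minimal usage sketch (not part of the library API): since the BLR variables are
# not picked up by optimizers, the layer is trained only by running the op returned
# by `train()`. The feature dimension D=4, the placeholder names `phi` and `y`, and
# the synthetic data below are illustrative assumptions for this sketch.
if __name__ == "__main__":
  import numpy as np

  phi = tf.placeholder(tf.float32, shape=[None, 4], name="phi")   # feature matrix
  y   = tf.placeholder(tf.float32, shape=[None, 1], name="y")     # regression targets

  blr = BLR(tau=1.0, sigma_e=0.1, mode="ts")
  mu, var   = blr.apply(phi)        # posterior predictive mean and variance
  train_op  = blr.train(phi, y)     # Bayesian posterior update for the weight parameters
  sample_op = blr.resample_w()      # draw a new w for Thompson Sampling

  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    X_np = np.random.randn(32, 4)
    y_np = X_np @ np.ones([4, 1]) + 0.1 * np.random.randn(32, 1)
    sess.run(train_op, feed_dict={phi: X_np, y: y_np})   # update the posterior on a batch
    sess.run(sample_op)                                  # resample w before acting (mode="ts")
    print(sess.run([mu, var], feed_dict={phi: X_np}))    # query the predictive distribution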