Python tensorflow.exp() Examples

The following are 30 code examples of tensorflow.exp(). You can go to the original project or source file via the project and source file reference above each example. You may also want to check out all available functions/classes of the module tensorflow, or try the search function.
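Before the examples, a minimal sketch of tf.exp itself may help: it computes the element-wise exponential of a tensor. The snippet below is an illustration only, assuming TensorFlow 1.x in graph mode, as used by most of the examples that follow.

import tensorflow as tf

x = tf.constant([0.0, 1.0, 2.0])
y = tf.exp(x)  # element-wise e**x

with tf.Session() as sess:
    print(sess.run(y))  # approximately [1.0, 2.7182817, 7.389056]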
Example #1
Source File: density_model.py    From cs294-112_hws with MIT License
def make_encoder(self, state, z_size, scope, n_layers, hid_size):
        """
            ### PROBLEM 3
            ### YOUR CODE HERE

            args:
                state: tf variable
                z_size: output dimension of the encoder network
                scope: scope name
                n_layers: number of layers of the encoder network
                hid_size: hidden dimension of encoder network

            TODO:
                1. z_mean: the output of a neural network that takes the state as input,
                    has output dimension z_size, n_layers layers, and hidden 
                    dimension hid_size
                2. z_logstd: a trainable variable, initialized to 0
                    shape (z_size,)

            Hint: use build_mlp
        """
        z_mean = build_mlp(state, z_size, scope, n_layers, hid_size)
        z_logstd = tf.get_variable('z_logstd', shape=z_size, trainable=True,
                                   initializer=tf.constant_initializer(value=0.))
        return tfp.distributions.MultivariateNormalDiag(loc=z_mean, scale_diag=tf.exp(z_logstd)) 
Example #2
Source File: distributions.py    From DOTA_models with Apache License 2.0
def __init__(self, x_bxu, z_size, name, var_min=0.0):
    """Create an input dependent diagonal Gaussian distribution.

    Args:
      x_bxu: The input tensor from which the mean and variance are computed,
        via a linear transformation of x.  I.e.
          mu = Wx + b, log(var) = Mx + c
      z_size: The size of the distribution.
      name:  The name to prefix to learned variables.
      var_min (optional): Minimal variance allowed.  This is an additional
        way to control the amount of information getting through the stochastic
        layer.
    """
    size_bxn = tf.stack([tf.shape(x_bxu)[0], z_size])
    self.mean_bxn = mean_bxn = linear(x_bxu, z_size, name=(name+"/mean"))
    logvar_bxn = linear(x_bxu, z_size, name=(name+"/logvar"))
    if var_min > 0.0:
      logvar_bxn = tf.log(tf.exp(logvar_bxn) + var_min)
    self.logvar_bxn = logvar_bxn

    self.noise_bxn = noise_bxn = tf.random_normal(size_bxn)
    self.noise_bxn.set_shape([None, z_size])
    self.sample_bxn = mean_bxn + tf.exp(0.5 * logvar_bxn) * noise_bxn 
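The var_min branch above floors the variance: exp(log(exp(logvar) + var_min)) can never be smaller than var_min. A quick NumPy check of that identity (an illustration, not part of the original file):

import numpy as np

logvar = np.array([-10.0, 0.0])
var_min = 0.1
floored_var = np.exp(np.log(np.exp(logvar) + var_min))
print(floored_var)  # [~0.10005, 1.1]: the variance never drops below var_min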
Example #3
Source File: learning_rate.py    From fine-lm with MIT License
def _learning_rate_warmup(warmup_steps, warmup_schedule="exp", hparams=None):
  """Learning rate warmup multiplier."""
  if not warmup_steps:
    return tf.constant(1.)

  tf.logging.info("Applying %s learning rate warmup for %d steps",
                  warmup_schedule, warmup_steps)

  warmup_steps = tf.to_float(warmup_steps)
  global_step = _global_step(hparams)

  if warmup_schedule == "exp":
    return tf.exp(tf.log(0.01) / warmup_steps)**(warmup_steps - global_step)
  else:
    assert warmup_schedule == "linear"
    start = tf.constant(0.35)
    return ((tf.constant(1.) - start) / warmup_steps) * global_step + start 
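For intuition, the exponential schedule above yields a multiplier of 0.01 at step 0 that rises smoothly to 1.0 at warmup_steps. A small NumPy sketch of the same expression (an illustration, not part of the original file):

import numpy as np

def exp_warmup(step, warmup_steps):
    # Mirrors tf.exp(tf.log(0.01) / warmup_steps) ** (warmup_steps - step).
    return np.exp(np.log(0.01) / warmup_steps) ** (warmup_steps - step)

for step in (0, 500, 1000):
    print(step, exp_warmup(step, warmup_steps=1000))  # 0.01, 0.1, 1.0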
Example #4
Source File: distributions.py    From DOTA_models with Apache License 2.0
def __init__(self, batch_size, z_size, mean, logvar):
    """Create a diagonal gaussian distribution.

    Args:
      batch_size: The size of the batch, i.e. 0th dim in 2D tensor of samples.
      z_size: The dimension of the distribution, i.e. 1st dim in 2D tensor.
      mean: The N-D mean of the distribution.
      logvar: The N-D log variance of the diagonal distribution.
    """
    size__xz = [None, z_size]
    self.mean = mean            # bxn already
    self.logvar = logvar        # bxn already
    self.noise = noise = tf.random_normal(tf.shape(logvar))
    self.sample = mean + tf.exp(0.5 * logvar) * noise
    mean.set_shape(size__xz)
    logvar.set_shape(size__xz)
    self.sample.set_shape(size__xz) 
Example #5
Source File: distributions.py    From DOTA_models with Apache License 2.0
def diag_gaussian_log_likelihood(z, mu=0.0, logvar=0.0):
  """Log-likelihood under a Gaussian distribution with diagonal covariance.
    Returns the log-likelihood for each dimension.  One should sum the
    results for the log-likelihood under the full multidimensional model.

  Args:
    z: The value at which to compute the log-likelihood.
    mu: The mean of the Gaussian.
    logvar: The log variance of the Gaussian.

  Returns:
    The log-likelihood under the Gaussian model.
  """

  return -0.5 * (logvar + np.log(2*np.pi) + \
                 tf.square((z-mu)/tf.exp(0.5*logvar))) 
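As the docstring notes, the result is per-dimension and should be summed for the joint log-likelihood. A NumPy version of the same formula for a quick check (an illustration, not from the original repository):

import numpy as np

def diag_gaussian_log_likelihood_np(z, mu=0.0, logvar=0.0):
    # Per-dimension log N(z; mu, exp(logvar)), mirroring the TF function above.
    return -0.5 * (logvar + np.log(2 * np.pi) + np.square((z - mu) / np.exp(0.5 * logvar)))

z = np.array([0.5, -1.0, 2.0])
per_dim = diag_gaussian_log_likelihood_np(z)  # one value per dimension
print(per_dim.sum())                          # joint log-likelihood under the diagonal model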
Example #6
Source File: common_layers.py    From fine-lm with MIT License
def get_timing_signal(length,
                      min_timescale=1,
                      max_timescale=1e4,
                      num_timescales=16):
  """Create Tensor of sinusoids of different frequencies.

  Args:
    length: Length of the Tensor to create, i.e. Number of steps.
    min_timescale: a float
    max_timescale: a float
    num_timescales: an int

  Returns:
    Tensor of shape (length, 2*num_timescales)
  """
  positions = tf.to_float(tf.range(length))
  log_timescale_increment = (
      math.log(max_timescale / min_timescale) / (num_timescales - 1))
  inv_timescales = min_timescale * tf.exp(
      tf.to_float(tf.range(num_timescales)) * -log_timescale_increment)
  scaled_time = tf.expand_dims(positions, 1) * tf.expand_dims(inv_timescales, 0)
  return tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1) 
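In practice the returned signal is added to a (length, channels) activation with channels == 2 * num_timescales. A hypothetical usage sketch (the batch tensor and shapes below are assumptions, not from the source file):

signal = get_timing_signal(length=128, num_timescales=16)  # shape (128, 32)
x = tf.zeros([4, 128, 32])                                 # (batch, length, channels) activation
x_with_position = x + tf.expand_dims(signal, 0)            # broadcast the same signal over the batch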
Example #7
Source File: nn.py    From cs294-112_hws with MIT License
def call(self, inputs):
        mean_and_log_std = self.model(inputs)
        mean, log_std = tf.split(mean_and_log_std, num_or_size_splits=2, axis=1)
        log_std = tf.clip_by_value(log_std, -20., 2.)
        
        distribution = tfp.distributions.MultivariateNormalDiag(
            loc=mean,
            scale_diag=tf.exp(log_std)
        )
        
        raw_actions = distribution.sample()
        if not self._reparameterize:
            ### Problem 1.3.A
            ### YOUR CODE HERE
            raw_actions = tf.stop_gradient(raw_actions)
        log_probs = distribution.log_prob(raw_actions)
        log_probs -= self._squash_correction(raw_actions)

        ### Problem 2.A
        ### YOUR CODE HERE
        self.actions = tf.tanh(raw_actions)
            
        return self.actions, log_probs 
Example #8
Source File: autoencoders.py    From fine-lm with MIT License
def bottleneck(self, x):  # pylint: disable=arguments-differ
    hparams = self.hparams
    if hparams.unordered:
      return super(AutoencoderOrderedDiscrete, self).bottleneck(x)
    noise = hparams.bottleneck_noise
    hparams.bottleneck_noise = 0.0  # We'll add noise below.
    x, loss = discretization.parametrized_bottleneck(x, hparams)
    hparams.bottleneck_noise = noise
    if hparams.mode == tf.estimator.ModeKeys.TRAIN:
      # We want a number p such that p^bottleneck_bits = 1 - noise / 2,
      # i.e. log(p) = log(1 - noise / 2) / bottleneck_bits.
      log_p = tf.log(1 - float(noise) / 2) / float(hparams.bottleneck_bits)
      # Probabilities of flipping are p, p^2, p^3, ..., p^bottleneck_bits.
      noise_mask = 1.0 - tf.exp(tf.cumsum(tf.zeros_like(x) + log_p, axis=-1))
      # Having the no-noise mask, we can make noise just uniformly at random.
      ordered_noise = tf.random_uniform(tf.shape(x))
      # We want our noise to be 1s at the start and random {-1, 1} bits later.
      ordered_noise = tf.to_float(tf.less(noise_mask, ordered_noise))
      # Now we flip the bits of x on the noisy positions (ordered and normal).
      x *= 2.0 * ordered_noise - 1
    return x, loss 
Example #9
Source File: utils.py    From DOTA_models with Apache License 2.0
def gaussian_kernel_matrix(x, y, sigmas):
  r"""Computes a Guassian Radial Basis Kernel between the samples of x and y.

  We create a sum of multiple gaussian kernels each having a width sigma_i.

  Args:
    x: a tensor of shape [num_samples, num_features]
    y: a tensor of shape [num_samples, num_features]
    sigmas: a tensor of floats which denote the widths of each of the
      gaussians in the kernel.
  Returns:
    A tensor of shape [num_samples{x}, num_samples{y}] with the RBF kernel.
  """
  beta = 1. / (2. * (tf.expand_dims(sigmas, 1)))

  dist = compute_pairwise_distances(x, y)

  s = tf.matmul(beta, tf.reshape(dist, (1, -1)))

  return tf.reshape(tf.reduce_sum(tf.exp(-s), 0), tf.shape(dist)) 
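A usage sketch for the kernel above (shapes are hypothetical; compute_pairwise_distances is defined elsewhere in the same utils.py):

x = tf.random_normal([100, 16])
y = tf.random_normal([80, 16])
sigmas = tf.constant([1.0, 5.0, 10.0])
kernel = gaussian_kernel_matrix(x, y, sigmas)  # shape (100, 80), one entry per (x_i, y_j) pair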
Example #10
Source File: discretization.py    From fine-lm with MIT License
def vae(x, name, z_size):
  """Simple variational autoencoder without discretization.

  Args:
    x: Input to the discretization bottleneck.
    name: Name for the bottleneck scope.
    z_size: Number of bits used to produce discrete code; discrete codes range
      from 1 to 2**z_size.

  Returns:
    Latent code z, the KL loss, mu and log_sigma.
  """
  with tf.variable_scope(name):
    mu = tf.layers.dense(x, z_size, name="mu")
    log_sigma = tf.layers.dense(x, z_size, name="log_sigma")
    shape = common_layers.shape_list(x)
    epsilon = tf.random_normal([shape[0], shape[1], 1, z_size])
    z = mu + tf.exp(log_sigma / 2) * epsilon
    kl = 0.5 * tf.reduce_mean(
        tf.exp(log_sigma) + tf.square(mu) - 1. - log_sigma, axis=-1)
    free_bits = z_size // 4
    kl_loss = tf.reduce_mean(tf.maximum(kl - free_bits, 0.0))
    return z, kl_loss, mu, log_sigma 
Example #11
Source File: train_policy.py    From cs294-112_hws with MIT License
def sample_action(self, policy_parameters):
        """
        constructs a symbolic operation for stochastically sampling from the policy
        distribution

        arguments:
            policy_parameters
                (mean, log_std) of a Gaussian distribution over actions
                    sy_mean: (batch_size, self.ac_dim)
                    sy_logstd: (batch_size, self.ac_dim)

        returns:
            sy_sampled_ac:
                (batch_size, self.ac_dim)
        """
        sy_mean, sy_logstd = policy_parameters
        sy_sampled_ac = sy_mean + tf.exp(sy_logstd) * tf.random_normal(tf.shape(sy_mean), 0, 1)
        return sy_sampled_ac 
Example #12
Source File: competition_model_class.py    From Deep_Learning_Weather_Forecasting with Apache License 2.0
def build_graph(self):
        #keras.backend.clear_session() # clear session/graph    
        self.optimizer = keras.optimizers.Adam(lr=self.lr, decay=self.decay)

        self.model = Seq2Seq_MVE_subnets_swish(id_embd=True, time_embd=True,
            lr=self.lr, decay=self.decay,
            num_input_features=self.num_input_features, num_output_features=self.num_output_features,
            num_decoder_features=self.num_decoder_features, layers=self.layers,
            loss=self.loss, regulariser=self.regulariser)

        def _mve_loss(y_true, y_pred):
            pred_u = crop(2,0,3)(y_pred)
            pred_sig = crop(2,3,6)(y_pred)
            print(pred_sig)
            #exp_sig = tf.exp(pred_sig) # guard against pred_sig being too small (e.g. zero)
            #precision = 1./exp_sig
            precision = 1./pred_sig
            #log_loss= 0.5*tf.log(exp_sig)+0.5*precision*((pred_u-y_true)**2)
            log_loss= 0.5*tf.log(pred_sig)+0.5*precision*((pred_u-y_true)**2)            
          
            log_loss=tf.reduce_mean(log_loss)
            return log_loss

        print(self.model.summary())
        self.model.compile(optimizer = self.optimizer, loss=_mve_loss) 
Example #13
Source File: competition_model_class.py    From Deep_Learning_Weather_Forecasting with Apache License 2.0
def minus_plus_std_strategy(self, pred_mean, pred_var, feature_name,\
                            timestep_to_ensemble=21, alpha=0):
        '''
        This strategy adds alpha * pred_std (where pred_std = sqrt(exp(pred_var))) to the
        prediction of the given feature from the specified timestep (timestep_to_ensemble) onward.
        pred_mean: (10, 37, 3)
        pred_var: (10, 37, 3)
        timestep_to_ensemble: int32 (from 0 to 36)
        '''
        print('Using minus_plus_var_strategy with alpha {}'.format(alpha))
        assert 0<=timestep_to_ensemble<=36 , 'Please ensure 0<=timestep_to_ensemble<=36!'
        assert -0.3<= alpha <=0.3, '-0.3<= alpha <=0.3!'
        assert pred_mean.shape == (10, 37, 3), 'Error! This funtion ONLY works for \
        one data sample with shape (10, 37, 3). Any data shape (None, 10, 37, 3) will leads this error!'
        pred_std = np.sqrt(np.exp(pred_var))           
        print('alpha:',alpha)

        pred_mean[:,timestep_to_ensemble:,self.obs_and_output_feature_index_map[feature_name]] = \
        pred_mean[:,timestep_to_ensemble:,self.obs_and_output_feature_index_map[feature_name]] + \
        alpha * pred_std[:,timestep_to_ensemble:,self.obs_and_output_feature_index_map[feature_name]]

        return pred_mean 
Example #14
Source File: utils.py    From UROP-Adversarial-Feature-Matching-for-Text-Generation with GNU Affero General Public License v3.0
def logistic_kernel(x, y, param):
	# used by calculate_logistic_mmd; notation follows https://en.wikipedia.org/wiki/Logistic_distribution
	s = param['logistic_s']
	numerator = tf.exp(-(x - y) / s)
	denominator = s * tf.square(1 + tf.exp(-(x - y) / s))
	return numerator / denominator 
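At x == y the expression reduces to 1 / (4 * s), the peak density of the logistic distribution. A NumPy check of that value (an illustration, not part of the original file):

import numpy as np

s = 2.0
x = y = 0.0
value = np.exp(-(x - y) / s) / (s * np.square(1 + np.exp(-(x - y) / s)))
print(value, 1.0 / (4.0 * s))  # both 0.125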
Example #15
Source File: ops.py    From Generative-Latent-Optimization-Tensorflow with MIT License
def selu(x):
    alpha = 1.6732632423543772848170429916717
    scale = 1.0507009873554804934193349852946
    return scale * tf.where(x > 0.0, x, alpha * tf.exp(x) - alpha) 
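The two constants are the fixed-point values from the self-normalizing networks (SELU) paper. A NumPy sketch of the same piecewise function for a few inputs (an illustration only):

import numpy as np

alpha = 1.6732632423543772848170429916717
scale = 1.0507009873554804934193349852946
selu_np = lambda x: scale * np.where(x > 0.0, x, alpha * np.exp(x) - alpha)
print(selu_np(np.array([-5.0, 0.0, 5.0])))  # [~-1.746, 0.0, ~5.254]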
Example #16
Source File: policies.py    From lirpg with MIT License
def __init__(self, ob_dim, ac_dim):
        # Here we'll construct a bunch of expressions, which will be used in two places:
        # (1) When sampling actions
        # (2) When computing loss functions, for the policy update
        # Variables specific to (1) have the word "sampled" in them,
        # whereas variables specific to (2) have the word "old" in them
        ob_no = tf.placeholder(tf.float32, shape=[None, ob_dim*2], name="ob") # batch of observations
        oldac_na = tf.placeholder(tf.float32, shape=[None, ac_dim], name="ac") # batch of previous actions
        oldac_dist = tf.placeholder(tf.float32, shape=[None, ac_dim*2], name="oldac_dist") # batch of previous action distributions
        adv_n = tf.placeholder(tf.float32, shape=[None], name="adv") # advantage function estimate
        wd_dict = {}
        h1 = tf.nn.tanh(dense(ob_no, 64, "h1", weight_init=U.normc_initializer(1.0), bias_init=0.0, weight_loss_dict=wd_dict))
        h2 = tf.nn.tanh(dense(h1, 64, "h2", weight_init=U.normc_initializer(1.0), bias_init=0.0, weight_loss_dict=wd_dict))
        mean_na = dense(h2, ac_dim, "mean", weight_init=U.normc_initializer(0.1), bias_init=0.0, weight_loss_dict=wd_dict) # Mean control output
        self.wd_dict = wd_dict
        self.logstd_1a = logstd_1a = tf.get_variable("logstd", [ac_dim], tf.float32, tf.zeros_initializer()) # Log std of outputs (trainable, state-independent)
        logstd_1a = tf.expand_dims(logstd_1a, 0)
        std_1a = tf.exp(logstd_1a)
        std_na = tf.tile(std_1a, [tf.shape(mean_na)[0], 1])
        ac_dist = tf.concat([tf.reshape(mean_na, [-1, ac_dim]), tf.reshape(std_na, [-1, ac_dim])], 1)
        sampled_ac_na = tf.random_normal(tf.shape(ac_dist[:,ac_dim:])) * ac_dist[:,ac_dim:] + ac_dist[:,:ac_dim] # This is the sampled action we'll perform.
        logprobsampled_n = - tf.reduce_sum(tf.log(ac_dist[:,ac_dim:]), axis=1) - 0.5 * tf.log(2.0*np.pi)*ac_dim - 0.5 * tf.reduce_sum(tf.square(ac_dist[:,:ac_dim] - sampled_ac_na) / (tf.square(ac_dist[:,ac_dim:])), axis=1) # Logprob of sampled action
        logprob_n = - tf.reduce_sum(tf.log(ac_dist[:,ac_dim:]), axis=1) - 0.5 * tf.log(2.0*np.pi)*ac_dim - 0.5 * tf.reduce_sum(tf.square(ac_dist[:,:ac_dim] - oldac_na) / (tf.square(ac_dist[:,ac_dim:])), axis=1) # Logprob of previous actions under CURRENT policy (whereas oldlogprob_n is under OLD policy)
        kl = tf.reduce_mean(kl_div(oldac_dist, ac_dist, ac_dim))
        #kl = .5 * tf.reduce_mean(tf.square(logprob_n - oldlogprob_n)) # Approximation of KL divergence between old policy used to generate actions, and new policy used to compute logprob_n
        surr = - tf.reduce_mean(adv_n * logprob_n) # Loss function that we'll differentiate to get the policy gradient
        surr_sampled = - tf.reduce_mean(logprob_n) # Sampled loss of the policy
        self._act = U.function([ob_no], [sampled_ac_na, ac_dist, logprobsampled_n]) # Generate a new action and its logprob
        #self.compute_kl = U.function([ob_no, oldac_na, oldlogprob_n], kl) # Compute (approximate) KL divergence between old policy and new policy
        self.compute_kl = U.function([ob_no, oldac_dist], kl)
        self.update_info = ((ob_no, oldac_na, adv_n), surr, surr_sampled) # Input and output variables needed for computing loss
        U.initialize() # Initialize uninitialized TF variables 
Example #17
Source File: distributions.py    From lirpg with MIT License
def kl(self, other):
        a0 = self.logits - tf.reduce_max(self.logits, axis=-1, keepdims=True)
        a1 = other.logits - tf.reduce_max(other.logits, axis=-1, keepdims=True)
        ea0 = tf.exp(a0)
        ea1 = tf.exp(a1)
        z0 = tf.reduce_sum(ea0, axis=-1, keepdims=True)
        z1 = tf.reduce_sum(ea1, axis=-1, keepdims=True)
        p0 = ea0 / z0
        return tf.reduce_sum(p0 * (a0 - tf.log(z0) - a1 + tf.log(z1)), axis=-1) 
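The max subtraction above is only for numerical stability; the result is the ordinary KL divergence between the two softmax distributions. A NumPy check (an illustration, not part of the original file):

import numpy as np

def softmax(logits):
    e = np.exp(logits - logits.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

p = softmax(np.array([1.0, 2.0, 3.0]))
q = softmax(np.array([0.5, 0.5, 2.0]))
print(np.sum(p * (np.log(p) - np.log(q))))  # matches the value computed by kl() above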
Example #18
Source File: distributions.py    From lirpg with MIT License
def entropy(self):
        a0 = self.logits - tf.reduce_max(self.logits, axis=-1, keepdims=True)
        ea0 = tf.exp(a0)
        z0 = tf.reduce_sum(ea0, axis=-1, keepdims=True)
        p0 = ea0 / z0
        return tf.reduce_sum(p0 * (tf.log(z0) - a0), axis=-1) 
Example #19
Source File: distributions.py    From lirpg with MIT License
def __init__(self, flat):
        self.flat = flat
        mean, logstd = tf.split(axis=len(flat.shape)-1, num_or_size_splits=2, value=flat)
        self.mean = mean
        self.logstd = logstd
        self.std = tf.exp(logstd) 
Example #20
Source File: next_frame.py    From fine-lm with MIT License
def kl_divergence(self, mu, log_sigma):
    """KL divergence of diagonal gaussian N(mu,exp(log_sigma)) and N(0,1).

    Args:
      mu: mu parameter of the distribution.
      log_sigma: log(sigma) parameter of the distribution.
    Returns:
      the KL loss.
    """

    return -.5 * tf.reduce_sum(
        1. + log_sigma - tf.square(mu) - tf.exp(log_sigma),
        axis=1) 
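As a quick sanity check, the KL term is zero when mu = 0 and log_sigma = 0, i.e. when the distribution is already N(0, 1). A NumPy version of the per-example sum (an illustration only):

import numpy as np

mu = np.zeros(4)
log_sigma = np.zeros(4)
kl = -0.5 * np.sum(1. + log_sigma - np.square(mu) - np.exp(log_sigma))
print(kl)  # 0.0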
Example #21
Source File: yellowfin.py    From fine-lm with MIT License
def _curvature_range(self):
    """Curvature range.

    Returns:
      h_max_t, h_min_t ops
    """
    self._curv_win = tf.get_variable("curv_win",
                                     dtype=tf.float32,
                                     trainable=False,
                                     shape=[self.curvature_window_width,],
                                     initializer=tf.zeros_initializer)
    # We use log smoothing for curvature range
    self._curv_win = tf.scatter_update(self._curv_win,
                                       self._step % self.curvature_window_width,
                                       tf.log(self._grad_norm_squared))
    # Note here the iterations start from iteration 0
    valid_window = tf.slice(self._curv_win,
                            tf.constant([0,]),
                            tf.expand_dims(
                                tf.minimum(
                                    tf.constant(self.curvature_window_width),
                                    self._step + 1), dim=0))
    self._h_min_t = tf.reduce_min(valid_window)
    self._h_max_t = tf.reduce_max(valid_window)

    curv_range_ops = []
    with tf.control_dependencies([self._h_min_t, self._h_max_t]):
      avg_op = self._moving_averager.apply([self._h_min_t, self._h_max_t])
      with tf.control_dependencies([avg_op]):
        self._h_min = tf.exp(
            tf.identity(self._moving_averager.average(self._h_min_t)))
        self._h_max = tf.exp(
            tf.identity(self._moving_averager.average(self._h_max_t)))
        if self._sparsity_debias:
          self._h_min *= self._sparsity_avg
          self._h_max *= self._sparsity_avg
    curv_range_ops.append(avg_op)
    return curv_range_ops  # h_max_t, h_min_t 
Example #22
Source File: op.py    From ArtGAN with BSD 3-Clause "New" or "Revised" License
def log_sum_exp(x, axis=1):
    # Numerically stable log(sum(exp(x))) along `axis`: subtract the max before exponentiating.
    m = tf.reduce_max(x, axis=axis, keep_dims=True)
    # Squeeze the kept dimension so the result has the same rank as the reduced sum.
    return tf.squeeze(m, axis=[axis]) + tf.log(tf.reduce_sum(tf.exp(x - m), axis=axis))
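The max subtraction is what keeps the computation stable for large logits. A NumPy comparison against the naive form (an illustration, not part of the original file):

import numpy as np

x = np.array([[1000.0, 1000.0]])
naive = np.log(np.sum(np.exp(x), axis=1))                        # overflows to inf
m = np.max(x, axis=1, keepdims=True)
stable = np.squeeze(m, axis=1) + np.log(np.sum(np.exp(x - m), axis=1))
print(naive, stable)                                             # [inf] vs [1000.6931...]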
Example #23
Source File: train_ac_exploration_f18.py    From cs294-112_hws with MIT License
def sample_action(self, policy_parameters):
        """ Constructs a symbolic operation for stochastically sampling from the policy
            distribution

            arguments:
                policy_parameters
                    if discrete: logits of a categorical distribution over actions 
                        sy_logits_na: (batch_size, self.ac_dim)
                    if continuous: (mean, log_std) of a Gaussian distribution over actions
                        sy_mean: (batch_size, self.ac_dim)
                        sy_logstd: (self.ac_dim,)

            returns:
                sy_sampled_ac: 
                    if discrete: (batch_size)
                    if continuous: (batch_size, self.ac_dim)

            Hint: for the continuous case, use the reparameterization trick:
                 The output from a Gaussian distribution with mean 'mu' and std 'sigma' is
        
                      mu + sigma * z,         z ~ N(0, I)
        
                 This reduces the problem to just sampling z. (Hint: use tf.random_normal!)
        """
        if self.discrete:
            sy_logits_na = policy_parameters
            sy_sampled_ac = tf.squeeze(tf.multinomial(sy_logits_na, num_samples=1), axis=1)
        else:
            sy_mean, sy_logstd = policy_parameters
            sy_sampled_ac = sy_mean + tf.exp(sy_logstd) * tf.random_normal(tf.shape(sy_mean), 0, 1)
        return sy_sampled_ac 
Example #24
Source File: density_model.py    From cs294-112_hws with MIT License
def make_prior(self, z_size):
        """
            ### PROBLEM 3
            ### YOUR CODE HERE

            args:
                z_size: output dimension of the encoder network

            TODO:
                prior_mean and prior_logstd are for a standard normal distribution
                    both have dimension z_size
        """
        prior_mean = tf.zeros(z_size)
        prior_logstd = tf.zeros(z_size)
        return tfp.distributions.MultivariateNormalDiag(loc=prior_mean, scale_diag=tf.exp(prior_logstd)) 
Example #25
Source File: train_policy.py    From cs294-112_hws with MIT License
def ppo_loss(self, log_probs, fixed_log_probs, advantages, clip_epsilon=0.1, entropy_coeff=1e-4):
        """
        given:
            clip_epsilon

        arguments:
            advantages (mini_bsize,)
            states (mini_bsize,)
            actions (mini_bsize,)
            fixed_log_probs (mini_bsize,)

        intermediate results:
            states, actions --> log_probs
            log_probs, fixed_log_probs --> ratio
            advantages, ratio --> surr1
            ratio, clip_epsilon, advantages --> surr2
            surr1, surr2 --> policy_surr_loss
        """
        ratio = tf.exp(log_probs - fixed_log_probs)
        surr1 = ratio * advantages
        surr2 = tf.clip_by_value(ratio, clip_value_min=1.0-clip_epsilon, clip_value_max=1.0+clip_epsilon) * advantages
        policy_surr_loss = -tf.reduce_mean(tf.minimum(surr1, surr2))

        probs = tf.exp(log_probs)
        entropy = tf.reduce_sum(-(log_probs * probs))
        policy_surr_loss -= entropy_coeff * entropy
        return policy_surr_loss 
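To see the clipping in action, here is a small NumPy evaluation of the two surrogate terms for two transitions (the values are made up for illustration and are not part of the original file):

import numpy as np

log_probs = np.array([-1.0, -2.0])
fixed_log_probs = np.array([-1.5, -1.5])
advantages = np.array([1.0, -1.0])
clip_epsilon = 0.1

ratio = np.exp(log_probs - fixed_log_probs)                       # [~1.649, ~0.607]
surr1 = ratio * advantages
surr2 = np.clip(ratio, 1.0 - clip_epsilon, 1.0 + clip_epsilon) * advantages
print(-np.mean(np.minimum(surr1, surr2)))                         # the clipped terms dominate: -0.1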
Example #26
Source File: train_pg_f18.py    From cs294-112_hws with MIT License
def get_neg_log_prob(self, policy_parameters, sy_ac_na):
        """ Constructs a symbolic operation for computing the negative log probability of a set 
            of actions that were actually taken according to the policy

            arguments:
                policy_parameters
                    if discrete: logits of a categorical distribution over actions 
                        sy_logits_na: (batch_size, self.ac_dim)
                    if continuous: (mean, log_std) of a Gaussian distribution over actions
                        sy_mean: (batch_size, self.ac_dim)
                        sy_logstd: (self.ac_dim,)

                sy_ac_na: 
                    if discrete: (batch_size,)
                    if continuous: (batch_size, self.ac_dim)

            returns:
                sy_neg_logprob_n: (batch_size)

            Hint:
                For the discrete case, use the log probability under a categorical distribution.
                For the continuous case, use the log probability under a multivariate gaussian.
        """
        if self.discrete:
            sy_logits_na = policy_parameters
            # YOUR_CODE_HERE
            sy_neg_logprob_n = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=sy_ac_na, 
                logits=sy_logits_na
            )
        else:
            sy_mean, sy_logstd = policy_parameters
            # YOUR_CODE_HERE
            sy = (sy_ac_na - sy_mean) / tf.exp(sy_logstd)
            sy_neg_logprob_n = 0.5 * tf.reduce_sum(sy * sy, axis=1)
        return sy_neg_logprob_n 
Example #27
Source File: train_pg_f18.py    From cs294-112_hws with MIT License
def sample_action(self, policy_parameters):
        """ Constructs a symbolic operation for stochastically sampling from the policy
            distribution

            arguments:
                policy_parameters
                    if discrete: logits of a categorical distribution over actions 
                        sy_logits_na: (batch_size, self.ac_dim)
                    if continuous: (mean, log_std) of a Gaussian distribution over actions
                        sy_mean: (batch_size, self.ac_dim)
                        sy_logstd: (self.ac_dim,)

            returns:
                sy_sampled_ac: 
                    if discrete: (batch_size,)
                    if continuous: (batch_size, self.ac_dim)

            Hint: for the continuous case, use the reparameterization trick:
                 The output from a Gaussian distribution with mean 'mu' and std 'sigma' is
        
                      mu + sigma * z,         z ~ N(0, I)
        
                 This reduces the problem to just sampling z. (Hint: use tf.random_normal!)
        """
        if self.discrete:
            sy_logits_na = policy_parameters
            # YOUR_CODE_HERE
            sy_sampled_ac = tf.squeeze(tf.multinomial(sy_logits_na, 1), axis=1)
        else:
            sy_mean, sy_logstd = policy_parameters
            # YOUR_CODE_HERE
            sy_sampled_ac = sy_mean + tf.exp(sy_logstd) * tf.random_normal(tf.shape(sy_mean))
        return sy_sampled_ac

    #========================================================================================#
    #                           ----------PROBLEM 2----------
    #========================================================================================# 
Example #28
Source File: train_ac_f18.py    From cs294-112_hws with MIT License
def get_log_prob(self, policy_parameters, sy_ac_na):
        """ Constructs a symbolic operation for computing the log probability of a set of actions
            that were actually taken according to the policy

            arguments:
                policy_parameters
                    if discrete: logits of a categorical distribution over actions 
                        sy_logits_na: (batch_size, self.ac_dim)
                    if continuous: (mean, log_std) of a Gaussian distribution over actions
                        sy_mean: (batch_size, self.ac_dim)
                        sy_logstd: (self.ac_dim,)

                sy_ac_na: (batch_size, self.ac_dim)

            returns:
                sy_logprob_n: (batch_size)

            Hint:
                For the discrete case, use the log probability under a categorical distribution.
                For the continuous case, use the log probability under a multivariate gaussian.
        """
        if self.discrete:
            sy_logits_na = policy_parameters
            # YOUR_HW2 CODE_HERE
            sy_logprob_n = -tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=sy_ac_na, 
                logits=sy_logits_na
            )
        else:
            sy_mean, sy_logstd = policy_parameters
            # YOUR_HW2 CODE_HERE
            sy = (sy_ac_na - sy_mean) / tf.exp(sy_logstd)
            sy_logprob_n = -0.5 * tf.reduce_sum(sy * sy, axis=1)
        return sy_logprob_n 
Example #29
Source File: train_ac_f18.py    From cs294-112_hws with MIT License
def sample_action(self, policy_parameters):
        """ Constructs a symbolic operation for stochastically sampling from the policy
            distribution

            arguments:
                policy_parameters
                    if discrete: logits of a categorical distribution over actions 
                        sy_logits_na: (batch_size, self.ac_dim)
                    if continuous: (mean, log_std) of a Gaussian distribution over actions
                        sy_mean: (batch_size, self.ac_dim)
                        sy_logstd: (self.ac_dim,)

            returns:
                sy_sampled_ac: 
                    if discrete: (batch_size)
                    if continuous: (batch_size, self.ac_dim)

            Hint: for the continuous case, use the reparameterization trick:
                 The output from a Gaussian distribution with mean 'mu' and std 'sigma' is
        
                      mu + sigma * z,         z ~ N(0, I)
        
                 This reduces the problem to just sampling z. (Hint: use tf.random_normal!)
        """
        if self.discrete:
            sy_logits_na = policy_parameters
            # YOUR_HW2 CODE_HERE
            sy_sampled_ac = tf.squeeze(tf.multinomial(sy_logits_na, 1), axis=1)
        else:
            sy_mean, sy_logstd = policy_parameters
            # YOUR_HW2 CODE_HERE
            sy_sampled_ac = sy_mean + tf.exp(sy_logstd) * tf.random_normal(tf.shape(sy_mean))
        return sy_sampled_ac 
Example #30
Source File: ops.py    From SSGAN-Tensorflow with MIT License
def selu(x):
    alpha = 1.6732632423543772848170429916717
    scale = 1.0507009873554804934193349852946
    return scale * tf.where(x > 0.0, x, alpha * tf.exp(x) - alpha)