Python tensorflow.random_normal() Examples

The following are 30 code examples of tensorflow.random_normal(), collected from open-source projects. The source file, project, and license for each example are listed above it.
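For reference, a minimal sketch of the op itself in TensorFlow 1.x graph mode: tf.random_normal takes a shape plus optional mean, stddev, dtype, seed, and name, and returns a tensor that produces fresh samples from a normal distribution on every evaluation.

import tensorflow as tf

# Signature: tf.random_normal(shape, mean=0.0, stddev=1.0,
#                             dtype=tf.float32, seed=None, name=None)
noise = tf.random_normal([2, 3], mean=0.0, stddev=1.0, seed=42)

with tf.Session() as sess:
    print(sess.run(noise))  # a fresh 2x3 draw on every sess.run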
Example #1
Source File: discretization.py    From fine-lm with MIT License
def vae(x, name, z_size):
  """Simple variational autoencoder without discretization.

  Args:
    x: Input to the discretization bottleneck.
    name: Name for the bottleneck scope.
    z_size: Number of bits used to produce discrete code; discrete codes range
      from 1 to 2**z_size.

  Returns:
    Latent tensor z, KL loss, mu and log_sigma.
  """
  with tf.variable_scope(name):
    mu = tf.layers.dense(x, z_size, name="mu")
    log_sigma = tf.layers.dense(x, z_size, name="log_sigma")
    shape = common_layers.shape_list(x)
    epsilon = tf.random_normal([shape[0], shape[1], 1, z_size])
    z = mu + tf.exp(log_sigma / 2) * epsilon
    kl = 0.5 * tf.reduce_mean(
        tf.exp(log_sigma) + tf.square(mu) - 1. - log_sigma, axis=-1)
    free_bits = z_size // 4
    kl_loss = tf.reduce_mean(tf.maximum(kl - free_bits, 0.0))
    return z, kl_loss, mu, log_sigma 
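The epsilon draw above is the reparameterization trick: writing z = mu + sigma * epsilon keeps the sampling noise outside the trainable path, so gradients flow through mu and log_sigma but not through the random draw. A minimal standalone sketch of the same pattern:

import tensorflow as tf

def reparameterize(mu, log_sigma):
    # eps ~ N(0, I); gradients reach mu and log_sigma, not the noise.
    eps = tf.random_normal(tf.shape(mu))
    return mu + tf.exp(0.5 * log_sigma) * eps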
Example #2
Source File: model.py    From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License
def set_input_shape(self, input_shape):
        batch_size, rows, cols, input_channels = input_shape
        kernel_shape = tuple(self.kernel_shape) + (input_channels,
                                                   self.output_channels)
        assert len(kernel_shape) == 4
        assert all(isinstance(e, int) for e in kernel_shape), kernel_shape
        init = tf.random_normal(kernel_shape, dtype=tf.float32)
        init = init / tf.sqrt(1e-7 + tf.reduce_sum(tf.square(init),
                                                   axis=(0, 1, 2)))
        self.kernels = tf.Variable(init)
        self.b = tf.Variable(
            np.zeros((self.output_channels,)).astype('float32'))
        input_shape = list(input_shape)
        input_shape[0] = 1
        dummy_batch = tf.zeros(input_shape)
        dummy_output = self.fprop(dummy_batch)
        output_shape = [int(e) for e in dummy_output.get_shape()]
        output_shape[0] = batch_size
        self.output_shape = tuple(output_shape) 
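The init / tf.sqrt(...) step above rescales the random kernel so each output filter starts with approximately unit L2 norm over its fan-in axes. Pulled out as a standalone helper, the same idea is a short sketch:

import tensorflow as tf

def unit_norm_conv_init(kernel_shape):
    # Standard-normal draw, then per-output-filter L2 normalization
    # over the height, width, and input-channel axes.
    init = tf.random_normal(kernel_shape, dtype=tf.float32)
    norms = tf.sqrt(1e-7 + tf.reduce_sum(tf.square(init), axis=(0, 1, 2)))
    return init / norms  # broadcasts over the output-channel axis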
Example #3
Source File: picklable_model.py    From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License
def set_input_shape(self, input_shape):
        batch_size, dim = input_shape
        self.input_shape = [batch_size, dim]
        self.output_shape = [batch_size, self.num_hid]
        if self.init_mode == "norm":
            init = tf.random_normal([dim, self.num_hid], dtype=tf.float32)
            init = init / tf.sqrt(1e-7 + tf.reduce_sum(tf.square(init), axis=0,
                                                       keep_dims=True))
            init = init * self.init_scale
        elif self.init_mode == "uniform_unit_scaling":
            scale = np.sqrt(3. / dim)
            init = tf.random_uniform([dim, self.num_hid], dtype=tf.float32,
                                     minval=-scale, maxval=scale)
        else:
            raise ValueError(self.init_mode)
        self.W = PV(init)
        if self.use_bias:
            self.b = PV((np.zeros((self.num_hid,))
                         + self.init_b).astype('float32')) 
Example #4
Source File: access_test.py    From dnc with Apache License 2.0
def testBuildAndTrain(self):
    inputs = tf.random_normal([TIME_STEPS, BATCH_SIZE, INPUT_SIZE])

    output, _ = rnn.dynamic_rnn(
        cell=self.module,
        inputs=inputs,
        initial_state=self.initial_state,
        time_major=True)

    targets = np.random.rand(TIME_STEPS, BATCH_SIZE, NUM_READS, WORD_SIZE)
    loss = tf.reduce_mean(tf.square(output - targets))
    train_op = tf.train.GradientDescentOptimizer(1).minimize(loss)
    init = tf.global_variables_initializer()

    with self.test_session():
      init.run()
      train_op.run() 
Example #5
Source File: distributions.py    From DOTA_models with Apache License 2.0
def __init__(self, batch_size, z_size, mean, logvar):
    """Create a diagonal gaussian distribution.

    Args:
      batch_size: The size of the batch, i.e. 0th dim in 2D tensor of samples.
      z_size: The dimension of the distribution, i.e. 1st dim in 2D tensor.
      mean: The N-D mean of the distribution.
      logvar: The N-D log variance of the diagonal distribution.
    """
    size__xz = [None, z_size]
    self.mean = mean            # bxn already
    self.logvar = logvar        # bxn already
    self.noise = noise = tf.random_normal(tf.shape(logvar))
    self.sample = mean + tf.exp(0.5 * logvar) * noise
    mean.set_shape(size__xz)
    logvar.set_shape(size__xz)
    self.sample.set_shape(size__xz) 
Example #6
Source File: test_hessian_vector_products.py    From tangent with Apache License 2.0
def _test_tf_hvp(func, optimized, tf):
  a = tf.random_normal(shape=(300,))
  v = tf.reshape(a, shape=(-1,))

  modes = ['forward', 'reverse']
  for mode1 in modes:
    for mode2 in modes:
      if mode1 == mode2 == 'forward':
        continue
      df = tangent.autodiff(
          func,
          mode=mode1,
          motion='joint',
          optimized=optimized,
          check_dims=False)
      ddf = tangent.autodiff(
          df, mode=mode2, motion='joint', optimized=optimized, check_dims=False)
      dx = ddf(a, tf.constant(1.0), v)
      # We just ensure it computes something in this case.
      assert dx.shape == a.shape 
Example #7
Source File: dcgan_test.py    From DeepLab_v3 with MIT License
def test_generator_graph(self):
    tf.set_random_seed(1234)
    # Check graph construction for a number of image size/depths and batch
    # sizes.
    for i, batch_size in zip(xrange(3, 7), xrange(3, 8)):
      tf.reset_default_graph()
      final_size = 2 ** i
      noise = tf.random_normal([batch_size, 64])
      image, end_points = dcgan.generator(
          noise,
          depth=32,
          final_size=final_size)

      self.assertAllEqual([batch_size, final_size, final_size, 3],
                          image.shape.as_list())

      expected_names = ['deconv%i' % j for j in xrange(1, i)] + ['logits']
      self.assertSetEqual(set(expected_names), set(end_points.keys()))

      # Check layer depths.
      for j in range(1, i):
        layer = end_points['deconv%i' % j]
        self.assertEqual(32 * 2**(i-j-1), layer.get_shape().as_list()[-1]) 
Example #8
Source File: blocks_std_test.py    From DOTA_models with Apache License 2.0
def testLinearShared(self):
    # Create a linear map which is applied twice on different inputs
    # (i.e. the weights of the map are shared).
    linear_map = blocks_std.Linear(6)
    x1 = tf.random_normal(shape=[1, 5])
    x2 = tf.random_normal(shape=[1, 5])
    xs = x1 + x2

    # Apply the transform with the same weights.
    y1 = linear_map(x1)
    y2 = linear_map(x2)
    ys = linear_map(xs)

    with self.test_session() as sess:
      # Initialize all the variables of the graph.
      tf.global_variables_initializer().run()

      y1_res, y2_res, ys_res = sess.run([y1, y2, ys])
      self.assertAllClose(y1_res + y2_res, ys_res) 
Example #9
Source File: DenoisingAutoencoder.py    From DOTA_models with Apache License 2.0
def __init__(self, n_input, n_hidden, transfer_function = tf.nn.softplus, optimizer = tf.train.AdamOptimizer(),
                 scale = 0.1):
        self.n_input = n_input
        self.n_hidden = n_hidden
        self.transfer = transfer_function
        self.scale = tf.placeholder(tf.float32)
        self.training_scale = scale
        network_weights = self._initialize_weights()
        self.weights = network_weights

        # model
        self.x = tf.placeholder(tf.float32, [None, self.n_input])
        self.hidden = self.transfer(tf.add(tf.matmul(self.x + scale * tf.random_normal((n_input,)),
                self.weights['w1']),
                self.weights['b1']))
        self.reconstruction = tf.add(tf.matmul(self.hidden, self.weights['w2']), self.weights['b2'])

        # cost
        self.cost = 0.5 * tf.reduce_sum(tf.pow(tf.subtract(self.reconstruction, self.x), 2.0))
        self.optimizer = optimizer.minimize(self.cost)

        init = tf.global_variables_initializer()
        self.sess = tf.Session()
        self.sess.run(init) 
Example #10
Source File: policy.py    From DOTA_models with Apache License 2.0
def sample_action(self, logits, sampling_dim,
                    act_dim, act_type, greedy=False):
    """Sample an action from a distribution."""
    if self.env_spec.is_discrete(act_type):
      if greedy:
        act = tf.argmax(logits, 1)
      else:
        act = tf.reshape(tf.multinomial(logits, 1), [-1])
    elif self.env_spec.is_box(act_type):
      means = logits[:, :sampling_dim // 2]
      std = logits[:, sampling_dim // 2:]
      if greedy:
        act = means
      else:
        batch_size = tf.shape(logits)[0]
        act = means + std * tf.random_normal([batch_size, act_dim])
    else:
      assert False

    return act 
Example #11
Source File: continuous.py    From tensorflow_RL with MIT License
def __init__(self, name, state_size, output_size):
        self.state_size = state_size
        self.output_size = output_size

        with tf.variable_scope(name):
            self.input = tf.placeholder(tf.float32, shape=[None, self.state_size])
            self.action = tf.placeholder(tf.float32, shape=[None, self.output_size])

            self.l1 = tf.layers.dense(inputs=self.input, units=128, activation=tf.nn.relu)
            self.l2 = tf.layers.dense(inputs=self.l1,    units=128, activation=tf.nn.relu)
            self.l3 = tf.layers.dense(inputs=self.l2,    units=128, activation=tf.nn.relu)

            self.mu = tf.layers.dense(inputs=self.l3,    units=self.output_size, activation=None)
            self.log_std = tf.get_variable(name='log_std', initializer= -0.5 * np.ones(self.output_size, dtype=np.float32))
            self.std = tf.exp(self.log_std)
            self.pi = self.mu + tf.random_normal(tf.shape(self.mu)) * self.std
            self.logp = gaussian_likelihood(self.action, self.mu, self.log_std)
            self.logp_pi = gaussian_likelihood(self.pi, self.mu, self.log_std)
    
            self.scope = tf.get_variable_scope().name 
Example #12
Source File: train_policy.py    From cs294-112_hws with MIT License
def sample_action(self, policy_parameters):
        """
        constructs a symbolic operation for stochastically sampling from the policy
        distribution

        arguments:
            policy_parameters
                (mean, log_std) of a Gaussian distribution over actions
                    sy_mean: (batch_size, self.ac_dim)
                    sy_logstd: (batch_size, self.ac_dim)

        returns:
            sy_sampled_ac:
                (batch_size, self.ac_dim)
        """
        sy_mean, sy_logstd = policy_parameters
        sy_sampled_ac = sy_mean + tf.exp(sy_logstd) * tf.random_normal(tf.shape(sy_mean), 0, 1)
        return sy_sampled_ac 
Example #13
Source File: value_functions.py    From HardRLWithYoutube with MIT License
def __init__(self, ob_dim, ac_dim): #pylint: disable=W0613
        X = tf.placeholder(tf.float32, shape=[None, ob_dim*2+ac_dim*2+2]) # batch of observations
        vtarg_n = tf.placeholder(tf.float32, shape=[None], name='vtarg')
        wd_dict = {}
        h1 = tf.nn.elu(dense(X, 64, "h1", weight_init=U.normc_initializer(1.0), bias_init=0, weight_loss_dict=wd_dict))
        h2 = tf.nn.elu(dense(h1, 64, "h2", weight_init=U.normc_initializer(1.0), bias_init=0, weight_loss_dict=wd_dict))
        vpred_n = dense(h2, 1, "hfinal", weight_init=U.normc_initializer(1.0), bias_init=0, weight_loss_dict=wd_dict)[:,0]
        sample_vpred_n = vpred_n + tf.random_normal(tf.shape(vpred_n))
        wd_loss = tf.get_collection("vf_losses", None)
        loss = tf.reduce_mean(tf.square(vpred_n - vtarg_n)) + tf.add_n(wd_loss)
        loss_sampled = tf.reduce_mean(tf.square(vpred_n - tf.stop_gradient(sample_vpred_n)))
        self._predict = U.function([X], vpred_n)
        optim = kfac.KfacOptimizer(learning_rate=0.001, cold_lr=0.001*(1-0.9), momentum=0.9, \
                                    clip_kl=0.3, epsilon=0.1, stats_decay=0.95, \
                                    async=1, kfac_update=2, cold_iter=50, \
                                    weight_decay_dict=wd_dict, max_grad_norm=None)
        vf_var_list = []
        for var in tf.trainable_variables():
            if "vf" in var.name:
                vf_var_list.append(var)

        update_op, self.q_runner = optim.minimize(loss, loss_sampled, var_list=vf_var_list)
        self.do_update = U.function([X, vtarg_n], update_op) #pylint: disable=E1101
        U.initialize() # Initialize uninitialized TF variables 
Example #14
Source File: ops.py    From tensorflow-alexnet with MIT License
def fc(inputs, output_size, init_bias=0.0, activation_func=tf.nn.relu, stddev=0.01):
    input_shape = inputs.get_shape().as_list()
    if len(input_shape) == 4:
        fc_weights = tf.Variable(
            tf.random_normal([input_shape[1] * input_shape[2] * input_shape[3], output_size], dtype=tf.float32,
                             stddev=stddev),
            name='weights')
        inputs = tf.reshape(inputs, [-1, fc_weights.get_shape().as_list()[0]])
    else:
        fc_weights = tf.Variable(tf.random_normal([input_shape[-1], output_size], dtype=tf.float32, stddev=stddev),
                                 name='weights')

    fc_biases = tf.Variable(tf.constant(init_bias, shape=[output_size], dtype=tf.float32), name='biases')
    fc_layer = tf.matmul(inputs, fc_weights)
    fc_layer = tf.nn.bias_add(fc_layer, fc_biases)
    if activation_func:
        fc_layer = activation_func(fc_layer)
    return fc_layer 
Example #15
Source File: common_layers_test.py    From fine-lm with MIT License
def testDmlLoss(self, batch, height, width, num_mixtures, reduce_sum):
    channels = 3
    pred = tf.random_normal([batch, height, width, num_mixtures * 10])
    labels = tf.random_uniform([batch, height, width, channels],
                               minval=0, maxval=256, dtype=tf.int32)
    actual_loss_num, actual_loss_den = common_layers.dml_loss(
        pred=pred, labels=labels, reduce_sum=reduce_sum)
    actual_loss = actual_loss_num / actual_loss_den

    real_labels = common_layers.convert_rgb_to_symmetric_real(labels)
    expected_loss = common_layers.discretized_mix_logistic_loss(
        pred=pred, labels=real_labels) / channels
    if reduce_sum:
      expected_loss = tf.reduce_mean(expected_loss)

    with self.test_session() as sess:
      actual_loss_val, expected_loss_val = sess.run(
          [actual_loss, expected_loss])
    self.assertAllClose(actual_loss_val, expected_loss_val) 
Example #16
Source File: common_image_attention_test.py    From fine-lm with MIT License
def testCreateOutputTrainMode(self, likelihood, num_mixtures, depth):
    batch = 1
    height = 8
    width = 8
    channels = 3
    rows = height
    if likelihood == common_image_attention.DistributionType.CAT:
      cols = channels * width
    else:
      cols = width
    hparams = tf.contrib.training.HParams(
        hidden_size=2,
        likelihood=likelihood,
        mode=tf.estimator.ModeKeys.TRAIN,
        num_mixtures=num_mixtures,
    )
    decoder_output = tf.random_normal([batch, rows, cols, hparams.hidden_size])
    targets = tf.random_uniform([batch, height, width, channels],
                                minval=-1., maxval=1.)
    output = common_image_attention.create_output(
        decoder_output, rows, cols, targets, hparams)
    if hparams.likelihood == common_image_attention.DistributionType.CAT:
      self.assertEqual(output.shape, (batch, height, width, channels, depth))
    else:
      self.assertEqual(output.shape, (batch, height, width, depth)) 
Example #17
Source File: value_functions.py    From lirpg with MIT License
def __init__(self, ob_dim, ac_dim): #pylint: disable=W0613
        X = tf.placeholder(tf.float32, shape=[None, ob_dim*2+ac_dim*2+2]) # batch of observations
        vtarg_n = tf.placeholder(tf.float32, shape=[None], name='vtarg')
        wd_dict = {}
        h1 = tf.nn.elu(dense(X, 64, "h1", weight_init=U.normc_initializer(1.0), bias_init=0, weight_loss_dict=wd_dict))
        h2 = tf.nn.elu(dense(h1, 64, "h2", weight_init=U.normc_initializer(1.0), bias_init=0, weight_loss_dict=wd_dict))
        vpred_n = dense(h2, 1, "hfinal", weight_init=U.normc_initializer(1.0), bias_init=0, weight_loss_dict=wd_dict)[:,0]
        sample_vpred_n = vpred_n + tf.random_normal(tf.shape(vpred_n))
        wd_loss = tf.get_collection("vf_losses", None)
        loss = tf.reduce_mean(tf.square(vpred_n - vtarg_n)) + tf.add_n(wd_loss)
        loss_sampled = tf.reduce_mean(tf.square(vpred_n - tf.stop_gradient(sample_vpred_n)))
        self._predict = U.function([X], vpred_n)
        optim = kfac.KfacOptimizer(learning_rate=0.001, cold_lr=0.001*(1-0.9), momentum=0.9, \
                                    clip_kl=0.3, epsilon=0.1, stats_decay=0.95, \
                                    async=1, kfac_update=2, cold_iter=50, \
                                    weight_decay_dict=wd_dict, max_grad_norm=None)
        vf_var_list = []
        for var in tf.trainable_variables():
            if "vf" in var.name:
                vf_var_list.append(var)

        update_op, self.q_runner = optim.minimize(loss, loss_sampled, var_list=vf_var_list)
        self.do_update = U.function([X, vtarg_n], update_op) #pylint: disable=E1101
        U.initialize() # Initialize uninitialized TF variables 
Example #18
Source File: cnn2d.py    From deep_architect with MIT License
def kaiming2015delving_initializer_conv(gain=1.0):

    def init_fn(shape):
        n = np.product(shape)
        stddev = gain * np.sqrt(2.0 / n)
        init_vals = tf.random_normal(shape, 0.0, stddev)
        return init_vals

    return init_fn 
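Note that n here is np.product(shape), i.e. the full kernel volume including output channels, whereas the He et al. (2015) derivation uses the fan-in only. TensorFlow 1.x ships a built-in that computes the fan-in variant; a rough equivalent sketch:

import tensorflow as tf

# He/Kaiming-style normal init via the built-in variance-scaling
# initializer: stddev is approximately sqrt(2 / fan_in).
he_init = tf.variance_scaling_initializer(
    scale=2.0, mode='fan_in', distribution='normal')
kernel = tf.get_variable('conv_kernel', shape=[3, 3, 64, 128],
                         initializer=he_init)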
Example #19
Source File: ddpg.py    From HardRLWithYoutube with MIT License
def get_perturbed_actor_updates(actor, perturbed_actor, param_noise_stddev):
    assert len(actor.vars) == len(perturbed_actor.vars)
    assert len(actor.perturbable_vars) == len(perturbed_actor.perturbable_vars)

    updates = []
    for var, perturbed_var in zip(actor.vars, perturbed_actor.vars):
        if var in actor.perturbable_vars:
            logger.info('  {} <- {} + noise'.format(perturbed_var.name, var.name))
            updates.append(tf.assign(perturbed_var, var + tf.random_normal(tf.shape(var), mean=0., stddev=param_noise_stddev)))
        else:
            logger.info('  {} <- {}'.format(perturbed_var.name, var.name))
            updates.append(tf.assign(perturbed_var, var))
    assert len(updates) == len(actor.vars)
    return tf.group(*updates) 
Example #20
Source File: policies.py    From lirpg with MIT License
def __init__(self, ob_dim, ac_dim):
        # Here we'll construct a bunch of expressions, which will be used in two places:
        # (1) When sampling actions
        # (2) When computing loss functions, for the policy update
        # Variables specific to (1) have the word "sampled" in them,
        # whereas variables specific to (2) have the word "old" in them
        ob_no = tf.placeholder(tf.float32, shape=[None, ob_dim*2], name="ob") # batch of observations
        oldac_na = tf.placeholder(tf.float32, shape=[None, ac_dim], name="ac") # batch of previous actions
        oldac_dist = tf.placeholder(tf.float32, shape=[None, ac_dim*2], name="oldac_dist") # batch of previous action distributions
        adv_n = tf.placeholder(tf.float32, shape=[None], name="adv") # advantage function estimate
        wd_dict = {}
        h1 = tf.nn.tanh(dense(ob_no, 64, "h1", weight_init=U.normc_initializer(1.0), bias_init=0.0, weight_loss_dict=wd_dict))
        h2 = tf.nn.tanh(dense(h1, 64, "h2", weight_init=U.normc_initializer(1.0), bias_init=0.0, weight_loss_dict=wd_dict))
        mean_na = dense(h2, ac_dim, "mean", weight_init=U.normc_initializer(0.1), bias_init=0.0, weight_loss_dict=wd_dict) # Mean control output
        self.wd_dict = wd_dict
        self.logstd_1a = logstd_1a = tf.get_variable("logstd", [ac_dim], tf.float32, tf.zeros_initializer()) # Variance on outputs
        logstd_1a = tf.expand_dims(logstd_1a, 0)
        std_1a = tf.exp(logstd_1a)
        std_na = tf.tile(std_1a, [tf.shape(mean_na)[0], 1])
        ac_dist = tf.concat([tf.reshape(mean_na, [-1, ac_dim]), tf.reshape(std_na, [-1, ac_dim])], 1)
        sampled_ac_na = tf.random_normal(tf.shape(ac_dist[:,ac_dim:])) * ac_dist[:,ac_dim:] + ac_dist[:,:ac_dim] # This is the sampled action we'll perform.
        logprobsampled_n = - tf.reduce_sum(tf.log(ac_dist[:,ac_dim:]), axis=1) - 0.5 * tf.log(2.0*np.pi)*ac_dim - 0.5 * tf.reduce_sum(tf.square(ac_dist[:,:ac_dim] - sampled_ac_na) / (tf.square(ac_dist[:,ac_dim:])), axis=1) # Logprob of sampled action
        logprob_n = - tf.reduce_sum(tf.log(ac_dist[:,ac_dim:]), axis=1) - 0.5 * tf.log(2.0*np.pi)*ac_dim - 0.5 * tf.reduce_sum(tf.square(ac_dist[:,:ac_dim] - oldac_na) / (tf.square(ac_dist[:,ac_dim:])), axis=1) # Logprob of previous actions under CURRENT policy (whereas oldlogprob_n is under OLD policy)
        kl = tf.reduce_mean(kl_div(oldac_dist, ac_dist, ac_dim))
        #kl = .5 * tf.reduce_mean(tf.square(logprob_n - oldlogprob_n)) # Approximation of KL divergence between old policy used to generate actions, and new policy used to compute logprob_n
        surr = - tf.reduce_mean(adv_n * logprob_n) # Loss function that we'll differentiate to get the policy gradient
        surr_sampled = - tf.reduce_mean(logprob_n) # Sampled loss of the policy
        self._act = U.function([ob_no], [sampled_ac_na, ac_dist, logprobsampled_n]) # Generate a new action and its logprob
        #self.compute_kl = U.function([ob_no, oldac_na, oldlogprob_n], kl) # Compute (approximate) KL divergence between old policy and new policy
        self.compute_kl = U.function([ob_no, oldac_dist], kl)
        self.update_info = ((ob_no, oldac_na, adv_n), surr, surr_sampled) # Input and output variables needed for computing loss
        U.initialize() # Initialize uninitialized TF variables 
Example #21
Source File: utils.py    From SentenceFunction with Apache License 2.0
def sample_gaussian(mu, logvar):
    epsilon = tf.random_normal(tf.shape(logvar), name="epsilon")
    std = tf.exp(0.5 * logvar)
    z = mu + tf.multiply(std, epsilon)
    return z 
Example #22
Source File: image_utils_test.py    From fine-lm with MIT License
def testMakeMultiscaleDilatedLarger(self):
    image = tf.random_normal([256, 256, 3])
    resolutions = [257]
    with self.assertRaisesRegexp(ValueError, "strides.* must be non-zero"):
      _ = image_utils.make_multiscale_dilated(image, resolutions) 
Example #23
Source File: image_utils_test.py    From fine-lm with MIT License
def testMakeMultiscaleDilatedIndivisible(self):
    image = tf.random_normal([256, 256, 3])
    resolutions = [255]
    scaled_images = image_utils.make_multiscale_dilated(image, resolutions)
    self.assertEqual(scaled_images[0].shape, (256, 256, 3)) 
Example #24
Source File: discretization_test.py    From fine-lm with MIT License
def testProjectHidden(self):
    hidden_size = 60
    block_dim = 20
    num_blocks = 3
    x = tf.zeros(shape=[1, hidden_size], dtype=tf.float32)
    projection_tensors = tf.random_normal(
        shape=[num_blocks, hidden_size, block_dim], dtype=tf.float32)
    x_projected = discretization.project_hidden(x, projection_tensors,
                                                hidden_size, num_blocks)
    with self.test_session() as sess:
      tf.global_variables_initializer().run()
      x_projected_eval = sess.run(x_projected)
      self.assertEqual(np.shape(x_projected_eval), (1, num_blocks, block_dim))
      self.assertTrue(np.all(x_projected_eval == 0)) 
Example #25
Source File: distributions.py    From lirpg with MIT License
def sample(self):
        return self.mean + self.std * tf.random_normal(tf.shape(self.mean)) 
Example #26
Source File: ddpg.py    From lirpg with MIT License
def get_perturbed_actor_updates(actor, perturbed_actor, param_noise_stddev):
    assert len(actor.vars) == len(perturbed_actor.vars)
    assert len(actor.perturbable_vars) == len(perturbed_actor.perturbable_vars)

    updates = []
    for var, perturbed_var in zip(actor.vars, perturbed_actor.vars):
        if var in actor.perturbable_vars:
            logger.info('  {} <- {} + noise'.format(perturbed_var.name, var.name))
            updates.append(tf.assign(perturbed_var, var + tf.random_normal(tf.shape(var), mean=0., stddev=param_noise_stddev)))
        else:
            logger.info('  {} <- {}'.format(perturbed_var.name, var.name))
            updates.append(tf.assign(perturbed_var, var))
    assert len(updates) == len(actor.vars)
    return tf.group(*updates) 
Example #27
Source File: policies.py    From HardRLWithYoutube with MIT License
def __init__(self, ob_dim, ac_dim):
        # Here we'll construct a bunch of expressions, which will be used in two places:
        # (1) When sampling actions
        # (2) When computing loss functions, for the policy update
        # Variables specific to (1) have the word "sampled" in them,
        # whereas variables specific to (2) have the word "old" in them
        ob_no = tf.placeholder(tf.float32, shape=[None, ob_dim*2], name="ob") # batch of observations
        oldac_na = tf.placeholder(tf.float32, shape=[None, ac_dim], name="ac") # batch of previous actions
        oldac_dist = tf.placeholder(tf.float32, shape=[None, ac_dim*2], name="oldac_dist") # batch of previous action distributions
        adv_n = tf.placeholder(tf.float32, shape=[None], name="adv") # advantage function estimate
        wd_dict = {}
        h1 = tf.nn.tanh(dense(ob_no, 64, "h1", weight_init=U.normc_initializer(1.0), bias_init=0.0, weight_loss_dict=wd_dict))
        h2 = tf.nn.tanh(dense(h1, 64, "h2", weight_init=U.normc_initializer(1.0), bias_init=0.0, weight_loss_dict=wd_dict))
        mean_na = dense(h2, ac_dim, "mean", weight_init=U.normc_initializer(0.1), bias_init=0.0, weight_loss_dict=wd_dict) # Mean control output
        self.wd_dict = wd_dict
        self.logstd_1a = logstd_1a = tf.get_variable("logstd", [ac_dim], tf.float32, tf.zeros_initializer()) # Variance on outputs
        logstd_1a = tf.expand_dims(logstd_1a, 0)
        std_1a = tf.exp(logstd_1a)
        std_na = tf.tile(std_1a, [tf.shape(mean_na)[0], 1])
        ac_dist = tf.concat([tf.reshape(mean_na, [-1, ac_dim]), tf.reshape(std_na, [-1, ac_dim])], 1)
        sampled_ac_na = tf.random_normal(tf.shape(ac_dist[:,ac_dim:])) * ac_dist[:,ac_dim:] + ac_dist[:,:ac_dim] # This is the sampled action we'll perform.
        logprobsampled_n = - tf.reduce_sum(tf.log(ac_dist[:,ac_dim:]), axis=1) - 0.5 * tf.log(2.0*np.pi)*ac_dim - 0.5 * tf.reduce_sum(tf.square(ac_dist[:,:ac_dim] - sampled_ac_na) / (tf.square(ac_dist[:,ac_dim:])), axis=1) # Logprob of sampled action
        logprob_n = - tf.reduce_sum(tf.log(ac_dist[:,ac_dim:]), axis=1) - 0.5 * tf.log(2.0*np.pi)*ac_dim - 0.5 * tf.reduce_sum(tf.square(ac_dist[:,:ac_dim] - oldac_na) / (tf.square(ac_dist[:,ac_dim:])), axis=1) # Logprob of previous actions under CURRENT policy (whereas oldlogprob_n is under OLD policy)
        kl = tf.reduce_mean(kl_div(oldac_dist, ac_dist, ac_dim))
        #kl = .5 * tf.reduce_mean(tf.square(logprob_n - oldlogprob_n)) # Approximation of KL divergence between old policy used to generate actions, and new policy used to compute logprob_n
        surr = - tf.reduce_mean(adv_n * logprob_n) # Loss function that we'll differentiate to get the policy gradient
        surr_sampled = - tf.reduce_mean(logprob_n) # Sampled loss of the policy
        self._act = U.function([ob_no], [sampled_ac_na, ac_dist, logprobsampled_n]) # Generate a new action and its logprob
        #self.compute_kl = U.function([ob_no, oldac_na, oldlogprob_n], kl) # Compute (approximate) KL divergence between old policy and new policy
        self.compute_kl = U.function([ob_no, oldac_dist], kl)
        self.update_info = ((ob_no, oldac_na, adv_n), surr, surr_sampled) # Input and output variables needed for computing loss
        U.initialize() # Initialize uninitialized TF variables 
Example #28
Source File: ddpg_learner.py    From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0
def get_perturbed_actor_updates(actor, perturbed_actor, param_noise_stddev):
    assert len(actor.vars) == len(perturbed_actor.vars)
    assert len(actor.perturbable_vars) == len(perturbed_actor.perturbable_vars)

    updates = []
    for var, perturbed_var in zip(actor.vars, perturbed_actor.vars):
        if var in actor.perturbable_vars:
            logger.info('  {} <- {} + noise'.format(perturbed_var.name, var.name))
            updates.append(tf.assign(perturbed_var, var + tf.random_normal(tf.shape(var), mean=0., stddev=param_noise_stddev)))
        else:
            logger.info('  {} <- {}'.format(perturbed_var.name, var.name))
            updates.append(tf.assign(perturbed_var, var))
    assert len(updates) == len(actor.vars)
    return tf.group(*updates) 
Example #29
Source File: distributions.py    From HardRLWithYoutube with MIT License
def sample(self):
        return self.mean + self.std * tf.random_normal(tf.shape(self.mean)) 
Example #30
Source File: distributions.py    From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0
def sample(self):
        return self.mean + self.std * tf.random_normal(tf.shape(self.mean))
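All of the examples above target TensorFlow 1.x. In TensorFlow 2.x, tf.random_normal was removed and the op lives at tf.random.normal; the arguments are the same, and in eager mode the samples are available immediately. A minimal sketch of the migrated call:

import tensorflow as tf  # TF 2.x

noise = tf.random.normal([2, 3], mean=0.0, stddev=1.0, seed=42)
print(noise.numpy())  # eager execution: no Session needed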