Python tensorflow.scan() Examples

The following are 30 code examples of tensorflow.scan(), collected from open-source projects. The original project and source file are noted above each example. You may also want to check out all available functions/classes of the module tensorflow, or try the search function.
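Before the examples, here is a minimal sketch of the basic tf.scan() usage pattern, written for the TensorFlow 1.x graph mode used by the examples below; the tensor values are purely illustrative. tf.scan(fn, elems, initializer) applies fn(accumulator, element) along the first axis of elems and returns the stacked sequence of intermediate accumulators.

import tensorflow as tf

elems = tf.constant([1.0, 2.0, 3.0, 4.0])
# Running sum: each output element holds the total accumulated so far.
running_sum = tf.scan(lambda acc, x: acc + x, elems, initializer=tf.constant(0.0))

with tf.Session() as sess:
    print(sess.run(running_sum))  # [ 1.  3.  6. 10.]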
Example #1
Source File: ppo.py    From training_results_v0.5 with Apache License 2.0
def calculate_generalized_advantage_estimator(
    reward, value, done, gae_gamma, gae_lambda):
  # pylint: disable=g-doc-args
  """Generalized advantage estimator.

  Returns:
    GAE estimator. It will be one element shorter than the input; this is
    because to compute GAE for [0, ..., N-1] one needs V for [1, ..., N].
  """
  # pylint: enable=g-doc-args

  next_value = value[1:, :]
  next_not_done = 1 - tf.cast(done[1:, :], tf.float32)
  delta = (reward[:-1, :] + gae_gamma * next_value * next_not_done
           - value[:-1, :])

  return_ = tf.reverse(tf.scan(
      lambda agg, cur: cur[0] + cur[1] * gae_gamma * gae_lambda * agg,
      [tf.reverse(delta, [0]), tf.reverse(next_not_done, [0])],
      tf.zeros_like(delta[0, :]),
      parallel_iterations=1), [0])
  return tf.check_numerics(return_, "return") 
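For reference, the recursion this scan implements is the standard GAE recurrence; the notation below is mine, not taken from the source:

\delta_t = r_t + \gamma (1 - d_{t+1}) V_{t+1} - V_t
\hat{A}_t = \delta_t + \gamma \lambda (1 - d_{t+1}) \hat{A}_{t+1}

The deltas are time-reversed before the scan so each step can reuse the already-computed \hat{A}_{t+1}, and parallel_iterations=1 keeps the accumulation strictly sequential; the final tf.reverse flips the result back into forward time order.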
Example #2
Source File: temporal_difference.py    From planet with Apache License 2.0
def discounted_return(reward, discount, bootstrap, axis, stop_gradient=True):
  """Discounted Monte Carlo return."""
  if discount == 1 and bootstrap is None:
    return tf.reduce_sum(reward, axis)
  if discount == 1:
    return tf.reduce_sum(reward, axis) + bootstrap
  # Bring the aggregation dimension front.
  dims = list(range(reward.shape.ndims))
  dims = [axis] + dims[1:axis] + [0] + dims[axis + 1:]
  reward = tf.transpose(reward, dims)
  if bootstrap is None:
    bootstrap = tf.zeros_like(reward[-1])
  return_ = tf.scan(
      fn=lambda agg, cur: cur + discount * agg,
      elems=reward,
      initializer=bootstrap,
      back_prop=not stop_gradient,
      reverse=True)
  return_ = tf.transpose(return_, dims)
  if stop_gradient:
    return_ = tf.stop_gradient(return_)
  return return_ 
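A hedged usage sketch (the shapes, discount value, and placeholder below are illustrative, not taken from the source): with rewards of shape [batch, time] and axis=1, the reverse scan leaves each position holding the discounted sum of all later rewards.

rewards = tf.placeholder(tf.float32, [None, 10])  # [batch, time], hypothetical input
returns = discounted_return(rewards, discount=0.99, bootstrap=None, axis=1)
# returns[b, t] == sum over k >= 0 of 0.99**k * rewards[b, t + k]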
Example #3
Source File: policy.py    From DOTA_models with Apache License 2.0
def multi_step(self, all_obs, initial_state, all_actions):
    """Calculate log-probs and other calculations on batch of episodes."""
    batch_size = tf.shape(initial_state)[0]
    time_length = tf.shape(all_obs[0])[0]
    initial_actions = [act[0] for act in all_actions]
    all_actions = [tf.concat([act[1:], act[0:1]], 0)
                   for act in all_actions]  # "final" action is dummy

    (internal_states, _, logits, log_probs,
     entropies, self_kls) = tf.scan(
        self.single_step,
        (all_obs, all_actions),
        initializer=self.get_initializer(
            batch_size, initial_state, initial_actions))

    # remove "final" computations
    log_probs = [log_prob[:-1] for log_prob in log_probs]
    entropies = [entropy[:-1] for entropy in entropies]
    self_kls = [self_kl[:-1] for self_kl in self_kls]

    return internal_states, logits, log_probs, entropies, self_kls 
Example #4
Source File: ppo.py    From fine-lm with MIT License
def calculate_generalized_advantage_estimator(
    reward, value, done, gae_gamma, gae_lambda):
  """Generalized advantage estimator."""

  # Slight trick below: we replace the last reward with the last value estimate,
  # which makes the delta (and hence the advantage) zero in the last timestep.
  reward = tf.concat([reward[:-1, :], value[-1:, :]], axis=0)
  next_value = tf.concat([value[1:, :], tf.zeros_like(value[-1:, :])], axis=0)
  next_not_done = 1 - tf.cast(tf.concat([done[1:, :],
                                         tf.zeros_like(done[-1:, :])], axis=0),
                              tf.float32)
  delta = reward + gae_gamma * next_value * next_not_done - value

  return_ = tf.reverse(tf.scan(
      lambda agg, cur: cur[0] + cur[1] * gae_gamma * gae_lambda * agg,
      [tf.reverse(delta, [0]), tf.reverse(next_not_done, [0])],
      tf.zeros_like(delta[0, :]),
      parallel_iterations=1), [0])
  return tf.check_numerics(return_, "return") 
Example #5
Source File: tf_atari_wrappers.py    From fine-lm with MIT License
def simulate(self, action):
    with tf.name_scope("environment/simulate"):  # Do we need this?
      initializer = (tf.zeros_like(self._observ),
                     tf.fill((len(self),), 0.0), tf.fill((len(self),), False))

      def not_done_step(a, _):
        reward, done = self._batch_env.simulate(action)
        with tf.control_dependencies([reward, done]):
          # TODO(piotrmilos): possibly ignore envs with done
          r0 = tf.maximum(a[0], self._batch_env.observ)
          r1 = tf.add(a[1], reward)
          r2 = tf.logical_or(a[2], done)

          return (r0, r1, r2)

      simulate_ret = tf.scan(not_done_step, tf.range(self.skip),
                             initializer=initializer, parallel_iterations=1,
                             infer_shape=False)
      simulate_ret = [ret[-1, ...] for ret in simulate_ret]

      with tf.control_dependencies([self._observ.assign(simulate_ret[0])]):
        return tf.identity(simulate_ret[1]), tf.identity(simulate_ret[2]) 
Example #6
Source File: tf_atari_wrappers.py    From fine-lm with MIT License
def simulate(self, action):
    with tf.name_scope("environment/simulate"):  # Do we need this?
      initializer = (tf.zeros(self.old_shape, dtype=tf.float32),
                     tf.fill((len(self),), 0.0), tf.fill((len(self),), False))

      def not_done_step(a, _):
        reward, done = self._batch_env.simulate(action)
        with tf.control_dependencies([reward, done]):
          r0 = self._batch_env.observ + 0
          r1 = tf.add(a[1], reward)
          r2 = tf.logical_or(a[2], done)
          return (r0, r1, r2)

      simulate_ret = tf.scan(not_done_step, tf.range(self.skip),
                             initializer=initializer, parallel_iterations=1,
                             infer_shape=False)
      observations, rewards, dones = simulate_ret
      split_observations = tf.split(observations, self.skip, axis=0)
      split_observations = [tf.squeeze(o, axis=0) for o in split_observations]
      observation = tf.concat(split_observations, axis=-1)
      with tf.control_dependencies([self._observ.assign(observation)]):
        return tf.identity(rewards[-1, ...]), tf.identity(dones[-1, ...]) 
Example #7
Source File: algorithm.py    From soccer-matlab with BSD 2-Clause "Simplified" License
def _update_value(self, observ, reward, length):
    """Perform multiple update steps of the value baseline.

    Only one iteration's summary can be returned, so we pick the one produced
    halfway through the iterations.

    Args:
      observ: Sequences of observations.
      reward: Sequences of reward.
      length: Batch of sequence lengths.

    Returns:
      Summary tensor.
    """
    with tf.name_scope('update_value'):
      loss, summary = tf.scan(
          lambda _1, _2: self._update_value_step(observ, reward, length),
          tf.range(self._config.update_epochs_value),
          [0., ''], parallel_iterations=1)
      print_loss = tf.Print(0, [tf.reduce_mean(loss)], 'value loss: ')
      with tf.control_dependencies([loss, print_loss]):
        return summary[self._config.update_epochs_value // 2] 
Example #8
Source File: functional_ops_test.py    From deep_image_model with Apache License 2.0
def testScanFoldl_Nested(self):
    with self.test_session():
      elems = tf.constant([1.0, 2.0, 3.0, 4.0], name="data")
      inner_elems = tf.constant([0.5, 0.5], name="data")

      def r_inner(a, x):
        return tf.foldl(lambda b, y: b * y * x, inner_elems, initializer=a)

      r = tf.scan(r_inner, elems)

      # t == 0 (returns 1)
      # t == 1, a == 1, x == 2 (returns 1)
      #   t_0 == 0, b == a == 1, y == 0.5, returns b * y * x = 1
      #   t_1 == 1, b == 1,      y == 0.5, returns b * y * x = 1
      # t == 2, a == 1, x == 3 (returns 1.5*1.5 == 2.25)
      #   t_0 == 0, b == a == 1, y == 0.5, returns b * y * x = 1.5
      #   t_1 == 1, b == 1.5,    y == 0.5, returns b * y * x = 1.5*1.5
      # t == 3, a == 2.25, x == 4 (returns 9)
      #   t_0 == 0, b == a == 2.25, y == 0.5, returns b * y * x = 4.5
      #   t_1 == 1, b == 4.5,       y == 0.5, returns b * y * x = 9
      self.assertAllClose([1., 1., 2.25, 9.], r.eval()) 
Example #9
Source File: functional_ops_test.py    From deep_image_model with Apache License 2.0
def testScan_Scoped(self):
    with self.test_session() as sess:
      with tf.variable_scope("root") as varscope:
        elems = tf.constant([1, 2, 3, 4, 5, 6], name="data")

        r = tf.scan(simple_scoped_fn, elems)
        # Check that we have the one variable we asked for here.
        self.assertEqual(len(tf.trainable_variables()), 1)
        self.assertEqual(tf.trainable_variables()[0].name, "root/body/two:0")
        sess.run([tf.global_variables_initializer()])
        results = np.array([1, 6, 18, 44, 98, 208])
        self.assertAllEqual(results, r.eval())

        # Now let's reuse our single variable.
        varscope.reuse_variables()
        r = tf.scan(simple_scoped_fn, elems, initializer=2)
        self.assertEqual(len(tf.trainable_variables()), 1)
        results = np.array([6, 16, 38, 84, 178, 368])
        self.assertAllEqual(results, r.eval()) 
Example #10
Source File: TestUpd.py    From NTM-One-Shot-TF with MIT License
def omniglot():

    sess = tf.InteractiveSession()

    """    def wrapper(v):
        return tf.Print(v, [v], message="Printing v")

    v = tf.Variable(initial_value=np.arange(0, 36).reshape((6, 6)), dtype=tf.float32, name='Matrix')

    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    temp = tf.Variable(initial_value=np.arange(0, 36).reshape((6, 6)), dtype=tf.float32, name='temp')
    temp = wrapper(v)
    #with tf.control_dependencies([temp]):
    temp.eval()
    print 'Hello'"""

    def update_tensor(V, dim2, val):  # Update tensor V: set V[i, dim2[i]] = val[i] for every row i.
        val = tf.cast(val, V.dtype)
        def body(_, elems):
            # Python 3 removed tuple-parameter unpacking, so unpack manually.
            v, d2, chg = elems
            d2_int = tf.cast(d2, tf.int32)
            # Rebuild the row with the element at position d2 replaced by chg.
            return tf.slice(tf.concat([v[:d2_int], [chg], v[d2_int + 1:]], axis=0),
                            [0], [v.get_shape().as_list()[0]])
        Z = tf.scan(body, elems=(V, dim2, val),
                    initializer=tf.constant(1, shape=V.get_shape().as_list()[1:], dtype=tf.float32),
                    name="Scan_Update")
        return Z 
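A small usage sketch for update_tensor (the values are illustrative and assume the TF 1.x interactive session set up in the surrounding file): row i of V has its element at column dim2[i] replaced by val[i].

V = tf.constant([[1., 2., 3.],
                 [4., 5., 6.]])
dim2 = tf.constant([0., 2.])
val = tf.constant([9., 7.])
Z = update_tensor(V, dim2, val)
# sess.run(Z) -> [[9., 2., 3.],
#                 [4., 5., 7.]]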
Example #11
Source File: filter.py    From kvae with MIT License
def compute_forwards(self, reuse=None):
        """Compute the forward step in the Kalman filter.
           The forward pass is initialized with p(z_1)=N(self.mu, self.Sigma).
           We then return the mean and covariances of the predictive distribution p(z_t|z_tm1,u_t), t=2,..T+1
           and the filtering distribution p(z_t|x_1:t,u_1:t), t=1,..T
           We follow the notation of Murphy's book, section 18.3.1
        """

        # To make sure we are not accidentally using the real outputs in the steps with missing values, set them to 0.
        y_masked = tf.multiply(tf.expand_dims(self.mask, 2), self.y)
        inputs = tf.concat([y_masked, self.u, tf.expand_dims(self.mask, 2)], axis=2)

        y_prev = tf.expand_dims(self.y_0, 0)  # (1, dim_y)
        y_prev = tf.tile(y_prev, (tf.shape(self.mu)[0], 1))
        alpha, state, u, buffer = self.alpha(y_prev, self.state, self.u[:, 0], init_buffer=True, reuse= reuse)

        # dummy matrices to initialize A, B and C in scan
        dummy_init_A = tf.ones([self.Sigma.get_shape()[0], self.dim_z, self.dim_z])
        dummy_init_B = tf.ones([self.Sigma.get_shape()[0], self.dim_z, self.dim_u])
        dummy_init_C = tf.ones([self.Sigma.get_shape()[0], self.dim_y, self.dim_z])
        forward_states = tf.scan(self.forward_step_fn, tf.transpose(inputs, [1, 0, 2]),
                                 initializer=(self.mu, self.Sigma, self.mu, self.Sigma, alpha, u, state, buffer,
                                              dummy_init_A, dummy_init_B, dummy_init_C),
                                 parallel_iterations=1, name='forward')
        return forward_states 
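For orientation, forward_step_fn (defined elsewhere in the file and not shown here) presumably implements the standard Kalman filter predict/update step in the notation of Murphy, section 18.3.1; the equations below are that textbook form, not code from the source:

\mu_{t|t-1} = A \mu_{t-1|t-1} + B u_t, \qquad \Sigma_{t|t-1} = A \Sigma_{t-1|t-1} A^\top + Q
K_t = \Sigma_{t|t-1} C^\top (C \Sigma_{t|t-1} C^\top + R)^{-1}
\mu_{t|t} = \mu_{t|t-1} + K_t (y_t - C \mu_{t|t-1}), \qquad \Sigma_{t|t} = (I - K_t C)\, \Sigma_{t|t-1}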
Example #12
Source File: rewards.py    From Counterfactual-StoryRW with MIT License
def _discount_reward_tensor_2d(reward, sequence_length=None,
                               discount=1., dtype=None):
    if sequence_length is not None:
        reward = mask_sequences(
            reward, sequence_length, dtype=dtype, tensor_rank=2)

    if discount == 1.:
        disc_reward = tf.cumsum(reward, axis=1, reverse=True)
    else:
        # [max_time, batch_size]
        rev_reward_T = tf.transpose(tf.reverse(reward, [1]), [1, 0])
        rev_reward_T_cum = tf.scan(
            fn=lambda acc, cur: cur + discount * acc,
            elems=rev_reward_T,
            initializer=tf.zeros_like(reward[:, 1]),
            back_prop=False)
        disc_reward = tf.reverse(
            tf.transpose(rev_reward_T_cum, [1, 0]), [1])

    return disc_reward 
Example #13
Source File: policy.py    From yolo_v2 with Apache License 2.0
def multi_step(self, all_obs, initial_state, all_actions):
    """Calculate log-probs and other calculations on batch of episodes."""
    batch_size = tf.shape(initial_state)[0]
    time_length = tf.shape(all_obs[0])[0]
    initial_actions = [act[0] for act in all_actions]
    all_actions = [tf.concat([act[1:], act[0:1]], 0)
                   for act in all_actions]  # "final" action is dummy

    (internal_states, _, logits, log_probs,
     entropies, self_kls) = tf.scan(
        self.single_step,
        (all_obs, all_actions),
        initializer=self.get_initializer(
            batch_size, initial_state, initial_actions))

    # remove "final" computations
    log_probs = [log_prob[:-1] for log_prob in log_probs]
    entropies = [entropy[:-1] for entropy in entropies]
    self_kls = [self_kl[:-1] for self_kl in self_kls]

    return internal_states, logits, log_probs, entropies, self_kls 
Example #14
Source File: ppo.py    From training_results_v0.5 with Apache License 2.0
def calculate_generalized_advantage_estimator(
    reward, value, done, gae_gamma, gae_lambda):
  # pylint: disable=g-doc-args
  """Generalized advantage estimator.

  Returns:
    GAE estimator. It will be one element shorter than the input; this is
    because to compute GAE for [0, ..., N-1] one needs V for [1, ..., N].
  """
  # pylint: enable=g-doc-args

  next_value = value[1:, :]
  next_not_done = 1 - tf.cast(done[1:, :], tf.float32)
  delta = (reward[:-1, :] + gae_gamma * next_value * next_not_done
           - value[:-1, :])

  return_ = tf.reverse(tf.scan(
      lambda agg, cur: cur[0] + cur[1] * gae_gamma * gae_lambda * agg,
      [tf.reverse(delta, [0]), tf.reverse(next_not_done, [0])],
      tf.zeros_like(delta[0, :]),
      parallel_iterations=1), [0])
  return tf.check_numerics(return_, "return") 
Example #15
Source File: embeddings.py    From inferbeddings with MIT License
def bilinear_diagonal_walk_embedding(predicate_embeddings):
    """
    Takes a walk, represented by a 3D Tensor with shape (batch_size, walk_length, embedding_length),
    and computes its embedding using a simple bilinear-diagonal model.
    This method is roughly equivalent to:
    > walk_embedding = tf.reduce_prod(predicate_embeddings, axis=1)

    :param predicate_embeddings: 3D Tensor containing the embedding of the predicates in the walk.
    :return: 2D tensor of size (batch_size, embedding_length) containing the walk embeddings.
    """
    batch_size, embedding_len = tf.shape(predicate_embeddings)[0], tf.shape(predicate_embeddings)[2]

    # Transpose the (batch_size, walk_length, n) Tensor in a (walk_length, batch_size, n) Tensor
    transposed_embedding_matrix = tf.transpose(predicate_embeddings, perm=[1, 0, 2])

    # Define the initializer of the scan procedure - an all-ones matrix
    # where one is the neutral element wrt. the element-wise product
    initializer = tf.ones((batch_size, embedding_len), dtype=predicate_embeddings.dtype)

    # The walk embeddings are given by the element-wise product of the predicate embeddings
    walk_embedding = tf.scan(lambda x, y: x * y, transposed_embedding_matrix, initializer=initializer)

    # Add the initializer as the first step in the scan sequence, in case the walk has zero-length
    return tf.concat(values=[tf.expand_dims(initializer, 0), walk_embedding], axis=0)[-1] 
Example #16
Source File: embeddings.py    From inferbeddings with MIT License
def additive_walk_embedding(predicate_embeddings):
    """
    Takes a walk, represented by a 3D Tensor with shape (batch_size, walk_length, embedding_length),
    and computes its embedding using a simple additive model.
    This method is roughly equivalent to:
    > walk_embedding = tf.reduce_sum(predicate_embeddings, axis=1)

    :param predicate_embeddings: 3D Tensor containing the embedding of the predicates in the walk.
    :return: 2D tensor of size (batch_size, embedding_length) containing the walk embeddings.
    """
    batch_size, embedding_len = tf.shape(predicate_embeddings)[0], tf.shape(predicate_embeddings)[2]

    # Transpose the (batch_size, walk_length, n) Tensor in a (walk_length, batch_size, n) Tensor
    transposed_embedding_matrix = tf.transpose(predicate_embeddings, perm=[1, 0, 2])

    # Define the initializer of the scan procedure - an all-zeros matrix
    initializer = tf.zeros((batch_size, embedding_len), dtype=predicate_embeddings.dtype)

    # The walk embeddings are given by the sum of the predicate embeddings
    # where zero is the neutral element wrt. the element-wise sum
    walk_embedding = tf.scan(lambda x, y: x + y, transposed_embedding_matrix, initializer=initializer)

    # Add the initializer as the first step in the scan sequence, in case the walk has zero-length
    return tf.concat(values=[tf.expand_dims(initializer, 0), walk_embedding], axis=0)[-1] 
Example #17
Source File: ppo.py    From BERT with Apache License 2.0
def calculate_generalized_advantage_estimator(
    reward, value, done, gae_gamma, gae_lambda):
  # pylint: disable=g-doc-args
  """Generalized advantage estimator.

  Returns:
    GAE estimator. It will be one element shorter than the input; this is
    because to compute GAE for [0, ..., N-1] one needs V for [1, ..., N].
  """
  # pylint: enable=g-doc-args

  next_value = value[1:, :]
  next_not_done = 1 - tf.cast(done[1:, :], tf.float32)
  delta = (reward[:-1, :] + gae_gamma * next_value * next_not_done
           - value[:-1, :])

  return_ = tf.reverse(tf.scan(
      lambda agg, cur: cur[0] + cur[1] * gae_gamma * gae_lambda * agg,
      [tf.reverse(delta, [0]), tf.reverse(next_not_done, [0])],
      tf.zeros_like(delta[0, :]),
      parallel_iterations=1), [0])
  return tf.check_numerics(return_, "return") 
Example #18
Source File: policy.py    From Gun-Detector with Apache License 2.0
def multi_step(self, all_obs, initial_state, all_actions):
    """Calculate log-probs and other calculations on batch of episodes."""
    batch_size = tf.shape(initial_state)[0]
    time_length = tf.shape(all_obs[0])[0]
    initial_actions = [act[0] for act in all_actions]
    all_actions = [tf.concat([act[1:], act[0:1]], 0)
                   for act in all_actions]  # "final" action is dummy

    (internal_states, _, logits, log_probs,
     entropies, self_kls) = tf.scan(
        self.single_step,
        (all_obs, all_actions),
        initializer=self.get_initializer(
            batch_size, initial_state, initial_actions))

    # remove "final" computations
    log_probs = [log_prob[:-1] for log_prob in log_probs]
    entropies = [entropy[:-1] for entropy in entropies]
    self_kls = [self_kl[:-1] for self_kl in self_kls]

    return internal_states, logits, log_probs, entropies, self_kls 
Example #19
Source File: TestUpd.py    From How-to-Learn-from-Little-Data with MIT License
def omniglot():

    sess = tf.InteractiveSession()

    """    def wrapper(v):
        return tf.Print(v, [v], message="Printing v")

    v = tf.Variable(initial_value=np.arange(0, 36).reshape((6, 6)), dtype=tf.float32, name='Matrix')

    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    temp = tf.Variable(initial_value=np.arange(0, 36).reshape((6, 6)), dtype=tf.float32, name='temp')
    temp = wrapper(v)
    #with tf.control_dependencies([temp]):
    temp.eval()
    print 'Hello'"""

    def update_tensor(V, dim2, val):  # Update tensor V: set V[i, dim2[i]] = val[i] for every row i.
        val = tf.cast(val, V.dtype)
        def body(_, elems):
            # Python 3 removed tuple-parameter unpacking, so unpack manually.
            v, d2, chg = elems
            d2_int = tf.cast(d2, tf.int32)
            # Rebuild the row with the element at position d2 replaced by chg.
            return tf.slice(tf.concat([v[:d2_int], [chg], v[d2_int + 1:]], axis=0),
                            [0], [v.get_shape().as_list()[0]])
        Z = tf.scan(body, elems=(V, dim2, val),
                    initializer=tf.constant(1, shape=V.get_shape().as_list()[1:], dtype=tf.float32),
                    name="Scan_Update")
        return Z 
Example #20
Source File: filter.py    From kvae with MIT License
def backward_step_fn(self, params, inputs):
        """
        Backwards step over a batch, to be used in tf.scan
        :param params:
        :param inputs: (batch_size, variable dimensions)
        :return:
        """
        mu_back, Sigma_back = params
        mu_pred_tp1, Sigma_pred_tp1, mu_filt_t, Sigma_filt_t, A = inputs

        # J_t = tf.matmul(tf.reshape(tf.transpose(tf.matrix_inverse(Sigma_pred_tp1), [0, 2, 1]), [-1, self.dim_z]),
        #                 self.A)
        # J_t = tf.transpose(tf.reshape(J_t, [-1, self.dim_z, self.dim_z]), [0, 2, 1])
        J_t = tf.matmul(tf.transpose(A, [0, 2, 1]), tf.matrix_inverse(Sigma_pred_tp1))
        J_t = tf.matmul(Sigma_filt_t, J_t)

        mu_back = mu_filt_t + tf.matmul(J_t, mu_back - mu_pred_tp1)
        Sigma_back = Sigma_filt_t + tf.matmul(J_t, tf.matmul(Sigma_back - Sigma_pred_tp1, J_t, adjoint_b=True))

        return mu_back, Sigma_back 
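The step above is the Rauch-Tung-Striebel smoother update; in equations (matching the code, with the t+1 quantities coming from the previous scan step):

J_t = \Sigma_{t|t} A_t^\top \Sigma_{t+1|t}^{-1}
\mu_{t|T} = \mu_{t|t} + J_t (\mu_{t+1|T} - \mu_{t+1|t})
\Sigma_{t|T} = \Sigma_{t|t} + J_t (\Sigma_{t+1|T} - \Sigma_{t+1|t}) J_t^\top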
Example #21
Source File: tf_lib.py    From phillip with GNU General Public License v3.0
def scan(f, inputs, initial_state, axis=0):
  inputs = util.deepIter(util.deepMap(lambda t: iter(tf.unstack(t, axis=axis)), inputs))
  outputs = []
  output = initial_state
  for input_ in inputs:
    output = f(output, input_)
    outputs.append(output)
  return util.deepZipWith(lambda *ts: tf.stack(ts, axis=axis), *outputs) 
Example #22
Source File: planning.py    From planet with Apache License 2.0
def cross_entropy_method(
    cell, objective_fn, state, obs_shape, action_shape, horizon, graph,
    amount=1000, topk=100, iterations=10, min_action=-1, max_action=1):
  obs_shape, action_shape = tuple(obs_shape), tuple(action_shape)
  original_batch = tools.shape(tools.nested.flatten(state)[0])[0]
  initial_state = tools.nested.map(lambda tensor: tf.tile(
      tensor, [amount] + [1] * (tensor.shape.ndims - 1)), state)
  extended_batch = tools.shape(tools.nested.flatten(initial_state)[0])[0]
  use_obs = tf.zeros([extended_batch, horizon, 1], tf.bool)
  obs = tf.zeros((extended_batch, horizon) + obs_shape)

  def iteration(mean_and_stddev, _):
    mean, stddev = mean_and_stddev
    # Sample action proposals from belief.
    normal = tf.random_normal((original_batch, amount, horizon) + action_shape)
    action = normal * stddev[:, None] + mean[:, None]
    action = tf.clip_by_value(action, min_action, max_action)
    # Evaluate proposal actions.
    action = tf.reshape(
        action, (extended_batch, horizon) + action_shape)
    (_, state), _ = tf.nn.dynamic_rnn(
        cell, (0 * obs, action, use_obs), initial_state=initial_state)
    return_ = objective_fn(state)
    return_ = tf.reshape(return_, (original_batch, amount))
    # Re-fit belief to the best ones.
    _, indices = tf.nn.top_k(return_, topk, sorted=False)
    indices += tf.range(original_batch)[:, None] * amount
    best_actions = tf.gather(action, indices)
    mean, variance = tf.nn.moments(best_actions, 1)
    stddev = tf.sqrt(variance + 1e-6)
    return mean, stddev

  mean = tf.zeros((original_batch, horizon) + action_shape)
  stddev = tf.ones((original_batch, horizon) + action_shape)
  if iterations < 1:
    return mean
  mean, stddev = tf.scan(
      iteration, tf.range(iterations), (mean, stddev), back_prop=False)
  mean, stddev = mean[-1], stddev[-1]  # Select the belief from the last iteration.
  return mean 
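Conceptually, every scan iteration refits the Gaussian action belief to the top-K candidates ranked by predicted return (my notation, not from the source):

a^{(i)} \sim \mathcal{N}(\mu, \sigma^2),\ i = 1, \dots, \text{amount}, \text{ clipped to } [\text{min\_action}, \text{max\_action}]
\mathcal{I} = \operatorname{top-K}_i\, R(a^{(i)})
\mu \leftarrow \operatorname{mean}_{i \in \mathcal{I}} a^{(i)}, \qquad \sigma \leftarrow \sqrt{\operatorname{Var}_{i \in \mathcal{I}} a^{(i)} + 10^{-6}}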
Example #23
Source File: HRDE_Model_v1.py    From QA_HRDE_LTC with MIT License
def _add_memory_network(self):
        print('[launch] add memory network')
        
        with tf.name_scope('memory_network') as scope:
    
            # memory space for latent topic
            self.memory = tf.Variable(tf.random_uniform( [self.memory_dim, self.topic_size],
                                                       minval= -0.25,
                                                       maxval= 0.25,
                                                       dtype=tf.float32,
                                                       seed=None),
                                                       name="latent_topic_memory")

            
            self.memory_W = tf.Variable(tf.random_uniform( [self.hidden_dim, self.memory_dim],
                                                       minval= -0.25,
                                                       maxval= 0.25,
                                                       dtype=tf.float32,
                                                       seed=None),
                                                       name="latent_topic_memory")
            
            topic_sim_project = tf.matmul( self.final_encoder, self.memory_W )
            
            topic_sim = tf.matmul( topic_sim_project, self.memory )

            # normalize
            topic_sim_norm = tf.nn.softmax( logits=topic_sim, dim=-1)

            shaped_input = tf.reshape( topic_sim_norm, [self.batch_size, self.topic_size])
            topic_sim_mul_memory = tf.scan( lambda a, x : tf.multiply( self.memory, x ), shaped_input, initializer=self.memory)
            rsum = tf.reduce_sum( topic_sim_mul_memory, axis=-1)

            # final context 
            self.final_encoder = tf.concat( [self.final_encoder, rsum], axis=-1 )
            
        self.final_encoder_dimension   = self.hidden_dim_con + self.memory_dim   # dimension grew due to the concat above
        self.final_encoderR_dimension  = self.hidden_dim_con 
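As an aside, the tf.scan followed by reduce_sum above is, to my reading, just a batched matrix-vector product against the memory; a hedged equivalent one-liner (my rewrite, not from the source) would be:

rsum = tf.matmul(shaped_input, self.memory, transpose_b=True)  # [batch_size, memory_dim]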
Example #24
Source File: DE_Model_mem_v1.py    From QA_HRDE_LTC with MIT License
def _add_memory_network_same_dim(self):
        print('[launch] add memory network - 1st version SAME DIM (memory == hidden) / without output projection')
        
        with tf.name_scope('memory_network_v1_same_dim') as scope:
    
            # memory space for latent topic
            self.memory = tf.get_variable( "latent_topic_memory", 
                                      shape=[self.topic_size, self.hidden_dim],
                                      initializer=tf.orthogonal_initializer()
                                      )
            
            self.memory = tf.nn.dropout( self.memory, keep_prob=self.dr_memory_prob )

            # same dimension - No projection layer
            self.topic_sim_project = self.final_encoder
            
            self.topic_sim = tf.matmul( self.topic_sim_project, self.memory, transpose_b=True )

            # normalize
            self.topic_sim_sigmoid_softmax = tf.nn.softmax( logits=self.topic_sim, dim=-1)

            shaped_input = tf.reshape( self.topic_sim_sigmoid_softmax, [self.batch_size, self.topic_size])
            
            topic_sim_mul_memory = tf.scan( lambda a, x : tf.multiply( tf.transpose(self.memory), x ), shaped_input, initializer=tf.transpose(self.memory) )
            tmpT = tf.reduce_sum(topic_sim_mul_memory, axis=-1, keep_dims=True)
            tmpT2 = tf.transpose(tmpT, [0, 2, 1])

            self.rsum = tf.reshape( tmpT2, [self.batch_size, self.hidden_dim])

            # final context 
            self.final_encoder  = self.final_encoder + self.rsum
            
            
            self.final_encoder_dimension  = self.hidden_dim
            
            self.final_encoderR_dimension = self.hidden_dim 
Example #25
Source File: DE_Model_mem_v2.py    From QA_HRDE_LTC with MIT License
def _add_memory_network_same_dim(self):
        print('[launch] add memory network - 1st version SAME DIM (memory == hidden) / without output projection')
        
        with tf.name_scope('memory_network_v1_same_dim') as scope:
    
            # memory space for latent topic
            self.memory = tf.get_variable( "latent_topic_memory", 
                                      shape=[self.topic_size, self.hidden_dim],
                                      initializer=tf.orthogonal_initializer()
                                      )
            
            self.memory = tf.nn.dropout( self.memory, keep_prob=self.dr_memory_prob )

            # same dimension - No projection layer
            self.topic_sim_project = self.final_encoder
            
            self.topic_sim = tf.matmul( self.topic_sim_project, self.memory, transpose_b=True )
            #topic_sim_sigmoid = tf.sigmoid( topic_sim )

            # normalize
            self.topic_sim_sigmoid_softmax = tf.nn.softmax( logits=self.topic_sim, dim=-1)

            shaped_input = tf.reshape( self.topic_sim_sigmoid_softmax, [self.batch_size, self.topic_size])
            
            topic_sim_mul_memory = tf.scan( lambda a, x : tf.multiply( tf.transpose(self.memory), x ), shaped_input, initializer=tf.transpose(self.memory) )
            tmpT = tf.reduce_sum(topic_sim_mul_memory, axis=-1, keep_dims=True)
            tmpT2 = tf.transpose(tmpT, [0, 2, 1])

            self.rsum = tf.reshape( tmpT2, [self.batch_size, self.hidden_dim])

            # final context 
            self.final_encoder  = self.final_encoder + self.rsum
            
            self.final_encoder_dimension  = self.hidden_dim
            self.final_encoderR_dimension = self.hidden_dim 
Example #26
Source File: HRDE_Model_mem_v2.py    From QA_HRDE_LTC with MIT License
def deprecated_add_memory_network(self):
        print('[launch] add memory network - 1st version / without output projection')
        
        with tf.name_scope('memory_network_v1') as scope:
    
            # memory space for latent topic
            self.memory = tf.get_variable( "latent_topic_memory", 
                                          shape=[self.memory_dim, self.topic_size],
                                          initializer=tf.orthogonal_initializer()
                                          )
                                          
            
            self.memory_W = tf.Variable(tf.random_uniform( [self.hidden_dim, self.memory_dim],
                                                       minval= -0.25,
                                                       maxval= 0.25,
                                                       dtype=tf.float32,
                                                       seed=None),
                                                       name="memory_projection_W")
                                          
            self.memory_W = tf.nn.dropout( self.memory_W, keep_prob=self.dr_memory_prob )
            
            memory_bias = tf.Variable(tf.zeros([1], dtype=tf.float32), name="memory_projection_bias")

            topic_sim_project = tf.matmul( self.final_encoder, self.memory_W ) + memory_bias
            
            topic_sim = tf.matmul( topic_sim_project, self.memory )

            # normalize
            self.topic_sim_sigmoid_softmax = tf.nn.softmax( logits=topic_sim, dim=-1)

            shaped_input = tf.reshape( self.topic_sim_sigmoid_softmax, [self.batch_size, self.topic_size])
            topic_sim_mul_memory = tf.scan( lambda a, x : tf.multiply( self.memory, x ), shaped_input, initializer=self.memory)
            self.rsum = tf.reduce_sum( topic_sim_mul_memory, axis=-1)

            # final context 
            self.final_encoder = tf.concat( [self.final_encoder, self.rsum], axis=-1 )
            
        self.final_encoder_dimension   = self.hidden_dim_con + self.memory_dim   # dimension grew due to the concat above
        self.final_encoderR_dimension  = self.hidden_dim_con 
Example #27
Source File: simulate.py    From planet with Apache License 2.0
def collect_rollouts(
    step, env_ctor, duration, num_agents, agent_config, isolate_envs):
  batch_env = define_batch_env(env_ctor, num_agents, isolate_envs)
  agent = mpc_agent.MPCAgent(batch_env, step, False, False, agent_config)
  cleanup = lambda: batch_env.close()

  def simulate_fn(unused_last, step):
    done, score, unused_summary = simulate_step(
        batch_env, agent,
        log=False,
        reset=tf.equal(step, 0))
    with tf.control_dependencies([done, score]):
      image = batch_env.observ
      batch_action = batch_env.action
      batch_reward = batch_env.reward
    return done, score, image, batch_action, batch_reward

  initializer = (
      tf.zeros([num_agents], tf.bool),
      tf.zeros([num_agents], tf.float32),
      0 * batch_env.observ,
      0 * batch_env.action,
      tf.zeros([num_agents], tf.float32))
  done, score, image, action, reward = tf.scan(
      simulate_fn, tf.range(duration),
      initializer, parallel_iterations=1)
  score = tf.boolean_mask(score, done)
  image = tf.transpose(image, [1, 0, 2, 3, 4])
  action = tf.transpose(action, [1, 0, 2])
  reward = tf.transpose(reward)
  return score, image, action, reward, cleanup 
Example #28
Source File: neural_gpu.py    From training_results_v0.5 with Apache License 2.0
def diagonal_neural_gpu(inputs, hparams, name=None):
  """Improved Neural GPU as in https://arxiv.org/abs/1702.08727."""
  with tf.variable_scope(name, "diagonal_neural_gpu"):

    def step(state_tup, inp):
      """Single step of the improved Neural GPU."""
      state, _ = state_tup
      x = state
      for layer in range(hparams.num_hidden_layers):
        x, new_loss = common_layers.diagonal_conv_gru(
            x, (hparams.kernel_height, hparams.kernel_width),
            hparams.hidden_size,
            dropout=hparams.dropout,
            name="dcgru_%d" % layer)
      # Padding input is zeroed out in the modality; we check this by summing.
      padding_inp = tf.less(tf.reduce_sum(tf.abs(inp), axis=[1, 2]), 0.00001)
      new_state = tf.where(padding_inp, state, x)  # No-op where inp is padding.
      return new_state, new_loss

    final_state, losses = tf.scan(
        step,
        tf.transpose(inputs, [1, 0, 2, 3]),
        initializer=(inputs, tf.constant(0.0)),
        parallel_iterations=1,
        swap_memory=True)
    return final_state[0, :, :, :, :], 2.0 * tf.reduce_mean(losses) 
Example #29
Source File: common_layers.py    From training_results_v0.5 with Apache License 2.0
def running_global_pool_1d(inputs, pooling_type="MAX"):
  """Same global pool, but only for the elements up to the current element.

  Useful for outputs where the state of future elements is not known.
  Takes no mask as all elements up to the current element are assumed to exist.
  Currently only supports maximum. Equivalent to using a lower triangle bias.

  Args:
    inputs: A tensor of shape [batch_size, sequence_length, input_dims]
      containing the sequences of input vectors.
    pooling_type: Pooling type to use. Currently only supports 'MAX'.

  Returns:
    A tensor of shape [batch_size, sequence_length, input_dims] containing the
    running 'totals'.
  """
  del pooling_type
  with tf.name_scope("running_global_pool", values=[inputs]):
    scan_fct = tf.maximum
    # Permute inputs so seq_length is first.
    elems = tf.transpose(inputs, [1, 0, 2])
    # Perform scan.
    cumulatives = tf.scan(scan_fct, elems, swap_memory=True)
    # Permute output to get back to original order.
    output = tf.transpose(cumulatives, [1, 0, 2])
  return output 
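A small usage sketch (illustrative values, assuming a TF 1.x session): the running maximum never decreases along the sequence axis.

inputs = tf.constant([[[1.], [3.], [2.], [5.], [4.]]])  # [batch=1, seq=5, dims=1]
pooled = running_global_pool_1d(inputs)
# sess.run(pooled) -> [[[1.], [3.], [3.], [5.], [5.]]]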
Example #30
Source File: temporal_difference.py    From planet with Apache License 2.0
def lambda_return(
    reward, value, bootstrap, discount, lambda_, axis, stop_gradient=True):
  """Average of different multi-step returns.

  Setting lambda=1 gives a discounted Monte Carlo return.
  Setting lambda=0 gives a fixed 1-step return.
  """
  assert reward.shape.ndims == value.shape.ndims, (reward.shape, value.shape)
  # Bring the aggregation dimension front.
  dims = list(range(reward.shape.ndims))
  dims = [axis] + dims[1:axis] + [0] + dims[axis + 1:]
  reward = tf.transpose(reward, dims)
  value = tf.transpose(value, dims)
  if bootstrap is None:
    bootstrap = tf.zeros_like(value[-1])
  next_values = tf.concat([value[1:], bootstrap[None]], 0)
  inputs = reward + discount * next_values * (1 - lambda_)
  return_ = tf.scan(
      fn=lambda agg, cur: cur + discount * lambda_ * agg,
      elems=inputs,
      initializer=bootstrap,
      back_prop=not stop_gradient,
      reverse=True)
  return_ = tf.transpose(return_, dims)
  if stop_gradient:
    return_ = tf.stop_gradient(return_)
  return return_
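The reverse scan above computes the TD(lambda) return recursion (my notation, not from the source):

G^\lambda_t = r_t + \gamma (1 - \lambda) V_{t+1} + \gamma \lambda G^\lambda_{t+1}

with the recursion bootstrapped past the final step by the bootstrap value. Setting \lambda = 1 recovers the discounted Monte Carlo return and \lambda = 0 the one-step return r_t + \gamma V_{t+1}, matching the docstring.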