Python tensorflow.scan() Examples
The following are 30 code examples of tensorflow.scan().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module tensorflow, or try the search function.
Example #1
Source File: ppo.py From training_results_v0.5 with Apache License 2.0 | 6 votes |
def calculate_generalized_advantage_estimator(
        reward, value, done, gae_gamma, gae_lambda):
    """Compute the generalized advantage estimator (GAE) with a reverse scan.

    The tensor arguments are sliced as rank-2 tensors and accumulated along
    their first axis (presumably [time, batch] -- confirm against callers).

    Args:
      reward: Rewards; every row except the last one is used.
      value: Value estimates; row t+1 is the bootstrap for row t.
      done: Termination flags; a set flag in row t+1 removes both the
        bootstrap value and the accumulated advantage from row t.
      gae_gamma: Discount factor.
      gae_lambda: GAE mixing coefficient.

    Returns:
      GAE estimator. It is one element shorter than the input along the first
      axis, because computing GAE for [0, ..., N-1] needs V for [1, ..., N].
      The result is passed through tf.check_numerics.
    """
    bootstrap_value = value[1:, :]
    continues = 1 - tf.cast(done[1:, :], tf.float32)
    td_residual = (
        reward[:-1, :] + gae_gamma * bootstrap_value * continues
        - value[:-1, :])

    def accumulate(running, step):
        # step is the pair [TD residual, not-done mask] of one time step.
        residual, not_done = step
        return residual + not_done * gae_gamma * gae_lambda * running

    # The recursion runs from the last step towards the first, so the inputs
    # are flipped before the scan and the result is flipped back afterwards.
    reversed_inputs = [
        tf.reverse(td_residual, [0]),
        tf.reverse(continues, [0]),
    ]
    reversed_advantage = tf.scan(
        accumulate,
        reversed_inputs,
        tf.zeros_like(td_residual[0, :]),
        parallel_iterations=1)
    advantage = tf.reverse(reversed_advantage, [0])
    return tf.check_numerics(advantage, "return")
Example #2
Source File: temporal_difference.py From planet with Apache License 2.0 | 6 votes |
def discounted_return(reward, discount, bootstrap, axis, stop_gradient=True):
    """Discounted Monte Carlo return.

    Args:
      reward: Reward tensor with a statically known rank.
      discount: Discount factor. When it compares equal to 1 the rewards are
        simply summed over `axis` (that axis is reduced away) and `bootstrap`,
        if given, is added to the sum.
      bootstrap: Optional value used to seed the accumulation; zeros when None.
      axis: Axis to accumulate over. Zero and negative values are supported.
      stop_gradient: Whether to block gradients through the result. It also
        disables back-propagation through the scan.

    Returns:
      For `discount != 1`, a tensor shaped like `reward` holding the
      discounted return accumulated backwards along `axis`.
    """
    if discount == 1 and bootstrap is None:
        return tf.reduce_sum(reward, axis)
    if discount == 1:
        return tf.reduce_sum(reward, axis) + bootstrap

    # Bring the aggregation dimension front by swapping it with axis 0.
    # A swap is its own inverse, so the same permutation restores the layout
    # afterwards. Unlike list-slicing arithmetic, it is also a valid
    # permutation for axis == 0 and for negative axes.
    ndims = reward.shape.ndims
    if axis < 0:
        axis += ndims
    dims = list(range(ndims))
    dims[0], dims[axis] = dims[axis], dims[0]

    reward = tf.transpose(reward, dims)
    if bootstrap is None:
        bootstrap = tf.zeros_like(reward[-1])
    return_ = tf.scan(
        fn=lambda agg, cur: cur + discount * agg,
        elems=reward,
        initializer=bootstrap,
        back_prop=not stop_gradient,
        reverse=True)
    return_ = tf.transpose(return_, dims)
    if stop_gradient:
        return_ = tf.stop_gradient(return_)
    return return_
Example #3
Source File: policy.py From DOTA_models with Apache License 2.0 | 6 votes |
def multi_step(self, all_obs, initial_state, all_actions):
    """Calculate log-probs and other calculations on batch of episodes.

    Args:
      all_obs: Observations, scanned along their first axis.
      initial_state: Initial internal state; its first dimension is taken as
        the batch size.
      all_actions: List of action tensors, scanned along their first axis.

    Returns:
      Tuple (internal_states, logits, log_probs, entropies, self_kls) as
      produced by scanning `self.single_step`; the last entry along the scan
      axis is dropped from each element of the final three lists.
    """
    batch_size = tf.shape(initial_state)[0]

    # The first action only seeds the scan state. The remaining actions are
    # shifted forward by one step and the first one is appended at the end
    # as a dummy "final" action.
    initial_actions = [act[0] for act in all_actions]
    all_actions = [
        tf.concat([act[1:], act[0:1]], 0) for act in all_actions
    ]

    initializer = self.get_initializer(
        batch_size, initial_state, initial_actions)
    (internal_states, _, logits, log_probs,
     entropies, self_kls) = tf.scan(
         self.single_step,
         (all_obs, all_actions),
         initializer=initializer)

    # remove "final" computations
    log_probs = [log_prob[:-1] for log_prob in log_probs]
    entropies = [entropy[:-1] for entropy in entropies]
    self_kls = [self_kl[:-1] for self_kl in self_kls]

    return internal_states, logits, log_probs, entropies, self_kls
Example #4
Source File: ppo.py From fine-lm with MIT License | 6 votes |
def calculate_generalized_advantage_estimator(
        reward, value, done, gae_gamma, gae_lambda):
    """Generalized advantage estimator with the same length as its inputs.

    The tensor arguments are sliced as rank-2 tensors and accumulated along
    their first axis (presumably [time, batch] -- confirm against callers).

    Args:
      reward: Rewards; the last row is ignored (see the comment below).
      value: Value estimates.
      done: Termination flags.
      gae_gamma: Discount factor.
      gae_lambda: GAE mixing coefficient.

    Returns:
      The advantage estimate, passed through tf.check_numerics.
    """
    # Slight weirdness for the last time step: its reward is replaced by its
    # own value estimate and the value that follows it is taken to be zero,
    # so the TD residual there is value - value, i.e. zero.
    last_value = value[-1:, :]
    patched_reward = tf.concat([reward[:-1, :], last_value], axis=0)
    bootstrap_value = tf.concat(
        [value[1:, :], tf.zeros_like(value[-1:, :])], axis=0)
    shifted_done = tf.concat(
        [done[1:, :], tf.zeros_like(done[-1:, :])], axis=0)
    continues = 1 - tf.cast(shifted_done, tf.float32)
    td_residual = (
        patched_reward + gae_gamma * bootstrap_value * continues - value)

    def accumulate(running, step):
        # step is the pair [TD residual, not-done mask] of one time step.
        residual, not_done = step
        return residual + not_done * gae_gamma * gae_lambda * running

    # Accumulate from the last step backwards: flip, scan, flip back.
    reversed_inputs = [
        tf.reverse(td_residual, [0]),
        tf.reverse(continues, [0]),
    ]
    reversed_advantage = tf.scan(
        accumulate,
        reversed_inputs,
        tf.zeros_like(td_residual[0, :]),
        parallel_iterations=1)
    advantage = tf.reverse(reversed_advantage, [0])
    return tf.check_numerics(advantage, "return")
Example #5
Source File: tf_atari_wrappers.py From fine-lm with MIT License | 6 votes |
def simulate(self, action):
    """Repeat `action` for `self.skip` environment steps.

    The wrapped batch environment is stepped `self.skip` times. Over those
    steps the observations are combined with an element-wise maximum, the
    rewards are summed and the done flags are OR-ed together.

    Args:
      action: Action passed unchanged to the wrapped environment each step.

    Returns:
      Pair (reward, done) after the last repetition. Evaluating either one
      first assigns the combined observation to `self._observ`.
    """
    with tf.name_scope("environment/simulate"):  # Do we need this?
        batch_shape = (len(self),)
        start = (
            tf.zeros_like(self._observ),
            tf.fill(batch_shape, 0.0),
            tf.fill(batch_shape, False),
        )

        def not_done_step(accumulated, _):
            max_observ, reward_sum, any_done = accumulated
            reward, done = self._batch_env.simulate(action)
            # Read the observation only after the environment has stepped.
            with tf.control_dependencies([reward, done]):
                # TODO(piotrmilos): possibly ignore envs with done
                new_observ = tf.maximum(max_observ, self._batch_env.observ)
                new_reward = tf.add(reward_sum, reward)
                new_done = tf.logical_or(any_done, done)
                return (new_observ, new_reward, new_done)

        history = tf.scan(
            not_done_step,
            tf.range(self.skip),
            initializer=start,
            parallel_iterations=1,
            infer_shape=False)
        # Only the state after the final repetition matters.
        final_observ, final_reward, final_done = [
            ret[-1, ...] for ret in history
        ]

        store_observ = self._observ.assign(final_observ)
        with tf.control_dependencies([store_observ]):
            return tf.identity(final_reward), tf.identity(final_done)
Example #6
Source File: tf_atari_wrappers.py From fine-lm with MIT License | 6 votes |
def simulate(self, action):
    """Repeat `action` for `self.skip` steps and stack the observations.

    The wrapped batch environment is stepped `self.skip` times. The
    observation of every step is kept and all of them are concatenated along
    the last axis; rewards are summed and done flags are OR-ed together.

    Args:
      action: Action passed unchanged to the wrapped environment each step.

    Returns:
      Pair (reward, done) after the last repetition. Evaluating either one
      first assigns the stacked observation to `self._observ`.
    """
    with tf.name_scope("environment/simulate"):  # Do we need this?
        batch_shape = (len(self),)
        start = (
            tf.zeros(self.old_shape, dtype=tf.float32),
            tf.fill(batch_shape, 0.0),
            tf.fill(batch_shape, False),
        )

        def not_done_step(accumulated, _):
            _, reward_sum, any_done = accumulated
            reward, done = self._batch_env.simulate(action)
            # Read the observation only after the environment has stepped.
            with tf.control_dependencies([reward, done]):
                current_observ = self._batch_env.observ + 0
                new_reward = tf.add(reward_sum, reward)
                new_done = tf.logical_or(any_done, done)
                return (current_observ, new_reward, new_done)

        observations, rewards, dones = tf.scan(
            not_done_step,
            tf.range(self.skip),
            initializer=start,
            parallel_iterations=1,
            infer_shape=False)

        # Turn the leading scan axis into `self.skip` separate frames and
        # join them along the last axis.
        frames = [
            tf.squeeze(frame, axis=0)
            for frame in tf.split(observations, self.skip, axis=0)
        ]
        stacked = tf.concat(frames, axis=-1)

        store_observ = self._observ.assign(stacked)
        with tf.control_dependencies([store_observ]):
            return tf.identity(rewards[-1, ...]), tf.identity(dones[-1, ...])
Example #7
Source File: algorithm.py From soccer-matlab with BSD 2-Clause "Simplified" License | 6 votes |
def _update_value(self, observ, reward, length):
    """Perform multiple update steps of the value baseline.

    `self._update_value_step` is run `self._config.update_epochs_value` times
    in sequence. Only one summary can be returned for the iteration, so the
    one produced after half of the update steps is chosen.

    Args:
      observ: Sequences of observations.
      reward: Sequences of reward.
      length: Batch of sequence lengths.

    Returns:
      Summary tensor.
    """
    num_epochs = self._config.update_epochs_value

    def run_epoch(unused_previous, unused_index):
        # Every epoch sees the same data; the scan only sequences the steps.
        return self._update_value_step(observ, reward, length)

    with tf.name_scope('update_value'):
        loss, summary = tf.scan(
            run_epoch,
            tf.range(num_epochs),
            [0., ''],
            parallel_iterations=1)
        mean_loss = tf.reduce_mean(loss)
        print_loss = tf.Print(0, [mean_loss], 'value loss: ')
        # Make the returned summary depend on all updates and the printout.
        with tf.control_dependencies([loss, print_loss]):
            return summary[num_epochs // 2]
Example #8
Source File: functional_ops_test.py From deep_image_model with Apache License 2.0 | 6 votes |
def testScanFoldl_Nested(self):
    """Check tf.scan whose step function itself runs a tf.foldl."""
    with self.test_session():
        outer_elems = tf.constant([1.0, 2.0, 3.0, 4.0], name="data")
        inner_elems = tf.constant([0.5, 0.5], name="data")

        def r_inner(a, x):
            def multiply(b, y):
                return b * y * x
            return tf.foldl(multiply, inner_elems, initializer=a)

        result = tf.scan(r_inner, outer_elems)

        # Hand trace (a: scan accumulator, x: outer element,
        # b: foldl accumulator, y: inner element):
        #   t == 0: no initializer, so the first element is returned -> 1
        #   t == 1: a == 1, x == 2
        #     b == 1, y == 0.5 -> b * y * x == 1
        #     b == 1, y == 0.5 -> b * y * x == 1
        #   t == 2: a == 1, x == 3
        #     b == 1,   y == 0.5 -> b * y * x == 1.5
        #     b == 1.5, y == 0.5 -> b * y * x == 2.25
        #   t == 3: a == 2.25, x == 4
        #     b == 2.25, y == 0.5 -> b * y * x == 4.5
        #     b == 4.5,  y == 0.5 -> b * y * x == 9
        expected = [1., 1., 2.25, 9.]
        self.assertAllClose(expected, result.eval())
Example #9
Source File: functional_ops_test.py From deep_image_model with Apache License 2.0 | 6 votes |
def testScan_Scoped(self):
    """Check that tf.scan creates and then reuses a variable in a scope."""
    with self.test_session() as sess:
        with tf.variable_scope("root") as varscope:
            data = tf.constant([1, 2, 3, 4, 5, 6], name="data")

            first_scan = tf.scan(simple_scoped_fn, data)
            # Check that we have the one variable we asked for here.
            self.assertEqual(len(tf.trainable_variables()), 1)
            self.assertEqual(
                tf.trainable_variables()[0].name, "root/body/two:0")
            sess.run([tf.global_variables_initializer()])
            expected_first = np.array([1, 6, 18, 44, 98, 208])
            self.assertAllEqual(expected_first, first_scan.eval())

            # Now let's reuse our single variable.
            varscope.reuse_variables()
            second_scan = tf.scan(simple_scoped_fn, data, initializer=2)
            self.assertEqual(len(tf.trainable_variables()), 1)
            expected_second = np.array([6, 16, 38, 84, 178, 368])
            self.assertAllEqual(expected_second, second_scan.eval())
Example #10
Source File: TestUpd.py From NTM-One-Shot-TF with MIT License | 6 votes |
def omniglot():
    """Open an interactive session and define the `update_tensor` helper."""
    sess = tf.InteractiveSession()

    """
    def wrapper(v):
        return tf.Print(v, [v], message="Printing v")

    v = tf.Variable(initial_value=np.arange(0, 36).reshape((6, 6)), dtype=tf.float32, name='Matrix')

    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    temp = tf.Variable(initial_value=np.arange(0, 36).reshape((6, 6)), dtype=tf.float32, name='temp')
    temp = wrapper(v)
    #with tf.control_dependencies([temp]):
    temp.eval()
    print 'Hello'"""

    def update_tensor(V, dim2, val):
        """Return V with, in each row, the entry at index dim2 set to val.

        V, dim2 and val are scanned together along their first axis, so row i
        of V gets its element number dim2[i] replaced by val[i].
        """
        # Update tensor V, with index(:,dim2[:]) by val[:]
        val = tf.cast(val, V.dtype)

        def body(_, elems):
            # Tuple parameters in the signature are a SyntaxError on
            # Python 3, so the scan element is unpacked here instead.
            v, d2, chg = elems
            d2_int = tf.cast(d2, tf.int32)
            # NOTE(review): tf.concat_v2 only exists in old TensorFlow
            # releases (later spelled tf.concat) -- confirm target version.
            patched = tf.concat_v2(
                [v[:d2_int], [chg], v[d2_int + 1:]], axis=0)
            # Slice back to the static row length.
            return tf.slice(patched, [0], [v.get_shape().as_list()[0]])

        Z = tf.scan(
            body,
            elems=(V, dim2, val),
            initializer=tf.constant(
                1, shape=V.get_shape().as_list()[1:], dtype=tf.float32),
            name="Scan_Update")
        return Z
Example #11
Source File: filter.py From kvae with MIT License | 6 votes |
def compute_forwards(self, reuse=None):
    """Compute the forward step in the Kalman filter.

    The forward pass is initialized with p(z_1)=N(self.mu, self.Sigma).
    We then return the mean and covariances of the predictive distribution
    p(z_t|z_tm1,u_t), t=2,..T+1 and the filtering distribution
    p(z_t|x_1:t,u_1:t), t=1,..T.
    We follow the notation of Murphy's book, section 18.3.1.

    :param reuse: forwarded to `self.alpha` when building the initial step.
    :return: the stacked outputs of scanning `self.forward_step_fn` over time.
    """
    # To make sure we are not accidentally using the real outputs in the
    # steps with missing values, set them to 0.
    masked_y = tf.multiply(tf.expand_dims(self.mask, 2), self.y)
    # Each scan element carries the masked output, the control and the mask.
    scan_inputs = tf.concat(
        [masked_y, self.u, tf.expand_dims(self.mask, 2)], axis=2)

    # Repeat the initial output once per batch element.
    batch_size = tf.shape(self.mu)[0]
    initial_y = tf.tile(
        tf.expand_dims(self.y_0, 0),  # (1, dim_y)
        (batch_size, 1))
    alpha, state, u, alpha_buffer = self.alpha(
        initial_y, self.state, self.u[:, 0], init_buffer=True, reuse=reuse)

    # dummy matrices to initialize A, B and C in scan
    static_batch = self.Sigma.get_shape()[0]
    dummy_init_A = tf.ones([static_batch, self.dim_z, self.dim_z])
    dummy_init_B = tf.ones([static_batch, self.dim_z, self.dim_u])
    dummy_init_C = tf.ones([static_batch, self.dim_y, self.dim_z])

    initial_carry = (
        self.mu, self.Sigma, self.mu, self.Sigma,
        alpha, u, state, alpha_buffer,
        dummy_init_A, dummy_init_B, dummy_init_C,
    )
    # tf.scan iterates over the first axis, so make the inputs time-major.
    time_major_inputs = tf.transpose(scan_inputs, [1, 0, 2])
    return tf.scan(
        self.forward_step_fn,
        time_major_inputs,
        initializer=initial_carry,
        parallel_iterations=1,
        name='forward')
Example #12
Source File: rewards.py From Counterfactual-StoryRW with MIT License | 6 votes |
def _discount_reward_tensor_2d(reward, sequence_length=None,
                               discount=1., dtype=None):
    """Compute discounted future rewards for a 2D reward tensor.

    Args:
      reward: Tensor of shape [batch_size, max_time].
      sequence_length: Optional lengths; when given, `reward` is first masked
        with `mask_sequences`.
      discount: Discount factor. With a discount of exactly 1 a reverse
        cumulative sum is used instead of a scan.
      dtype: Forwarded to `mask_sequences`.

    Returns:
      Tensor shaped like `reward`, where each entry is the discounted sum of
      the rewards from that time step onwards.
    """
    if sequence_length is not None:
        reward = mask_sequences(
            reward, sequence_length, dtype=dtype, tensor_rank=2)

    if discount == 1.:
        disc_reward = tf.cumsum(reward, axis=1, reverse=True)
    else:
        # [max_time, batch_size]
        rev_reward_T = tf.transpose(tf.reverse(reward, [1]), [1, 0])
        rev_reward_T_cum = tf.scan(
            fn=lambda acc, cur: cur + discount * acc,
            elems=rev_reward_T,
            # Column 0 exists whenever max_time >= 1; indexing column 1 would
            # fail for single-step sequences.
            initializer=tf.zeros_like(reward[:, 0]),
            back_prop=False)
        disc_reward = tf.reverse(
            tf.transpose(rev_reward_T_cum, [1, 0]), [1])

    return disc_reward
Example #13
Source File: policy.py From yolo_v2 with Apache License 2.0 | 6 votes |
def multi_step(self, all_obs, initial_state, all_actions):
    """Calculate log-probs and other calculations on batch of episodes.

    Args:
      all_obs: Observations, scanned along their first axis.
      initial_state: Initial internal state; its first dimension is taken as
        the batch size.
      all_actions: List of action tensors, scanned along their first axis.

    Returns:
      Tuple (internal_states, logits, log_probs, entropies, self_kls) as
      produced by scanning `self.single_step`; the last entry along the scan
      axis is dropped from each element of the final three lists.
    """
    batch_size = tf.shape(initial_state)[0]

    # The first action only seeds the scan state. The remaining actions are
    # shifted forward by one step and the first one is appended at the end
    # as a dummy "final" action.
    initial_actions = [act[0] for act in all_actions]
    all_actions = [
        tf.concat([act[1:], act[0:1]], 0) for act in all_actions
    ]

    initializer = self.get_initializer(
        batch_size, initial_state, initial_actions)
    (internal_states, _, logits, log_probs,
     entropies, self_kls) = tf.scan(
         self.single_step,
         (all_obs, all_actions),
         initializer=initializer)

    # remove "final" computations
    log_probs = [log_prob[:-1] for log_prob in log_probs]
    entropies = [entropy[:-1] for entropy in entropies]
    self_kls = [self_kl[:-1] for self_kl in self_kls]

    return internal_states, logits, log_probs, entropies, self_kls
Example #14
Source File: ppo.py From training_results_v0.5 with Apache License 2.0 | 6 votes |
def calculate_generalized_advantage_estimator(
        reward, value, done, gae_gamma, gae_lambda):
    """Compute the generalized advantage estimator (GAE) with a reverse scan.

    The tensor arguments are sliced as rank-2 tensors and accumulated along
    their first axis (presumably [time, batch] -- confirm against callers).

    Args:
      reward: Rewards; every row except the last one is used.
      value: Value estimates; row t+1 is the bootstrap for row t.
      done: Termination flags; a set flag in row t+1 removes both the
        bootstrap value and the accumulated advantage from row t.
      gae_gamma: Discount factor.
      gae_lambda: GAE mixing coefficient.

    Returns:
      GAE estimator. It is one element shorter than the input along the first
      axis, because computing GAE for [0, ..., N-1] needs V for [1, ..., N].
      The result is passed through tf.check_numerics.
    """
    bootstrap_value = value[1:, :]
    continues = 1 - tf.cast(done[1:, :], tf.float32)
    td_residual = (
        reward[:-1, :] + gae_gamma * bootstrap_value * continues
        - value[:-1, :])

    def accumulate(running, step):
        # step is the pair [TD residual, not-done mask] of one time step.
        residual, not_done = step
        return residual + not_done * gae_gamma * gae_lambda * running

    # The recursion runs from the last step towards the first, so the inputs
    # are flipped before the scan and the result is flipped back afterwards.
    reversed_inputs = [
        tf.reverse(td_residual, [0]),
        tf.reverse(continues, [0]),
    ]
    reversed_advantage = tf.scan(
        accumulate,
        reversed_inputs,
        tf.zeros_like(td_residual[0, :]),
        parallel_iterations=1)
    advantage = tf.reverse(reversed_advantage, [0])
    return tf.check_numerics(advantage, "return")
Example #15
Source File: embeddings.py From inferbeddings with MIT License | 6 votes |
def bilinear_diagonal_walk_embedding(predicate_embeddings):
    """
    Embed a walk with a simple bilinear diagonal model, i.e. as the
    element-wise product of the embeddings of its predicates.

    The walk is a 3D Tensor with shape (batch_size, walk_length, embedding_length).
    This method is roughly equivalent to:
    > walk_embedding = tf.reduce_prod(predicate_embeddings, axis=1)

    :param predicate_embeddings: 3D Tensor containing the embedding of the predicates in the walk.
    :return: 2D tensor of size (batch_size, embedding_length) containing the walk embeddings.
    """
    batch_size = tf.shape(predicate_embeddings)[0]
    embedding_len = tf.shape(predicate_embeddings)[2]

    # tf.scan iterates over the first axis, so go from
    # (batch_size, walk_length, n) to (walk_length, batch_size, n).
    walk_major = tf.transpose(predicate_embeddings, perm=[1, 0, 2])

    # All-ones start value: one is the neutral element of the product.
    ones = tf.ones((batch_size, embedding_len),
                   dtype=predicate_embeddings.dtype)

    def multiply(accumulated, embedding):
        return accumulated * embedding

    running_products = tf.scan(multiply, walk_major, initializer=ones)

    # Put the start value in front of the scan outputs so that a zero-length
    # walk still has a last element to return.
    with_start = tf.concat(
        values=[tf.expand_dims(ones, 0), running_products], axis=0)
    return with_start[-1]
Example #16
Source File: embeddings.py From inferbeddings with MIT License | 6 votes |
def additive_walk_embedding(predicate_embeddings):
    """
    Takes a walk, represented by a 3D Tensor with shape (batch_size, walk_length, embedding_length),
    and computes its embedding using a simple additive model.
    This method is roughly equivalent to:
    > walk_embedding = tf.reduce_sum(predicate_embeddings, axis=1)

    :param predicate_embeddings: 3D Tensor containing the embedding of the predicates in the walk.
    :return: 2D tensor of size (batch_size, embedding_length) containing the walk embeddings.
    """
    batch_size, embedding_len = tf.shape(predicate_embeddings)[0], tf.shape(predicate_embeddings)[2]

    # Transpose the (batch_size, walk_length, n) Tensor in a (walk_length, batch_size, n) Tensor
    transposed_embedding_matrix = tf.transpose(predicate_embeddings, perm=[1, 0, 2])

    # Define the initializer of the scan procedure - an all-zeros matrix
    # where zero is the neutral element wrt. the element-wise sum
    initializer = tf.zeros((batch_size, embedding_len), dtype=predicate_embeddings.dtype)

    # The walk embeddings are given by the sum of the predicate embeddings
    walk_embedding = tf.scan(lambda x, y: x + y, transposed_embedding_matrix, initializer=initializer)

    # Add the initializer as the first step in the scan sequence, in case the walk has zero-length
    return tf.concat(values=[tf.expand_dims(initializer, 0), walk_embedding], axis=0)[-1]
Example #17
Source File: ppo.py From BERT with Apache License 2.0 | 6 votes |
def calculate_generalized_advantage_estimator(
        reward, value, done, gae_gamma, gae_lambda):
    """Compute the generalized advantage estimator (GAE) with a reverse scan.

    The tensor arguments are sliced as rank-2 tensors and accumulated along
    their first axis (presumably [time, batch] -- confirm against callers).

    Args:
      reward: Rewards; every row except the last one is used.
      value: Value estimates; row t+1 is the bootstrap for row t.
      done: Termination flags; a set flag in row t+1 removes both the
        bootstrap value and the accumulated advantage from row t.
      gae_gamma: Discount factor.
      gae_lambda: GAE mixing coefficient.

    Returns:
      GAE estimator. It is one element shorter than the input along the first
      axis, because computing GAE for [0, ..., N-1] needs V for [1, ..., N].
      The result is passed through tf.check_numerics.
    """
    bootstrap_value = value[1:, :]
    continues = 1 - tf.cast(done[1:, :], tf.float32)
    td_residual = (
        reward[:-1, :] + gae_gamma * bootstrap_value * continues
        - value[:-1, :])

    def accumulate(running, step):
        # step is the pair [TD residual, not-done mask] of one time step.
        residual, not_done = step
        return residual + not_done * gae_gamma * gae_lambda * running

    # The recursion runs from the last step towards the first, so the inputs
    # are flipped before the scan and the result is flipped back afterwards.
    reversed_inputs = [
        tf.reverse(td_residual, [0]),
        tf.reverse(continues, [0]),
    ]
    reversed_advantage = tf.scan(
        accumulate,
        reversed_inputs,
        tf.zeros_like(td_residual[0, :]),
        parallel_iterations=1)
    advantage = tf.reverse(reversed_advantage, [0])
    return tf.check_numerics(advantage, "return")
Example #18
Source File: policy.py From Gun-Detector with Apache License 2.0 | 6 votes |
def multi_step(self, all_obs, initial_state, all_actions):
    """Calculate log-probs and other calculations on batch of episodes.

    Args:
      all_obs: Observations, scanned along their first axis.
      initial_state: Initial internal state; its first dimension is taken as
        the batch size.
      all_actions: List of action tensors, scanned along their first axis.

    Returns:
      Tuple (internal_states, logits, log_probs, entropies, self_kls) as
      produced by scanning `self.single_step`; the last entry along the scan
      axis is dropped from each element of the final three lists.
    """
    batch_size = tf.shape(initial_state)[0]

    # The first action only seeds the scan state. The remaining actions are
    # shifted forward by one step and the first one is appended at the end
    # as a dummy "final" action.
    initial_actions = [act[0] for act in all_actions]
    all_actions = [
        tf.concat([act[1:], act[0:1]], 0) for act in all_actions
    ]

    initializer = self.get_initializer(
        batch_size, initial_state, initial_actions)
    (internal_states, _, logits, log_probs,
     entropies, self_kls) = tf.scan(
         self.single_step,
         (all_obs, all_actions),
         initializer=initializer)

    # remove "final" computations
    log_probs = [log_prob[:-1] for log_prob in log_probs]
    entropies = [entropy[:-1] for entropy in entropies]
    self_kls = [self_kl[:-1] for self_kl in self_kls]

    return internal_states, logits, log_probs, entropies, self_kls
Example #19
Source File: TestUpd.py From How-to-Learn-from-Little-Data with MIT License | 6 votes |
def omniglot():
    """Open an interactive session and define the `update_tensor` helper."""
    sess = tf.InteractiveSession()

    """
    def wrapper(v):
        return tf.Print(v, [v], message="Printing v")

    v = tf.Variable(initial_value=np.arange(0, 36).reshape((6, 6)), dtype=tf.float32, name='Matrix')

    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    temp = tf.Variable(initial_value=np.arange(0, 36).reshape((6, 6)), dtype=tf.float32, name='temp')
    temp = wrapper(v)
    #with tf.control_dependencies([temp]):
    temp.eval()
    print 'Hello'"""

    def update_tensor(V, dim2, val):
        """Return V with, in each row, the entry at index dim2 set to val.

        V, dim2 and val are scanned together along their first axis, so row i
        of V gets its element number dim2[i] replaced by val[i].
        """
        # Update tensor V, with index(:,dim2[:]) by val[:]
        val = tf.cast(val, V.dtype)

        def body(_, elems):
            # Tuple parameters in the signature are a SyntaxError on
            # Python 3, so the scan element is unpacked here instead.
            v, d2, chg = elems
            d2_int = tf.cast(d2, tf.int32)
            # NOTE(review): tf.concat_v2 only exists in old TensorFlow
            # releases (later spelled tf.concat) -- confirm target version.
            patched = tf.concat_v2(
                [v[:d2_int], [chg], v[d2_int + 1:]], axis=0)
            # Slice back to the static row length.
            return tf.slice(patched, [0], [v.get_shape().as_list()[0]])

        Z = tf.scan(
            body,
            elems=(V, dim2, val),
            initializer=tf.constant(
                1, shape=V.get_shape().as_list()[1:], dtype=tf.float32),
            name="Scan_Update")
        return Z
Example #20
Source File: filter.py From kvae with MIT License | 6 votes |
def backward_step_fn(self, params, inputs):
    """
    Backwards step over a batch, to be used in tf.scan.

    :param params: pair (mu_back, Sigma_back) carried over from the previous
        scan iteration.
    :param inputs: tuple (mu_pred_tp1, Sigma_pred_tp1, mu_filt_t, Sigma_filt_t, A);
        every entry has a leading batch dimension (batch_size, variable dimensions).
    :return: the updated pair (mu_back, Sigma_back).
    """
    carried_mu, carried_Sigma = params
    mu_pred_tp1, Sigma_pred_tp1, mu_filt_t, Sigma_filt_t, A = inputs

    # Gain J_t = Sigma_filt_t * A^T * inv(Sigma_pred_tp1), batched.
    A_transposed = tf.transpose(A, [0, 2, 1])
    Sigma_pred_inverse = tf.matrix_inverse(Sigma_pred_tp1)
    J_t = tf.matmul(Sigma_filt_t, tf.matmul(A_transposed, Sigma_pred_inverse))

    # Correct the filtered mean by the gain applied to the mean difference.
    mu_difference = carried_mu - mu_pred_tp1
    mu_back = mu_filt_t + tf.matmul(J_t, mu_difference)

    # Covariance update: Sigma_filt_t + J_t * (Sigma_diff) * J_t^H.
    Sigma_difference = carried_Sigma - Sigma_pred_tp1
    Sigma_correction = tf.matmul(
        J_t, tf.matmul(Sigma_difference, J_t, adjoint_b=True))
    Sigma_back = Sigma_filt_t + Sigma_correction

    return mu_back, Sigma_back
Example #21
Source File: tf_lib.py From phillip with GNU General Public License v3.0 | 5 votes |
def scan(f, inputs, initial_state, axis=0):
    """Unrolled, Python-loop counterpart of tf.scan.

    Every tensor in `inputs` is unstacked along `axis`, `f` is applied step
    by step in a plain Python loop, and the per-step outputs are stacked
    again along `axis`.

    Args:
      f: Step function called as `f(previous_output, step_input)`.
      inputs: Structure of tensors (handled through the `util.deep*` helpers;
        presumably arbitrarily nested -- confirm against `util`).
      initial_state: Output fed to the first call of `f`.
      axis: Axis to iterate over and to stack the outputs along.

    Returns:
      The per-step outputs of `f`, combined with `util.deepZipWith` and
      stacked along `axis`.
    """
    def unstacked_iterator(tensor):
        return iter(tf.unstack(tensor, axis=axis))

    def restack(*tensors):
        return tf.stack(tensors, axis=axis)

    steps = util.deepIter(util.deepMap(unstacked_iterator, inputs))

    state = initial_state
    history = []
    for step_input in steps:
        state = f(state, step_input)
        history.append(state)

    return util.deepZipWith(restack, *history)
Example #22
Source File: planning.py From planet with Apache License 2.0 | 5 votes |
def cross_entropy_method(
        cell, objective_fn, state, obs_shape, action_shape, horizon, graph,
        amount=1000, topk=100, iterations=10, min_action=-1, max_action=1):
    """Plan an action sequence with the cross-entropy method.

    A Gaussian belief over action sequences is refined for `iterations`
    rounds: sample `amount` proposals, roll them out through `cell`, score
    them with `objective_fn` and re-fit mean and standard deviation to the
    `topk` best proposals.

    Args:
      cell: RNN cell unrolled with tf.nn.dynamic_rnn to evaluate proposals.
      objective_fn: Maps the states returned by the rollout to one score per
        proposal.
      state: Nested structure of state tensors the rollouts start from.
      obs_shape: Shape of a single observation.
      action_shape: Shape of a single action.
      horizon: Number of planned time steps.
      graph: Not used by this function.
      amount: Number of proposals sampled per batch element and iteration.
      topk: Number of best proposals the belief is re-fit to.
      iterations: Number of refinement rounds; below 1 the initial all-zeros
        mean is returned.
      min_action: Lower clipping bound for sampled actions.
      max_action: Upper clipping bound for sampled actions.

    Returns:
      Mean of the belief after the last iteration, shaped
      (batch, horizon) + action_shape.
    """
    obs_shape = tuple(obs_shape)
    action_shape = tuple(action_shape)

    def tile_over_proposals(tensor):
        # Repeat along the batch axis only.
        return tf.tile(tensor, [amount] + [1] * (tensor.shape.ndims - 1))

    original_batch = tools.shape(tools.nested.flatten(state)[0])[0]
    initial_state = tools.nested.map(tile_over_proposals, state)
    extended_batch = tools.shape(tools.nested.flatten(initial_state)[0])[0]
    use_obs = tf.zeros([extended_batch, horizon, 1], tf.bool)
    obs = tf.zeros((extended_batch, horizon) + obs_shape)

    def iteration(mean_and_stddev, _):
        mean, stddev = mean_and_stddev
        # Sample action proposals from belief.
        noise = tf.random_normal(
            (original_batch, amount, horizon) + action_shape)
        proposals = noise * stddev[:, None] + mean[:, None]
        proposals = tf.clip_by_value(proposals, min_action, max_action)
        # Evaluate proposal actions.
        # NOTE(review): the state is tiled while the proposals are reshaped
        # from (batch, amount, ...); confirm both orderings agree for
        # original_batch > 1.
        proposals = tf.reshape(
            proposals, (extended_batch, horizon) + action_shape)
        (_, rollout_state), _ = tf.nn.dynamic_rnn(
            cell, (0 * obs, proposals, use_obs),
            initial_state=initial_state)
        scores = tf.reshape(
            objective_fn(rollout_state), (original_batch, amount))
        # Re-fit belief to the best ones.
        _, best = tf.nn.top_k(scores, topk, sorted=False)
        # Offset the per-batch indices into the flattened proposal axis.
        best = best + tf.range(original_batch)[:, None] * amount
        best_actions = tf.gather(proposals, best)
        new_mean, variance = tf.nn.moments(best_actions, 1)
        new_stddev = tf.sqrt(variance + 1e-6)
        return new_mean, new_stddev

    belief_shape = (original_batch, horizon) + action_shape
    initial_mean = tf.zeros(belief_shape)
    initial_stddev = tf.ones(belief_shape)
    if iterations < 1:
        return initial_mean

    mean_history, _ = tf.scan(
        iteration,
        tf.range(iterations),
        (initial_mean, initial_stddev),
        back_prop=False)
    # Select belief at last iterations.
    return mean_history[-1]
Example #23
Source File: HRDE_Model_v1.py From QA_HRDE_LTC with MIT License | 5 votes |
def _add_memory_network(self):
    """Append a latent-topic memory read-out to the encoder output.

    The encoder output is projected with `memory_W`, compared against every
    topic column of `memory`, and the softmax-weighted sum of the topic
    columns is concatenated to `self.final_encoder`.
    """
    # print() with a single string argument behaves the same on Python 2
    # and Python 3; the bare print statement is a SyntaxError on Python 3.
    print('[launch] add memory network')

    with tf.name_scope('memory_network'):
        # memory space for latent topic
        self.memory = tf.Variable(
            tf.random_uniform(
                [self.memory_dim, self.topic_size],
                minval=-0.25,
                maxval=0.25,
                dtype=tf.float32,
                seed=None),
            name="latent_topic_memory")

        # NOTE(review): this variable reuses the name "latent_topic_memory";
        # it looks like a copy-paste, but renaming it would change variable
        # names in existing checkpoints -- confirm before touching.
        self.memory_W = tf.Variable(
            tf.random_uniform(
                [self.hidden_dim, self.memory_dim],
                minval=-0.25,
                maxval=0.25,
                dtype=tf.float32,
                seed=None),
            name="latent_topic_memory")

        topic_sim_project = tf.matmul(self.final_encoder, self.memory_W)
        topic_sim = tf.matmul(topic_sim_project, self.memory)

        # normalize
        topic_sim_norm = tf.nn.softmax(logits=topic_sim, dim=-1)

        shaped_input = tf.reshape(
            topic_sim_norm, [self.batch_size, self.topic_size])

        # The accumulator `a` is ignored: each batch row of topic weights
        # independently scales the topic columns of the memory.
        topic_sim_mul_memory = tf.scan(
            lambda a, x: tf.multiply(self.memory, x),
            shaped_input,
            initializer=self.memory)
        # Sum over topics -> [batch_size, memory_dim].
        rsum = tf.reduce_sum(topic_sim_mul_memory, axis=-1)

        # final context
        self.final_encoder = tf.concat([self.final_encoder, rsum], axis=-1)

    # The encoder output grew by memory_dim because of the concat.
    self.final_encoder_dimension = self.hidden_dim_con + self.memory_dim
    self.final_encoderR_dimension = self.hidden_dim_con
Example #24
Source File: DE_Model_mem_v1.py From QA_HRDE_LTC with MIT License | 5 votes |
def _add_memory_network_same_dim(self):
    """Add a latent-topic memory read-out of the same width as the encoder.

    The memory has one row of size `hidden_dim` per topic, so the encoder
    output is compared against it without a projection and the
    softmax-weighted sum of the memory rows is added to `self.final_encoder`.
    """
    # print() with a single string argument behaves the same on Python 2
    # and Python 3; the bare print statement is a SyntaxError on Python 3.
    print('[launch] add memory network - 1st version SAME DIM (memory == hidden) / without output projection')

    with tf.name_scope('memory_network_v1_same_dim'):
        # memory space for latent topic
        self.memory = tf.get_variable(
            "latent_topic_memory",
            shape=[self.topic_size, self.hidden_dim],
            initializer=tf.orthogonal_initializer())
        self.memory = tf.nn.dropout(
            self.memory, keep_prob=self.dr_memory_prob)

        # same dimension - No projection layer
        self.topic_sim_project = self.final_encoder
        self.topic_sim = tf.matmul(
            self.topic_sim_project, self.memory, transpose_b=True)

        # normalize
        self.topic_sim_sigmoid_softmax = tf.nn.softmax(
            logits=self.topic_sim, dim=-1)

        shaped_input = tf.reshape(
            self.topic_sim_sigmoid_softmax,
            [self.batch_size, self.topic_size])

        # The accumulator `a` is ignored: each batch row of topic weights
        # independently scales the (transposed) memory.
        topic_sim_mul_memory = tf.scan(
            lambda a, x: tf.multiply(tf.transpose(self.memory), x),
            shaped_input,
            initializer=tf.transpose(self.memory))

        # Sum over topics, then bring the result to [batch_size, hidden_dim].
        tmpT = tf.reduce_sum(topic_sim_mul_memory, axis=-1, keep_dims=True)
        tmpT2 = tf.transpose(tmpT, [0, 2, 1])
        self.rsum = tf.reshape(tmpT2, [self.batch_size, self.hidden_dim])

        # final context
        self.final_encoder = self.final_encoder + self.rsum

    self.final_encoder_dimension = self.hidden_dim
    self.final_encoderR_dimension = self.hidden_dim
Example #25
Source File: DE_Model_mem_v2.py From QA_HRDE_LTC with MIT License | 5 votes |
def _add_memory_network_same_dim(self):
    """Add a latent-topic memory read-out of the same width as the encoder.

    The memory has one row of size `hidden_dim` per topic, so the encoder
    output is compared against it without a projection and the
    softmax-weighted sum of the memory rows is added to `self.final_encoder`.
    """
    # print() with a single string argument behaves the same on Python 2
    # and Python 3; the bare print statement is a SyntaxError on Python 3.
    print('[launch] add memory network - 1st version SAME DIM (memory == hidden) / without output projection')

    with tf.name_scope('memory_network_v1_same_dim'):
        # memory space for latent topic
        self.memory = tf.get_variable(
            "latent_topic_memory",
            shape=[self.topic_size, self.hidden_dim],
            initializer=tf.orthogonal_initializer())
        self.memory = tf.nn.dropout(
            self.memory, keep_prob=self.dr_memory_prob)

        # same dimension - No projection layer
        self.topic_sim_project = self.final_encoder
        self.topic_sim = tf.matmul(
            self.topic_sim_project, self.memory, transpose_b=True)

        # normalize
        self.topic_sim_sigmoid_softmax = tf.nn.softmax(
            logits=self.topic_sim, dim=-1)

        shaped_input = tf.reshape(
            self.topic_sim_sigmoid_softmax,
            [self.batch_size, self.topic_size])

        # The accumulator `a` is ignored: each batch row of topic weights
        # independently scales the (transposed) memory.
        topic_sim_mul_memory = tf.scan(
            lambda a, x: tf.multiply(tf.transpose(self.memory), x),
            shaped_input,
            initializer=tf.transpose(self.memory))

        # Sum over topics, then bring the result to [batch_size, hidden_dim].
        tmpT = tf.reduce_sum(topic_sim_mul_memory, axis=-1, keep_dims=True)
        tmpT2 = tf.transpose(tmpT, [0, 2, 1])
        self.rsum = tf.reshape(tmpT2, [self.batch_size, self.hidden_dim])

        # final context
        self.final_encoder = self.final_encoder + self.rsum

    self.final_encoder_dimension = self.hidden_dim
    self.final_encoderR_dimension = self.hidden_dim
Example #26
Source File: HRDE_Model_mem_v2.py From QA_HRDE_LTC with MIT License | 5 votes |
def deprecated_add_memory_network(self):
    """Append a latent-topic memory read-out to the encoder output.

    The encoder output is projected with a dropout-regularised `memory_W`
    plus a bias, compared against every topic column of `memory`, and the
    softmax-weighted sum of the topic columns is concatenated to
    `self.final_encoder`.
    """
    # print() with a single string argument behaves the same on Python 2
    # and Python 3; the bare print statement is a SyntaxError on Python 3.
    print('[launch] add memory network - 1st version / without output projection')

    with tf.name_scope('memory_network_v1'):
        # memory space for latent topic
        self.memory = tf.get_variable(
            "latent_topic_memory",
            shape=[self.memory_dim, self.topic_size],
            initializer=tf.orthogonal_initializer())

        self.memory_W = tf.Variable(
            tf.random_uniform(
                [self.hidden_dim, self.memory_dim],
                minval=-0.25,
                maxval=0.25,
                dtype=tf.float32,
                seed=None),
            name="memory_projection_W")
        self.memory_W = tf.nn.dropout(
            self.memory_W, keep_prob=self.dr_memory_prob)

        memory_bias = tf.Variable(
            tf.zeros([1], dtype=tf.float32), name="memory_projection_bias")

        topic_sim_project = (
            tf.matmul(self.final_encoder, self.memory_W) + memory_bias)
        topic_sim = tf.matmul(topic_sim_project, self.memory)

        # normalize
        self.topic_sim_sigmoid_softmax = tf.nn.softmax(
            logits=topic_sim, dim=-1)

        shaped_input = tf.reshape(
            self.topic_sim_sigmoid_softmax,
            [self.batch_size, self.topic_size])

        # The accumulator `a` is ignored: each batch row of topic weights
        # independently scales the topic columns of the memory.
        topic_sim_mul_memory = tf.scan(
            lambda a, x: tf.multiply(self.memory, x),
            shaped_input,
            initializer=self.memory)
        # Sum over topics -> [batch_size, memory_dim].
        self.rsum = tf.reduce_sum(topic_sim_mul_memory, axis=-1)

        # final context
        self.final_encoder = tf.concat(
            [self.final_encoder, self.rsum], axis=-1)

    # The encoder output grew by memory_dim because of the concat.
    self.final_encoder_dimension = self.hidden_dim_con + self.memory_dim
    self.final_encoderR_dimension = self.hidden_dim_con
Example #27
Source File: simulate.py From planet with Apache License 2.0 | 5 votes |
def collect_rollouts(
        step, env_ctor, duration, num_agents, agent_config, isolate_envs):
    """Build the graph that rolls out an MPC agent for `duration` steps.

    Args:
      step: Passed on to the MPCAgent constructor.
      env_ctor: Environment constructor handed to `define_batch_env`.
      duration: Number of environment steps to simulate.
      num_agents: Number of environments simulated in parallel.
      agent_config: Configuration passed on to the MPCAgent constructor.
      isolate_envs: Passed on to `define_batch_env`.

    Returns:
      Tuple (score, image, action, reward, cleanup). `score` holds the scores
      of the steps flagged as done, `image`, `action` and `reward` are
      transposed so the agent axis comes first, and `cleanup` is a callable
      that closes the batch environment.
    """
    batch_env = define_batch_env(env_ctor, num_agents, isolate_envs)
    agent = mpc_agent.MPCAgent(batch_env, step, False, False, agent_config)

    def cleanup():
        return batch_env.close()

    def simulate_fn(unused_last, index):
        # The environments are reset on the very first scan iteration.
        done, score, unused_summary = simulate_step(
            batch_env, agent,
            log=False,
            reset=tf.equal(index, 0))
        # Read the environment tensors only after the step has happened.
        with tf.control_dependencies([done, score]):
            image = batch_env.observ
            batch_action = batch_env.action
            batch_reward = batch_env.reward
        return done, score, image, batch_action, batch_reward

    # Only shapes and dtypes of the initializer matter to the scan.
    initializer = (
        tf.zeros([num_agents], tf.bool),
        tf.zeros([num_agents], tf.float32),
        0 * batch_env.observ,
        0 * batch_env.action,
        tf.zeros([num_agents], tf.float32),
    )
    dones, scores, images, actions, rewards = tf.scan(
        simulate_fn,
        tf.range(duration),
        initializer,
        parallel_iterations=1)

    finished_scores = tf.boolean_mask(scores, dones)
    # Swap the leading scan axis with the agent axis.
    images = tf.transpose(images, [1, 0, 2, 3, 4])
    actions = tf.transpose(actions, [1, 0, 2])
    rewards = tf.transpose(rewards)
    return finished_scores, images, actions, rewards, cleanup
Example #28
Source File: neural_gpu.py From training_results_v0.5 with Apache License 2.0 | 5 votes |
def diagonal_neural_gpu(inputs, hparams, name=None):
    """Improved Neural GPU as in https://arxiv.org/abs/1702.08727.

    Args:
      inputs: Rank-4 input tensor; it is both the initial state and, with its
        first two axes swapped, the sequence that is scanned over.
      hparams: Hyper-parameters; reads num_hidden_layers, kernel_height,
        kernel_width, hidden_size and dropout.
      name: Optional variable scope name.

    Returns:
      Pair (state, loss): the first entry of the scanned states and twice
      the mean of the per-step losses.
    """
    kernel = (hparams.kernel_height, hparams.kernel_width)

    with tf.variable_scope(name, "diagonal_neural_gpu"):

        def step(state_tup, inp):
            """Single step of the improved Neural GPU."""
            state, _ = state_tup
            x = state
            # Only the loss of the last layer is kept.
            for layer in range(hparams.num_hidden_layers):
                x, new_loss = common_layers.diagonal_conv_gru(
                    x,
                    kernel,
                    hparams.hidden_size,
                    dropout=hparams.dropout,
                    name="dcgru_%d" % layer)
            # Padding input is zeroed-out in the modality, we check this by
            # summing.
            abs_sum = tf.reduce_sum(tf.abs(inp), axis=[1, 2])
            padding_inp = tf.less(abs_sum, 0.00001)
            # No-op where inp is padding.
            new_state = tf.where(padding_inp, state, x)
            return new_state, new_loss

        time_major_inputs = tf.transpose(inputs, [1, 0, 2, 3])
        states, losses = tf.scan(
            step,
            time_major_inputs,
            initializer=(inputs, tf.constant(0.0)),
            parallel_iterations=1,
            swap_memory=True)
        # NOTE(review): index 0 selects the state after the first scan step,
        # not the last one -- confirm this is intended.
        return states[0, :, :, :, :], 2.0 * tf.reduce_mean(losses)
Example #29
Source File: common_layers.py From training_results_v0.5 with Apache License 2.0 | 5 votes |
def running_global_pool_1d(inputs, pooling_type="MAX"):
    """Same global pool, but only for the elements up to the current element.

    Useful for outputs where the state of future elements is not known.
    Takes no mask as all elements up to the current element are assumed to
    exist. Currently only supports maximum. Equivalent to using a lower
    triangle bias.

    Args:
      inputs: A tensor of shape [batch_size, sequence_length, input_dims]
        containing the sequences of input vectors.
      pooling_type: Pooling type to use. Currently only supports 'MAX'; the
        argument is discarded.

    Returns:
      A tensor of shape [batch_size, sequence_length, input_dims] containing
      the running 'totals'.
    """
    del pooling_type  # The running maximum is the only implemented pooling.

    # Swapping the first two axes is its own inverse, so one permutation
    # serves for both directions.
    swap_batch_and_time = [1, 0, 2]
    with tf.name_scope("running_global_pool", values=[inputs]):
        # tf.scan iterates over the first axis: put seq_length first.
        time_major = tf.transpose(inputs, swap_batch_and_time)
        # Without an initializer the first element seeds the running maximum.
        running_max = tf.scan(tf.maximum, time_major, swap_memory=True)
        # Permute output to get back to original order.
        return tf.transpose(running_max, swap_batch_and_time)
Example #30
Source File: temporal_difference.py From planet with Apache License 2.0 | 5 votes |
def lambda_return(
        reward, value, bootstrap, discount, lambda_, axis, stop_gradient=True):
    """Average of different multi-step returns.

    Setting lambda=1 gives a discounted Monte Carlo return.
    Setting lambda=0 gives a fixed 1-step return.

    Args:
      reward: Reward tensor with a statically known rank.
      value: Value estimates with the same rank as `reward`.
      bootstrap: Optional value following the last step along `axis`; zeros
        when None.
      discount: Discount factor.
      lambda_: Mixing coefficient between the multi-step returns.
      axis: Axis to accumulate over. Zero and negative values are supported.
      stop_gradient: Whether to block gradients through the result. It also
        disables back-propagation through the scan.

    Returns:
      Tensor shaped like `reward` holding the lambda-return accumulated
      backwards along `axis`.
    """
    assert reward.shape.ndims == value.shape.ndims, (reward.shape, value.shape)

    # Bring the aggregation dimension front by swapping it with axis 0.
    # A swap is its own inverse, so the same permutation restores the layout
    # afterwards. Unlike list-slicing arithmetic, it is also a valid
    # permutation for axis == 0 and for negative axes.
    ndims = reward.shape.ndims
    if axis < 0:
        axis += ndims
    dims = list(range(ndims))
    dims[0], dims[axis] = dims[axis], dims[0]

    reward = tf.transpose(reward, dims)
    value = tf.transpose(value, dims)
    if bootstrap is None:
        bootstrap = tf.zeros_like(value[-1])
    # Value of the following step; the bootstrap follows the last step.
    next_values = tf.concat([value[1:], bootstrap[None]], 0)
    inputs = reward + discount * next_values * (1 - lambda_)
    return_ = tf.scan(
        fn=lambda agg, cur: cur + discount * lambda_ * agg,
        elems=inputs,
        initializer=bootstrap,
        back_prop=not stop_gradient,
        reverse=True)
    return_ = tf.transpose(return_, dims)
    if stop_gradient:
        return_ = tf.stop_gradient(return_)
    return return_