Python tensorflow.identity() Examples

The following are 30 code examples of tensorflow.identity(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow , or try the search function .
Example #1
Source File: modalities.py    From fine-lm with MIT License 6 votes vote down vote up
def get_channel_embeddings(self,
                             io_depth,
                             targets,
                             hidden_size,
                             name="channel"):
    """Get separate embedding for each of the channels."""
    targets_split = tf.split(targets, io_depth, axis=3)
    rgb_embedding_var = tf.get_variable("rgb_target_emb_%s" % name,
                                        [256 * io_depth, hidden_size])
    rgb_embedding_var = tf.identity(rgb_embedding_var)
    rgb_embedding_var *= float(hidden_size)**0.5
    channel_target_embs = []
    for i in range(io_depth):
      # Adding the channel offsets to get the right embedding since the
      # embedding tensor has shape 256 * io_depth, hidden_size
      target_ids = tf.squeeze(targets_split[i], axis=3) + i * 256
      target_embs = common_layers.gather(rgb_embedding_var, target_ids)
      channel_target_embs.append(target_embs)

    return tf.concat(channel_target_embs, axis=-1) 
Example #2
Source File: collect.py    From fine-lm with MIT License 6 votes vote down vote up
def simulate(self, action):

    # There is subtlety here. We need to collect data
    # obs, action = policy(obs), done, reward = env(abs, action)
    # Thus we need to enqueue data before assigning new observation

    reward, done = self._batch_env.simulate(action)

    with tf.control_dependencies([reward, done]):
      enqueue_op = self.speculum.enqueue(
          [self._observ.read_value(), reward, done, action])

    with tf.control_dependencies([enqueue_op]):
      assign = self._observ.assign(self._batch_env.observ)

    with tf.control_dependencies([assign]):
      return tf.identity(reward), tf.identity(done) 
Example #3
Source File: py_func_batch_env.py    From fine-lm with MIT License 6 votes vote down vote up
def simulate(self, action):
    """Step the batch of environments.

    The results of the step can be accessed from the variables defined below.

    Args:
      action: Tensor holding the batch of actions to apply.

    Returns:
      Operation.
    """
    with tf.name_scope('environment/simulate'):
      if action.dtype in (tf.float16, tf.float32, tf.float64):
        action = tf.check_numerics(action, 'action')
      observ_dtype = utils.parse_dtype(self._batch_env.observation_space)
      observ, reward, done = tf.py_func(
          lambda a: self._batch_env.step(a)[:3], [action],
          [observ_dtype, tf.float32, tf.bool], name='step')
      observ = tf.check_numerics(observ, 'observ')
      reward = tf.check_numerics(reward, 'reward')
      reward.set_shape((len(self),))
      done.set_shape((len(self),))
      with tf.control_dependencies([self._observ.assign(observ)]):
        return tf.identity(reward), tf.identity(done) 
Example #4
Source File: py_func_batch_env.py    From fine-lm with MIT License 6 votes vote down vote up
def _reset_non_empty(self, indices):
    """Reset the batch of environments.

    Args:
      indices: The batch indices of the environments to reset; defaults to all.

    Returns:
      Batch tensor of the new observations.
    """
    observ_dtype = utils.parse_dtype(self._batch_env.observation_space)
    observ = tf.py_func(
        self._batch_env.reset, [indices], observ_dtype, name='reset')
    observ = tf.check_numerics(observ, 'observ')
    with tf.control_dependencies([
        tf.scatter_update(self._observ, indices, observ)]):
      return tf.identity(observ) 
Example #5
Source File: in_graph_batch_env.py    From soccer-matlab with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def reset(self, indices=None):
    """Reset the batch of environments.

    Args:
      indices: The batch indices of the environments to reset; defaults to all.

    Returns:
      Batch tensor of the new observations.
    """
    if indices is None:
      indices = tf.range(len(self._batch_env))
    observ_dtype = self._parse_dtype(self._batch_env.observation_space)
    observ = tf.py_func(
        self._batch_env.reset, [indices], observ_dtype, name='reset')
    observ = tf.check_numerics(observ, 'observ')
    reward = tf.zeros_like(indices, tf.float32)
    done = tf.zeros_like(indices, tf.bool)
    with tf.control_dependencies([
        tf.scatter_update(self._observ, indices, observ),
        tf.scatter_update(self._reward, indices, reward),
        tf.scatter_update(self._done, indices, done)]):
      return tf.identity(observ) 
Example #6
Source File: graphs.py    From DOTA_models with Apache License 2.0 6 votes vote down vote up
def _lm_loss(self,
               inputs,
               emb_key='lm_embedded',
               lstm_layer='lstm',
               lm_loss_layer='lm_loss',
               loss_name='lm_loss',
               compute_loss=True):
    embedded = self.layers['embedding'](inputs.tokens)
    self.tensors[emb_key] = embedded
    lstm_out, next_state = self.layers[lstm_layer](embedded, inputs.state,
                                                   inputs.length)
    if compute_loss:
      loss = self.layers[lm_loss_layer](
          [lstm_out, inputs.labels, inputs.weights])
      with tf.control_dependencies([inputs.save_state(next_state)]):
        loss = tf.identity(loss)
        tf.summary.scalar(loss_name, loss)

      return loss 
Example #7
Source File: optimize.py    From fine-lm with MIT License 6 votes vote down vote up
def weight_decay_and_noise(loss, hparams, learning_rate, var_list=None):
  """Apply weight decay and weight noise."""
  if var_list is None:
    var_list = tf.trainable_variables()

  decay_vars = [v for v in var_list]
  noise_vars = [v for v in var_list if "/body/" in v.name]

  weight_decay_loss = weight_decay(hparams.weight_decay, decay_vars)
  if hparams.weight_decay:
    tf.summary.scalar("losses/weight_decay", weight_decay_loss)
  weight_noise_ops = weight_noise(hparams.weight_noise, learning_rate,
                                  noise_vars)

  with tf.control_dependencies(weight_noise_ops):
    loss = tf.identity(loss)

  loss += weight_decay_loss
  return loss 
Example #8
Source File: in_graph_batch_env.py    From soccer-matlab with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def reset(self, indices=None):
    """Reset the batch of environments.

    Args:
      indices: The batch indices of the environments to reset; defaults to all.

    Returns:
      Batch tensor of the new observations.
    """
    if indices is None:
      indices = tf.range(len(self._batch_env))
    observ_dtype = self._parse_dtype(self._batch_env.observation_space)
    observ = tf.py_func(
        self._batch_env.reset, [indices], observ_dtype, name='reset')
    observ = tf.check_numerics(observ, 'observ')
    reward = tf.zeros_like(indices, tf.float32)
    done = tf.zeros_like(indices, tf.bool)
    with tf.control_dependencies([
        tf.scatter_update(self._observ, indices, observ),
        tf.scatter_update(self._reward, indices, reward),
        tf.scatter_update(self._done, indices, done)]):
      return tf.identity(observ) 
Example #9
Source File: tf_atari_wrappers.py    From fine-lm with MIT License 6 votes vote down vote up
def simulate(self, action):
    with tf.name_scope("environment/simulate"):  # Do we need this?
      initializer = (tf.zeros(self.old_shape, dtype=tf.float32),
                     tf.fill((len(self),), 0.0), tf.fill((len(self),), False))

      def not_done_step(a, _):
        reward, done = self._batch_env.simulate(action)
        with tf.control_dependencies([reward, done]):
          r0 = self._batch_env.observ + 0
          r1 = tf.add(a[1], reward)
          r2 = tf.logical_or(a[2], done)
          return (r0, r1, r2)

      simulate_ret = tf.scan(not_done_step, tf.range(self.skip),
                             initializer=initializer, parallel_iterations=1,
                             infer_shape=False)
      observations, rewards, dones = simulate_ret
      split_observations = tf.split(observations, self.skip, axis=0)
      split_observations = [tf.squeeze(o, axis=0) for o in split_observations]
      observation = tf.concat(split_observations, axis=-1)
      with tf.control_dependencies([self._observ.assign(observation)]):
        return tf.identity(rewards[-1, ...]), tf.identity(dones[-1, ...]) 
Example #10
Source File: algorithm.py    From soccer-matlab with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def _update_value_step(self, observ, reward, length):
    """Compute the current value loss and perform a gradient update step.

    Args:
      observ: Sequences of observations.
      reward: Sequences of reward.
      length: Batch of sequence lengths.

    Returns:
      Tuple of loss tensor and summary tensor.
    """
    loss, summary = self._value_loss(observ, reward, length)
    gradients, variables = (
        zip(*self._value_optimizer.compute_gradients(loss)))
    optimize = self._value_optimizer.apply_gradients(
        zip(gradients, variables))
    summary = tf.summary.merge([
        summary,
        tf.summary.scalar('gradient_norm', tf.global_norm(gradients)),
        utility.gradient_summaries(
            zip(gradients, variables), dict(value=r'.*'))])
    with tf.control_dependencies([optimize]):
      return [tf.identity(loss), tf.identity(summary)] 
Example #11
Source File: common_image_attention.py    From fine-lm with MIT License 6 votes vote down vote up
def get_channel_embeddings(io_depth, targets, hidden_size, name="channel"):
  """Get separate embedding for each of the channels."""
  targets_split = tf.split(targets, io_depth, axis=3)
  rgb_embedding_var = tf.get_variable("rgb_target_emb_%s" % name,
                                      [256 * io_depth, hidden_size])
  rgb_embedding_var = tf.identity(rgb_embedding_var)
  rgb_embedding_var *= float(hidden_size)**0.5
  channel_target_embs = []
  for i in range(io_depth):
    # Adding the channel offsets to get the right embedding since the
    # embedding tensor has shape 256 * io_depth, hidden_size
    target_ids = tf.squeeze(targets_split[i], axis=3) + i * 256
    target_embs = common_layers.gather(rgb_embedding_var, target_ids)
    channel_target_embs.append(target_embs)

  return tf.concat(channel_target_embs, axis=-1) 
Example #12
Source File: component.py    From DOTA_models with Apache License 2.0 6 votes vote down vote up
def build_structured_training(self, state, network_states):
    """Builds a beam search based training loop for this component.

    The default implementation builds a dummy graph and raises a
    TensorFlow runtime exception to indicate that structured training
    is not implemented.

    Args:
      state: MasterState from the 'AdvanceMaster' op that advances the
        underlying master to this component.
      network_states: dictionary of component NetworkState objects.

    Returns:
      (handle, cost, correct, total) -- These are TF ops corresponding
      to the final handle after unrolling, the total cost, and the
      total number of actions. Since the number of correctly predicted
      actions is not applicable in the structured training setting, a
      dummy value should returned.
    """
    del network_states  # Unused.
    with tf.control_dependencies([tf.Assert(False, ['Not implemented.'])]):
      handle = tf.identity(state.handle)
    cost = tf.constant(0.)
    correct, total = tf.constant(0), tf.constant(0)
    return handle, cost, correct, total 
Example #13
Source File: utils.py    From UROP-Adversarial-Feature-Matching-for-Text-Generation with GNU Affero General Public License v3.0 6 votes vote down vote up
def calculate_mmd(x, y, param, batch_size):
	xt = tf.transpose(x)
	yt = tf.transpose(y)
	x0 = tf.identity(x)
	y0 = tf.identity(y)
	x1 = tf.identity(xt)
	y1 = tf.identity(yt)
	for i in range(batch_size - 1):
		x0 = tf.concat([x0, x], axis=1)
		y0 = tf.concat([y0, y], axis=1)
		x1 = tf.concat([x1, xt], axis=0)
		y1 = tf.concat([y1, yt], axis=0)
	gaussian_mmd = calculate_gaussian_mmd(x0, y0, x1, y1, param, batch_size)
	logistic_mmd = calculate_logistic_mmd(x0, y0, x1, y1, param, batch_size)
	mmd = param['logistic'] * logistic_mmd + param['gaussian'] * gaussian_mmd
	return mmd 
Example #14
Source File: build.py    From Traffic_sign_detection_YOLO with MIT License 6 votes vote down vote up
def build_forward(self):
		verbalise = self.FLAGS.verbalise

		# Placeholders
		inp_size = [None] + self.meta['inp_size']
		self.inp = tf.placeholder(tf.float32, inp_size, 'input')
		self.feed = dict() # other placeholders

		# Build the forward pass
		state = identity(self.inp)
		roof = self.num_layer - self.ntrain
		self.say(HEADER, LINE)
		for i, layer in enumerate(self.darknet.layers):
			scope = '{}-{}'.format(str(i),layer.type)
			args = [layer, state, i, roof, self.feed]
			state = op_create(*args)
			mess = state.verbalise()
			self.say(mess)
		self.say(LINE)

		self.top = state
		self.out = tf.identity(state.out, name='output') 
Example #15
Source File: tfutil.py    From disentangling_conditional_gans with MIT License 6 votes vote down vote up
def autosummary(name, value):
    id = name.replace('/', '_')
    if is_tf_expression(value):
        with tf.name_scope('summary_' + id), tf.device(value.device):
            update_op = _create_autosummary_var(name, value)
            with tf.control_dependencies([update_op]):
                return tf.identity(value)
    else: # python scalar or numpy array
        if name not in _autosummary_immediate:
            with absolute_name_scope('Autosummary/' + id), tf.device(None), tf.control_dependencies(None):
                update_value = tf.placeholder(tf.float32)
                update_op = _create_autosummary_var(name, update_value)
                _autosummary_immediate[name] = update_op, update_value
        update_op, update_value = _autosummary_immediate[name]
        run(update_op, {update_value: np.float32(value)})
        return value

# Create the necessary ops to include autosummaries in TensorBoard report.
# Note: This should be done only once per graph. 
Example #16
Source File: build.py    From Automatic-Identification-and-Counting-of-Blood-Cells with GNU General Public License v3.0 6 votes vote down vote up
def build_forward(self):
        verbalise = self.FLAGS.verbalise

        # Placeholders
        inp_size = [None] + self.meta['inp_size']
        self.inp = tf.placeholder(tf.float32, inp_size, 'input')
        self.feed = dict()  # other placeholders

        # Build the forward pass
        state = identity(self.inp)
        roof = self.num_layer - self.ntrain
        self.say(HEADER, LINE)
        for i, layer in enumerate(self.darknet.layers):
            scope = '{}-{}'.format(str(i), layer.type)
            args = [layer, state, i, roof, self.feed]
            state = op_create(*args)
            mess = state.verbalise()
            self.say(mess)
        self.say(LINE)

        self.top = state
        self.out = tf.identity(state.out, name='output') 
Example #17
Source File: tf_atari_wrappers.py    From fine-lm with MIT License 6 votes vote down vote up
def simulate(self, action):
    with tf.name_scope("environment/simulate"):  # Do we need this?
      initializer = (tf.zeros_like(self._observ),
                     tf.fill((len(self),), 0.0), tf.fill((len(self),), False))

      def not_done_step(a, _):
        reward, done = self._batch_env.simulate(action)
        with tf.control_dependencies([reward, done]):
          # TODO(piotrmilos): possibly ignore envs with done
          r0 = tf.maximum(a[0], self._batch_env.observ)
          r1 = tf.add(a[1], reward)
          r2 = tf.logical_or(a[2], done)

          return (r0, r1, r2)

      simulate_ret = tf.scan(not_done_step, tf.range(self.skip),
                             initializer=initializer, parallel_iterations=1,
                             infer_shape=False)
      simulate_ret = [ret[-1, ...] for ret in simulate_ret]

      with tf.control_dependencies([self._observ.assign(simulate_ret[0])]):
        return tf.identity(simulate_ret[1]), tf.identity(simulate_ret[2]) 
Example #18
Source File: yellowfin.py    From fine-lm with MIT License 5 votes vote down vote up
def _curvature_range(self):
    """Curvature range.

    Returns:
      h_max_t, h_min_t ops
    """
    self._curv_win = tf.get_variable("curv_win",
                                     dtype=tf.float32,
                                     trainable=False,
                                     shape=[self.curvature_window_width,],
                                     initializer=tf.zeros_initializer)
    # We use log smoothing for curvature range
    self._curv_win = tf.scatter_update(self._curv_win,
                                       self._step % self.curvature_window_width,
                                       tf.log(self._grad_norm_squared))
    # Note here the iterations start from iteration 0
    valid_window = tf.slice(self._curv_win,
                            tf.constant([0,]),
                            tf.expand_dims(
                                tf.minimum(
                                    tf.constant(self.curvature_window_width),
                                    self._step + 1), dim=0))
    self._h_min_t = tf.reduce_min(valid_window)
    self._h_max_t = tf.reduce_max(valid_window)

    curv_range_ops = []
    with tf.control_dependencies([self._h_min_t, self._h_max_t]):
      avg_op = self._moving_averager.apply([self._h_min_t, self._h_max_t])
      with tf.control_dependencies([avg_op]):
        self._h_min = tf.exp(
            tf.identity(self._moving_averager.average(self._h_min_t)))
        self._h_max = tf.exp(
            tf.identity(self._moving_averager.average(self._h_max_t)))
        if self._sparsity_debias:
          self._h_min *= self._sparsity_avg
          self._h_max *= self._sparsity_avg
    curv_range_ops.append(avg_op)
    return curv_range_ops  # h_max_t, h_min_t 
Example #19
Source File: loop.py    From soccer-matlab with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def _define_step(self, done, score, summary):
    """Combine operations of a phase.

    Keeps track of the mean score and when to report it.

    Args:
      done: Tensor indicating whether current score can be used.
      score: Tensor holding the current, possibly intermediate, score.
      summary: Tensor holding summary string to write if not an empty string.

    Returns:
      Tuple of summary tensor, mean score, and new global step. The mean score
      is zero for non reporting steps.
    """
    if done.shape.ndims == 0:
      done = done[None]
    if score.shape.ndims == 0:
      score = score[None]
    score_mean = streaming_mean.StreamingMean((), tf.float32)
    with tf.control_dependencies([done, score, summary]):
      done_score = tf.gather(score, tf.where(done)[:, 0])
      submit_score = tf.cond(
          tf.reduce_any(done), lambda: score_mean.submit(done_score), tf.no_op)
    with tf.control_dependencies([submit_score]):
      mean_score = tf.cond(self._report, score_mean.clear, float)
      steps_made = tf.shape(score)[0]
      next_step = self._step.assign_add(steps_made)
    with tf.control_dependencies([mean_score, next_step]):
      return tf.identity(summary), mean_score, next_step, steps_made 
Example #20
Source File: in_graph_env.py    From soccer-matlab with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def reset(self):
    """Reset the environment.

    Returns:
      Tensor of the current observation.
    """
    observ_dtype = self._parse_dtype(self._env.observation_space)
    observ = tf.py_func(self._env.reset, [], observ_dtype, name='reset')
    observ = tf.check_numerics(observ, 'observ')
    with tf.control_dependencies([
        self._observ.assign(observ),
        self._reward.assign(0),
        self._done.assign(False)]):
      return tf.identity(observ) 
Example #21
Source File: discretization.py    From fine-lm with MIT License 5 votes vote down vote up
def vq_discrete_bottleneck(x,
                           bottleneck_bits,
                           beta=0.25,
                           decay=0.999,
                           epsilon=1e-5,
                           soft_em=False,
                           num_samples=10):
  """Simple vector quantized discrete bottleneck."""
  bottleneck_size = 2**bottleneck_bits
  x_shape = common_layers.shape_list(x)
  hidden_size = x_shape[-1]
  means, ema_means, ema_count = get_vq_bottleneck(bottleneck_size, hidden_size)
  x = tf.reshape(x, [-1, hidden_size])
  x_means_hot, e_loss = vq_nearest_neighbor(
      x, means, soft_em=soft_em, num_samples=num_samples)

  # Update the ema variables
  updated_ema_count = moving_averages.assign_moving_average(
      ema_count,
      tf.reduce_sum(
          tf.reshape(x_means_hot, shape=[-1, bottleneck_size]), axis=0),
      decay,
      zero_debias=False)

  dw = tf.matmul(x_means_hot, x, transpose_a=True)
  updated_ema_means = tf.identity(moving_averages.assign_moving_average(
      ema_means, dw, decay, zero_debias=False))
  n = tf.reduce_sum(updated_ema_count, axis=-1, keepdims=True)
  updated_ema_count = (
      (updated_ema_count + epsilon) / (n + bottleneck_size * epsilon) * n)
  updated_ema_means /= tf.expand_dims(updated_ema_count, axis=-1)
  with tf.control_dependencies([e_loss]):
    update_means = means.assign(updated_ema_means)
    with tf.control_dependencies([update_means]):
      loss = beta * e_loss

  d = tf.reshape(x_means_hot, x_shape[:-1] + [bottleneck_size])
  return d, loss 
Example #22
Source File: adversary.py    From lirpg with MIT License 5 votes vote down vote up
def build_graph(self, obs_ph, acs_ph, reuse=False):
        with tf.variable_scope(self.scope):
            if reuse:
                tf.get_variable_scope().reuse_variables()

            with tf.variable_scope("obfilter"):
                self.obs_rms = RunningMeanStd(shape=self.observation_shape)
            obs = (obs_ph - self.obs_rms.mean / self.obs_rms.std)
            _input = tf.concat([obs, acs_ph], axis=1)  # concatenate the two input -> form a transition
            p_h1 = tf.contrib.layers.fully_connected(_input, self.hidden_size, activation_fn=tf.nn.tanh)
            p_h2 = tf.contrib.layers.fully_connected(p_h1, self.hidden_size, activation_fn=tf.nn.tanh)
            logits = tf.contrib.layers.fully_connected(p_h2, 1, activation_fn=tf.identity)
        return logits 
Example #23
Source File: t2t_model.py    From fine-lm with MIT License 5 votes vote down vote up
def _shard_features(self, features):  # pylint: disable=missing-docstring
    sharded_features = dict()
    for k, v in sorted(six.iteritems(features)):
      v = tf.convert_to_tensor(v)
      v_shape = common_layers.shape_list(v)
      if not v_shape:
        v = tf.expand_dims(v, axis=-1)
        v_shape = [1]
      if v_shape == [1]:
        v = tf.tile(v, tf.to_int32([self._num_datashards]))
      sharded_features[k] = self._data_parallelism(
          tf.identity, tf.split(v, self._num_datashards, 0))
    return sharded_features 
Example #24
Source File: algorithm.py    From soccer-matlab with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def _update_step(
      self, observ, action, old_mean, old_logstd, reward, advantage, length):
    """Compute the current combined loss and perform a gradient update step.

    Args:
      observ: Sequences of observations.
      action: Sequences of actions.
      old_mean: Sequences of action means of the behavioral policy.
      old_logstd: Sequences of action log stddevs of the behavioral policy.
      reward: Sequences of reward.
      advantage: Sequences of advantages.
      length: Batch of sequence lengths.

    Returns:
      Tuple of value loss, policy loss, and summary tensor.
    """
    value_loss, value_summary = self._value_loss(observ, reward, length)
    network = self._network(observ, length)
    policy_loss, policy_summary = self._policy_loss(
        network.mean, network.logstd, old_mean, old_logstd, action,
        advantage, length)
    value_gradients, value_variables = (
        zip(*self._optimizer.compute_gradients(value_loss)))
    policy_gradients, policy_variables = (
        zip(*self._optimizer.compute_gradients(policy_loss)))
    all_gradients = value_gradients + policy_gradients
    all_variables = value_variables + policy_variables
    optimize = self._optimizer.apply_gradients(
        zip(all_gradients, all_variables))
    summary = tf.summary.merge([
        value_summary, policy_summary,
        tf.summary.scalar(
            'value_gradient_norm', tf.global_norm(value_gradients)),
        tf.summary.scalar(
            'policy_gradient_norm', tf.global_norm(policy_gradients)),
        utility.gradient_summaries(
            zip(value_gradients, value_variables), dict(value=r'.*')),
        utility.gradient_summaries(
            zip(policy_gradients, policy_variables), dict(policy=r'.*'))])
    with tf.control_dependencies([optimize]):
      return [tf.identity(x) for x in (value_loss, policy_loss, summary)] 
Example #25
Source File: algorithm.py    From soccer-matlab with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def _training(self):
    """Perform multiple training iterations of both policy and value baseline.

    Training on the episodes collected in the memory. Reset the memory
    afterwards. Always returns a summary string.

    Returns:
      Summary tensor.
    """
    with tf.name_scope('training'):
      assert_full = tf.assert_equal(
          self._memory_index, self._config.update_every)
      with tf.control_dependencies([assert_full]):
        data = self._memory.data()
      (observ, action, old_mean, old_logstd, reward), length = data
      with tf.control_dependencies([tf.assert_greater(length, 0)]):
        length = tf.identity(length)
      observ = self._observ_filter.transform(observ)
      reward = self._reward_filter.transform(reward)
      update_summary = self._perform_update_steps(
          observ, action, old_mean, old_logstd, reward, length)
      with tf.control_dependencies([update_summary]):
        penalty_summary = self._adjust_penalty(
            observ, old_mean, old_logstd, length)
      with tf.control_dependencies([penalty_summary]):
        clear_memory = tf.group(
            self._memory.clear(), self._memory_index.assign(0))
      with tf.control_dependencies([clear_memory]):
        weight_summary = utility.variable_summaries(
            tf.trainable_variables(), self._config.weight_summaries)
        return tf.summary.merge([
            update_summary, penalty_summary, weight_summary]) 
Example #26
Source File: tf_atari_wrappers.py    From fine-lm with MIT License 5 votes vote down vote up
def simulate(self, action):
    action = tf.Print(action, [action], message="action=", summarize=200)

    # action = tf.zeros_like(action) #Temporary hacked bugfix
    reward, done = self._batch_env.simulate(action)
    with tf.control_dependencies([reward, done]):
      with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
        unpacked = discretization.int_to_bit(self._batch_env.observ, 8)
        unpacked = tf.reshape(unpacked, (-1,)+self.observ_shape)
        assign_op = self._observ.assign(unpacked)
        with tf.control_dependencies([assign_op]):
          return tf.identity(reward), tf.identity(done) 
Example #27
Source File: streaming_mean.py    From soccer-matlab with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def clear(self):
    """Return the mean estimate and reset the streaming statistics."""
    value = self._sum / tf.cast(self._count, self._dtype)
    with tf.control_dependencies([value]):
      reset_value = self._sum.assign(tf.zeros_like(self._sum))
      reset_count = self._count.assign(0)
    with tf.control_dependencies([reset_value, reset_count]):
      return tf.identity(value) 
Example #28
Source File: algorithm.py    From soccer-matlab with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def _update_policy_step(
      self, observ, action, old_mean, old_logstd, advantage, length):
    """Compute the current policy loss and perform a gradient update step.

    Args:
      observ: Sequences of observations.
      action: Sequences of actions.
      old_mean: Sequences of action means of the behavioral policy.
      old_logstd: Sequences of action log stddevs of the behavioral policy.
      advantage: Sequences of advantages.
      length: Batch of sequence lengths.

    Returns:
      Tuple of loss tensor and summary tensor.
    """
    network = self._network(observ, length)
    loss, summary = self._policy_loss(
        network.mean, network.logstd, old_mean, old_logstd, action,
        advantage, length)
    gradients, variables = (
        zip(*self._policy_optimizer.compute_gradients(loss)))
    optimize = self._policy_optimizer.apply_gradients(
        zip(gradients, variables))
    summary = tf.summary.merge([
        summary,
        tf.summary.scalar('gradient_norm', tf.global_norm(gradients)),
        utility.gradient_summaries(
            zip(gradients, variables), dict(policy=r'.*'))])
    with tf.control_dependencies([optimize]):
      return [tf.identity(loss), tf.identity(summary)] 
Example #29
Source File: algorithm.py    From soccer-matlab with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def _training(self):
    """Perform multiple training iterations of both policy and value baseline.

    Training on the episodes collected in the memory. Reset the memory
    afterwards. Always returns a summary string.

    Returns:
      Summary tensor.
    """
    with tf.name_scope('training'):
      assert_full = tf.assert_equal(
          self._memory_index, self._config.update_every)
      with tf.control_dependencies([assert_full]):
        data = self._memory.data()
      (observ, action, old_mean, old_logstd, reward), length = data
      with tf.control_dependencies([tf.assert_greater(length, 0)]):
        length = tf.identity(length)
      observ = self._observ_filter.transform(observ)
      reward = self._reward_filter.transform(reward)
      policy_summary = self._update_policy(
          observ, action, old_mean, old_logstd, reward, length)
      with tf.control_dependencies([policy_summary]):
        value_summary = self._update_value(observ, reward, length)
      with tf.control_dependencies([value_summary]):
        penalty_summary = self._adjust_penalty(
            observ, old_mean, old_logstd, length)
      with tf.control_dependencies([penalty_summary]):
        clear_memory = tf.group(
            self._memory.clear(), self._memory_index.assign(0))
      with tf.control_dependencies([clear_memory]):
        weight_summary = utility.variable_summaries(
            tf.trainable_variables(), self._config.weight_summaries)
        return tf.summary.merge([
            policy_summary, value_summary, penalty_summary, weight_summary]) 
Example #30
Source File: utils.py    From DOTA_models with Apache License 2.0 5 votes vote down vote up
def linear(x, out_size, do_bias=True, alpha=1.0, identity_if_possible=False,
           normalized=False, name=None, collections=None):
  """Linear (affine) transformation, y = x W + b, for a variety of
  configurations.

  Args:
    x: input The tensor to tranformation.
    out_size: The integer size of non-batch output dimension.
    do_bias (optional): Add a learnable bias vector to the operation.
    alpha (optional): A multiplicative scaling for the weight initialization
      of the matrix, in the form \alpha * 1/\sqrt{x.shape[1]}.
    identity_if_possible (optional): just return identity,
      if x.shape[1] == out_size.
    normalized (optional): Option to divide out by the norms of the rows of W.
    name (optional): The name prefix to add to variables.
    collections (optional): List of additional collections. (Placed in
      tf.GraphKeys.GLOBAL_VARIABLES already, so no need for that.)

  Returns:
    In the equation, y = x W + b, returns the tensorflow op that yields y.
  """
  in_size = int(x.get_shape()[1]) # from Dimension(10) -> 10
  stddev = alpha/np.sqrt(float(in_size))
  mat_init = tf.random_normal_initializer(0.0, stddev)
  wname = (name + "/W") if name else "/W"

  if identity_if_possible and in_size == out_size:
    # Sometimes linear layers are nothing more than size adapters.
    return tf.identity(x, name=(wname+'_ident'))

  W,b = init_linear(in_size, out_size, do_bias=do_bias, alpha=alpha,
                    normalized=normalized, name=name, collections=collections)

  if do_bias:
    return tf.matmul(x, W) + b
  else:
    return tf.matmul(x, W)