Python tensorflow.compat.v1.add_n() Examples

The following are 30 code examples of tensorflow.compat.v1.add_n(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow.compat.v1, or try the search function.
Example #1
Source File: model.py    From benchmarks with Apache License 2.0 6 votes vote down vote up
def loss_function(self, inputs, build_network_result):
    """Builds the loss op from the network result.

    The main loss is the mean sparse softmax cross-entropy of
    `build_network_result.logits`. When `build_network_result.extra_info` is
    not None it is used as auxiliary logits, and 0.4 times their mean
    cross-entropy is added to the main loss.

    Args:
      inputs: A 2-element sequence; its second element is used as the labels.
      build_network_result: Object providing `logits` and `extra_info`.

    Returns:
      The loss tensor.
    """
    main_logits = build_network_result.logits
    _unused, labels = inputs
    # TODO(laigd): consider putting the aux logit in the Inception model,
    # which could call super.loss_function twice, once with the normal logits
    # and once with the aux logits.
    auxiliary_logits = build_network_result.extra_info

    with tf.name_scope('xentropy'):
      mlperf.logger.log(key=mlperf.tags.MODEL_HP_LOSS_FN, value=mlperf.tags.CCE)
      main_xent = tf.losses.sparse_softmax_cross_entropy(
          logits=main_logits, labels=labels)
      total_loss = tf.reduce_mean(main_xent, name='xentropy_mean')

    # Guard clause: without auxiliary logits the main loss is the answer.
    if auxiliary_logits is None:
      return total_loss

    with tf.name_scope('aux_xentropy'):
      aux_xent = tf.losses.sparse_softmax_cross_entropy(
          logits=auxiliary_logits, labels=labels)
      weighted_aux = 0.4 * tf.reduce_mean(aux_xent, name='aux_loss')
      total_loss = tf.add_n([total_loss, weighted_aux])
    return total_loss
Example #2
Source File: tf_modules.py    From tensor2robot with Apache License 2.0 6 votes vote down vote up
def add_context(net, context):
  """Adds a context vector to every spatial position of a feature map.

  The context is regrouped per batch element, `net` is tiled to match it via
  `tile_to_match_context`, and the context is then tiled over the H and W
  extent and summed with the tiled feature map.

  Args:
    net: Tensor of shape [batch_size, H, W, C].
    context: Tensor of shape [batch_size * num_examples, C].
  Returns:
    Tensor with shape [batch_size * num_examples, H, W, C]
  """
  dynamic_batch = tf.shape(net)[0]
  _, height, width, net_depth = net.get_shape().as_list()
  _, context_depth = context.get_shape().as_list()
  assert net_depth == context_depth

  context_per_batch = tf.reshape(
      context, [dynamic_batch, -1, context_depth])
  tiled_net = tile_to_match_context(net, context_per_batch)

  # Collapse the leading (batch, example) dimensions into one.
  flat_net = tf.reshape(tiled_net, [-1, height, width, net_depth])
  flat_context = tf.reshape(context_per_batch, [-1, 1, 1, context_depth])
  spatial_context = tf.tile(flat_context, [1, height, width, 1])
  return tf.add_n([flat_net, spatial_context])
Example #3
Source File: op_regularizer_manager_test.py    From morph-net with Apache License 2.0 6 votes vote down vote up
def __init__(self, regularizers_to_group):
    """Creates an instance.

    Args:
      regularizers_to_group: A list of generic_regularizers.OpRegularizer
        objects.Their regularization_vector (alive_vector) are expected to be of
        the same length.

    Raises:
      ValueError: regularizers_to_group is not of length at least 2.
    """
    if len(regularizers_to_group) < 2:
      raise ValueError('Groups must be of at least size 2.')
    self._regularization_vector = tf.add_n(
        [r.regularization_vector for r in regularizers_to_group])
    self._alive_vector = tf.cast(
        tf.ones(self._regularization_vector.get_shape()[-1]), tf.bool) 
Example #4
Source File: op_regularizer_manager_test.py    From morph-net with Apache License 2.0 6 votes vote down vote up
def testCorrectSourceOpsWithSkipConnection(self):
    """Checks the source ops of the first op group for a skip connection.

    Builds conv0 -> bn0 -> relu -> conv1, sums conv0 and conv1 with add_n,
    and applies bn1 + relu. The first group's source op names are expected
    to be exactly the two fused batch-norm ops.
    """
    inputs = tf.zeros([2, 4, 4, 3])
    conv0 = layers.conv2d(
        inputs, num_outputs=8, kernel_size=3, activation_fn=None, scope='conv0')
    activated = tf.nn.relu(layers.batch_norm(conv0, scale=True, scope='bn0'))
    conv1 = layers.conv2d(
        activated, num_outputs=8, kernel_size=3, activation_fn=None,
        scope='conv1')
    skip_sum = tf.add_n([conv0, conv1], name='add')
    final_op = tf.nn.relu(
        layers.batch_norm(skip_sum, scale=True, scope='bn1'))

    op_reg_manager = orm.OpRegularizerManager(
        [final_op.op], self._default_op_handler_dict)

    # All ops are in the same group, so inspecting the first one suffices.
    all_groups = list(op_reg_manager._op_group_dict.values())
    first_group = all_groups[0]
    actual_names = {
        op_slice.op.name for op_slice in first_group.source_op_slices}
    expected_names = {'bn0/FusedBatchNormV3', 'bn1/FusedBatchNormV3'}
    self.assertSetEqual(expected_names, actual_names)
Example #5
Source File: op_regularizer_manager_test.py    From morph-net with Apache License 2.0 6 votes vote down vote up
def testAddN_Duplicates(self):
    """Checks grouping when add_n receives the same tensor four times.

    The identity, add_n and batch-norm ops are each expected to have exactly
    one op slice, all belonging to the identity op's group.
    """
    inputs = tf.zeros([2, 4, 4, 3])
    identity = tf.identity(inputs)
    add_n = tf.add_n([identity] * 4)
    batch_norm = layers.batch_norm(add_n)

    manager = orm.OpRegularizerManager(
        [batch_norm.op], op_handler_dict=self._default_op_handler_dict)

    identity_slices = manager.get_op_slices(identity.op)
    self.assertLen(identity_slices, 1)
    expected_group = manager.get_op_group(identity_slices[0]).op_slices

    # Verify all ops are in the same group.
    ops_to_check = (identity.op, add_n.op, batch_norm.op)
    for current_op in ops_to_check:
      current_slices = manager.get_op_slices(current_op)
      self.assertLen(current_slices, 1)
      self.assertIn(current_slices[0], expected_group)
Example #6
Source File: op_regularizer_manager_test.py    From morph-net with Apache License 2.0 6 votes vote down vote up
def testAddN(self):
    """Checks grouping when add_n sums four distinct identity tensors.

    Every identity op, the add_n op and the batch-norm op are each expected
    to have exactly one op slice, all belonging to the first identity's group.
    """
    inputs = tf.zeros([2, 4, 4, 3])
    identities = [tf.identity(inputs) for _ in range(4)]
    add_n = tf.add_n(identities)
    batch_norm = layers.batch_norm(add_n)

    manager = orm.OpRegularizerManager(
        [batch_norm.op], op_handler_dict=self._default_op_handler_dict)

    first_slices = manager.get_op_slices(identities[0].op)
    self.assertLen(first_slices, 1)
    expected_group = manager.get_op_group(first_slices[0]).op_slices

    # Verify all ops are in the same group.
    ops_to_check = [tensor.op for tensor in identities]
    ops_to_check += [add_n.op, batch_norm.op]
    for current_op in ops_to_check:
      current_slices = manager.get_op_slices(current_op)
      self.assertLen(current_slices, 1)
      self.assertIn(current_slices[0], expected_group)
Example #7
Source File: yellowfin.py    From tensor2tensor with Apache License 2.0 6 votes vote down vote up
def _grad_sparsity(self):
    """Computes gradient sparsity and its moving average.

    Sparsity is the number of non-zero entries over all tensors in
    `self._grad` divided by their total number of entries, in the dtype of
    the first gradient. Sets `self._sparsity` and `self._sparsity_avg`.

    Returns:
      The op returned by applying the moving averager to the sparsity.
    """
    # If the sparse minibatch gradient has 10 percent of its entries
    # non-zero, its sparsity is 0.1.
    # The norm of dense gradient averaged from full dataset
    # are roughly estimated norm of minibatch
    # sparse gradient norm * sqrt(sparsity)
    # An extension maybe only correct the sparse blob.
    gradients = self._grad
    nonzero_total = tf.add_n([tf.count_nonzero(grad) for grad in gradients])
    entry_total = tf.add_n([tf.size(grad) for grad in gradients])

    nonzero_as_float = tf.cast(nonzero_total, gradients[0].dtype)
    entries_as_float = tf.cast(entry_total, gradients[0].dtype)
    self._sparsity = nonzero_as_float / entries_as_float

    avg_op = self._moving_averager.apply([self._sparsity])
    # Read the average only after the averager has been updated.
    with tf.control_dependencies([avg_op]):
      self._sparsity_avg = self._moving_averager.average(self._sparsity)
    return avg_op
Example #8
Source File: t2t_model.py    From tensor2tensor with Apache License 2.0 6 votes vote down vote up
def average_sharded_losses(sharded_losses):
  """Combine per-shard losses into one value per loss name.

  Loss names are taken from the first shard. A loss given as a tuple is
  treated as (numerator, denominator): numerators and denominators are summed
  across shards and divided, with the denominator clamped to at least 1.
  Any other loss is averaged with `tf.reduce_mean`.

  Args:
    sharded_losses: list<dict<str loss_name, Tensor loss>>. The loss
      can be a single Tensor or a 2-tuple (numerator and denominator).

  Returns:
    losses: dict<str loss_name, Tensor avg_loss>
  """
  averaged = {}
  for loss_name in sorted(sharded_losses[0]):
    per_shard = [shard[loss_name] for shard in sharded_losses]

    if not isinstance(per_shard[0], tuple):
      averaged[loss_name] = tf.reduce_mean(per_shard)
      continue

    numerators, denominators = zip(*per_shard)
    numerator_sum = tf.add_n(numerators)
    one = tf.cast(1.0, denominators[0].dtype)
    denominator_sum = tf.add_n(denominators)
    averaged[loss_name] = numerator_sum / tf.maximum(one, denominator_sum)
  return averaged
Example #9
Source File: optimize.py    From tensor2tensor with Apache License 2.0 6 votes vote down vote up
def weight_decay(decay_rate, var_list, skip_biases=True):
  """Apply weight decay to vars in var_list.

  Computes `decay_rate * sum(l2_loss(v))` over the selected variables, with
  each L2 loss placed on the variable's own device.

  Args:
    decay_rate: Decay coefficient. A falsy value disables weight decay.
    var_list: Iterable of variables exposing `shape`, `name` and `device`.
    skip_biases: If True, rank-1 variables whose name ends with "bias:0" are
      excluded.

  Returns:
    The weight-decay loss tensor, or the float 0. when `decay_rate` is falsy
    or no variable is selected.
  """
  if not decay_rate:
    return 0.

  tf.logging.info("Applying weight decay, decay_rate: %0.5f", decay_rate)

  weight_decays = []
  for v in var_list:
    # Weight decay.
    # This is a heuristic way to detect biases that works for main tf.layers.
    is_bias = len(v.shape.as_list()) == 1 and v.name.endswith("bias:0")
    if not (skip_biases and is_bias):
      with tf.device(v.device):
        v_loss = tf.nn.l2_loss(v)
      weight_decays.append(v_loss)

  if not weight_decays:
    # tf.add_n raises ValueError on an empty list; nothing to decay means
    # the penalty is zero, matching the early return above.
    return 0.

  return tf.add_n(weight_decays) * decay_rate
Example #10
Source File: networks.py    From magenta with Apache License 2.0 5 votes vote down vote up
def blend_images(x, progress, resolution_schedule, num_blocks):
  """Blends images of different resolutions according to `progress`.

  For every block id from 1 to `num_blocks`, `x` is downscaled and then
  upscaled by that block's scale factor and weighted by
  `_generator_alpha(block_id, progress)`. The weighted images are summed.

  Args:
    x: An image `Tensor` of NHWC format with resolution `final_resolutions`.
    progress: A scalar float `Tensor` of training progress.
    resolution_schedule: An object of `ResolutionSchedule`.
    num_blocks: An integer of number of blocks.

  Returns:
    An image `Tensor` which is a blend of images of different resolutions.
  """

  def _weighted_rescale(block_id):
    """Returns x round-tripped through block_id's resolution, times alpha."""
    weight = _generator_alpha(block_id, progress)
    factor = resolution_schedule.scale_factor(block_id)
    shrunk = resolution_schedule.downscale(x, factor)
    restored = resolution_schedule.upscale(shrunk, factor)
    return weight * restored

  weighted_images = [
      _weighted_rescale(block_id) for block_id in range(1, num_blocks + 1)]
  return tf.add_n(weighted_images)
Example #11
Source File: test_forward.py    From incubator-tvm with Apache License 2.0 5 votes vote down vote up
def _test_forward_add_n(inputs):
    """Builds an add_n graph over placeholders and compares TF with TVM.

    One placeholder is created per input, using that input's shape and dtype.
    The inputs, the placeholder names and the add_n output name are handed
    to `compare_tf_with_tvm`.
    """
    tf.reset_default_graph()
    with tf.Graph().as_default():
        placeholders = [
            tf.placeholder(shape=array.shape, dtype=array.dtype)
            for array in inputs
        ]
        summed = tf.add_n(placeholders)
        placeholder_names = [holder.name for holder in placeholders]
        compare_tf_with_tvm(list(inputs), placeholder_names, summed.name)
Example #12
Source File: test_forward.py    From incubator-tvm with Apache License 2.0 5 votes vote down vote up
def _test_forward_add_n(inputs):
    """Builds an add_n graph over placeholders and compares TFLite with TVM.

    One placeholder is created per input, using that input's shape and dtype.
    The inputs, the placeholder names, the placeholders themselves and the
    add_n output are handed to `compare_tflite_with_tvm`.
    """
    tf.reset_default_graph()
    with tf.Graph().as_default():
        placeholders = [
            tf.placeholder(shape=array.shape, dtype=array.dtype)
            for array in inputs
        ]
        summed = tf.add_n(placeholders)
        placeholder_names = [holder.name for holder in placeholders]
        compare_tflite_with_tvm(
            list(inputs), placeholder_names, list(placeholders), [summed])
Example #13
Source File: model_deploy.py    From models with Apache License 2.0 5 votes vote down vote up
def _sum_clones_gradients(clone_grads):
  """Calculate the sum gradient for each shared variable across all clones.

  This function assumes that the clone_grads has been scaled appropriately by
  1 / num_clones.

  Args:
    clone_grads: A List of List of tuples (gradient, variable), one list per
    `Clone`.

  Returns:
     List of tuples of (gradient, variable) where the gradient has been summed
     across all clones.
  """
  sum_grads = []
  for grad_and_vars in zip(*clone_grads):
    # Note that each grad_and_vars looks like the following:
    #   ((grad_var0_clone0, var0), ... (grad_varN_cloneN, varN))
    grads = []
    var = grad_and_vars[0][1]
    for g, v in grad_and_vars:
      assert v == var
      if g is not None:
        grads.append(g)
    if grads:
      if len(grads) > 1:
        sum_grad = tf.add_n(grads, name=var.op.name + '/sum_grads')
      else:
        sum_grad = grads[0]
      sum_grads.append((sum_grad, var))
  return sum_grads 
Example #14
Source File: model_deploy.py    From models with Apache License 2.0 5 votes vote down vote up
def _gather_clone_loss(clone, num_clones, regularization_losses):
  """Gather the loss for a single clone.

  The clone's losses are read from the `tf.GraphKeys.LOSSES` collection under
  `clone.scope`, summed, and divided by `num_clones` when there is more than
  one clone. The regularization losses are summed separately. Both sums are
  built on `clone.device`; a scalar summary is written for each one present.

  Args:
    clone: A Clone namedtuple.
    num_clones: The number of clones being deployed.
    regularization_losses: Possibly empty list of regularization_losses
      to add to the clone losses.

  Returns:
    A tensor for the total loss for the clone.  Can be None.
  """
  clone_loss = None
  regularization_loss = None
  sum_loss = None

  # Compute and aggregate losses on the clone device.
  with tf.device(clone.device):
    collected = tf.get_collection(tf.GraphKeys.LOSSES, clone.scope)
    if collected:
      clone_loss = tf.add_n(collected, name='clone_loss')
      if num_clones > 1:
        clone_loss = tf.div(clone_loss, 1.0 * num_clones,
                            name='scaled_clone_loss')
    if regularization_losses:
      regularization_loss = tf.add_n(regularization_losses,
                                     name='regularization_loss')
    components = [
        part for part in (clone_loss, regularization_loss) if part is not None]
    if components:
      sum_loss = tf.add_n(components)

  # Add the summaries out of the clone device block.
  if clone_loss is not None:
    tag_parts = ('Losses', clone.scope, 'clone_loss')
    summary_tag = '/'.join(part for part in tag_parts if part)
    tf.summary.scalar(summary_tag, clone_loss)
  if regularization_loss is not None:
    tf.summary.scalar('Losses/regularization_loss', regularization_loss)
  return sum_loss
Example #15
Source File: run_dualencoder_lsf.py    From language with Apache License 2.0 5 votes vote down vote up
def _get_bert_embeddings(model, layers_to_use, aggregation_fn, name="bert"):
  """Extract embeddings from the selected encoder layers of a BERT model.

  Args:
    model: Object whose `get_all_encoder_layers()` returns the per-layer
      hidden states.
    layers_to_use: Indices into the encoder layers to aggregate.
    aggregation_fn: One of "concat", "average" or "attention". Ignored when
      exactly one layer is selected.
    name: Prefix for the mixing-weights variable used by "attention".

  Returns:
    A (hidden_emb, hidden_size) tuple.

  Raises:
    ValueError: If several layers are selected and `aggregation_fn` is not
      recognized.
  """
  all_hidden = model.get_all_encoder_layers()
  selected = [all_hidden[index] for index in layers_to_use]
  shapes = [
      modeling.get_shape_list(layer, expected_rank=3) for layer in all_hidden
  ]

  # A single layer is returned untouched, whatever the aggregation mode.
  if len(selected) == 1:
    return selected[0], shapes[0][2]

  if aggregation_fn == "concat":
    embedding = tf.concat(selected, 2)
    width = sum([shapes[index][2] for index in layers_to_use])
    return embedding, width

  if aggregation_fn == "average":
    width = shapes[0][2]
    assert all([shape[2] == width for shape in shapes]), shapes
    embedding = tf.add_n(selected) / len(selected)
    return embedding, width

  if aggregation_fn == "attention":
    width = shapes[0][2]
    mixing_weights = tf.get_variable(
        name + "/mixing/weights", [len(selected)],
        initializer=tf.zeros_initializer())
    mixing_scores = tf.nn.softmax(mixing_weights)
    stacked = tf.stack(selected, axis=-1)
    embedding = tf.tensordot(stacked, mixing_scores, [[-1], [0]])
    return embedding, width

  raise ValueError("Unrecognized aggregation function %s." % aggregation_fn)
Example #16
Source File: run_dualencoder_qa.py    From language with Apache License 2.0 5 votes vote down vote up
def _get_bert_embeddings(model, layers_to_use, aggregation_fn, name="bert"):
  """Extract embeddings from the selected encoder layers of a BERT model.

  Args:
    model: Object whose `get_all_encoder_layers()` returns the per-layer
      hidden states.
    layers_to_use: Indices into the encoder layers to aggregate.
    aggregation_fn: One of "concat", "average" or "attention". Ignored when
      exactly one layer is selected.
    name: Prefix for the mixing-weights variable used by "attention".

  Returns:
    A (hidden_emb, hidden_size) tuple.

  Raises:
    ValueError: If several layers are selected and `aggregation_fn` is not
      recognized.
  """
  all_hidden = model.get_all_encoder_layers()
  selected = [all_hidden[index] for index in layers_to_use]
  shapes = [
      modeling.get_shape_list(layer, expected_rank=3) for layer in all_hidden
  ]

  # A single layer is returned untouched, whatever the aggregation mode.
  if len(selected) == 1:
    return selected[0], shapes[0][2]

  if aggregation_fn == "concat":
    embedding = tf.concat(selected, 2)
    width = sum([shapes[index][2] for index in layers_to_use])
    return embedding, width

  if aggregation_fn == "average":
    width = shapes[0][2]
    assert all([shape[2] == width for shape in shapes]), shapes
    embedding = tf.add_n(selected) / len(selected)
    return embedding, width

  if aggregation_fn == "attention":
    width = shapes[0][2]
    mixing_weights = tf.get_variable(
        name + "/mixing/weights", [len(selected)],
        initializer=tf.zeros_initializer())
    mixing_scores = tf.nn.softmax(mixing_weights)
    stacked = tf.stack(selected, axis=-1)
    embedding = tf.tensordot(stacked, mixing_scores, [[-1], [0]])
    return embedding, width

  raise ValueError("Unrecognized aggregation function %s." % aggregation_fn)
Example #17
Source File: model_fns.py    From language with Apache License 2.0 5 votes vote down vote up
def _get_bert_embeddings(model, layers_to_use, aggregation_fn, name="bert"):
  """Extract embeddings from the selected encoder layers of a BERT model.

  Args:
    model: Object whose `get_all_encoder_layers()` returns the per-layer
      hidden states.
    layers_to_use: Indices into the encoder layers to aggregate.
    aggregation_fn: One of "concat", "average" or "attention". Ignored when
      exactly one layer is selected.
    name: Prefix for the mixing-weights variable used by "attention".

  Returns:
    A (hidden_emb, hidden_size) tuple.

  Raises:
    ValueError: If several layers are selected and `aggregation_fn` is not
      recognized.
  """
  all_hidden = model.get_all_encoder_layers()
  selected = [all_hidden[index] for index in layers_to_use]
  shapes = [
      modeling.get_shape_list(layer, expected_rank=3) for layer in all_hidden
  ]

  # A single layer is returned untouched, whatever the aggregation mode.
  if len(selected) == 1:
    return selected[0], shapes[0][2]

  if aggregation_fn == "concat":
    embedding = tf.concat(selected, 2)
    width = sum([shapes[index][2] for index in layers_to_use])
    return embedding, width

  if aggregation_fn == "average":
    width = shapes[0][2]
    assert all([shape[2] == width for shape in shapes]), shapes
    embedding = tf.add_n(selected) / len(selected)
    return embedding, width

  if aggregation_fn == "attention":
    width = shapes[0][2]
    mixing_weights = tf.get_variable(
        name + "/mixing/weights", [len(selected)],
        initializer=tf.zeros_initializer())
    mixing_scores = tf.nn.softmax(mixing_weights)
    stacked = tf.stack(selected, axis=-1)
    embedding = tf.tensordot(stacked, mixing_scores, [[-1], [0]])
    return embedding, width

  raise ValueError("Unrecognized aggregation function %s." % aggregation_fn)
Example #18
Source File: deep_cnn.py    From privacy with Apache License 2.0 5 votes vote down vote up
def loss_fun(logits, labels):
  """Compute the total loss from the 'losses' collection.

  The batch-mean sparse softmax cross entropy is added to the 'losses'
  collection, and every entry of that collection is then summed.

  Args:
    logits: Logits from inference().
    labels: Labels from distorted_inputs or inputs(). 1-D tensor
            of shape [batch_size]

  Returns:
    Loss tensor of type float.
  """
  # Cross entropy between the int64 labels and the predictions.
  int_labels = tf.cast(labels, tf.int64)
  per_example_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
      logits=logits, labels=int_labels, name='cross_entropy_per_example')

  # Average over the batch and register the result as a loss.
  mean_xent = tf.reduce_mean(per_example_xent, name='cross_entropy')
  tf.add_to_collection('losses', mean_xent)

  # The total is the sum of everything in the 'losses' collection, i.e. the
  # cross entropy plus whatever was registered there before this call.
  collected_losses = tf.get_collection('losses')
  return tf.add_n(collected_losses, name='total_loss')
Example #19
Source File: t2t_model.py    From tensor2tensor with Apache License 2.0 5 votes vote down vote up
def _normalize_body_output(self, body_out):
    if isinstance(body_out, tuple):
      output, losses = body_out
      if isinstance(losses, (list, tuple)):
        losses = {"extra": tf.add_n([tf.reduce_mean(l) for l in losses])}
      elif isinstance(losses, dict):
        pass
      else:
        losses = {"extra": tf.reduce_mean(losses)}
    else:
      output = body_out
      losses = {"extra": 0.0}

    return output, losses 
Example #20
Source File: modalities.py    From magenta with Apache License 2.0 5 votes vote down vote up
def bottom_simple(x, model_hparams, vocab_size, name, reuse):
  """Internal bottom transformation.

  For each index `i` along the last axis of `x`, the ids are offset by the sum
  of the preceding entries of `vocab_size`, looked up in the weights, and
  zeroed where the id is 0. The per-index embeddings are summed, and scaled
  by sqrt(hidden_size) when `multiply_embedding_mode` is 'sqrt_depth'.
  """
  with tf.variable_scope(name, reuse=reuse):
    var = _get_weights(model_hparams, vocab_size)
    keep_fraction = 1.0 - model_hparams.symbol_dropout
    ids = common_layers.dropout_no_scaling(x, keep_fraction)

    # Add together the embeddings for each tuple position.
    per_position = []
    for position in range(len(vocab_size)):
      id_offset = sum(vocab_size[:position])
      embedded = tf.gather(var, ids[:, :, :, position] + id_offset)
      is_nonzero = tf.not_equal(ids[:, :, :, position], 0)
      mask = tf.expand_dims(tf.to_float(is_nonzero), -1)
      per_position.append(embedded * mask)
    ret = tf.add_n(per_position)

    if model_hparams.multiply_embedding_mode == 'sqrt_depth':
      ret *= model_hparams.hidden_size**0.5
    return ret
Example #21
Source File: batch_allreduce.py    From benchmarks with Apache License 2.0 5 votes vote down vote up
def _all_reduce_using_copy(tensors_across_devices, use_mean):
  """Does an all-reduce of a list of tensors by copying to the current device.

  The tensors are copied to the current device and then reduced.

  Args:
    tensors_across_devices: A list of tensors, each on a different device.
    use_mean: Whether to take the mean of the tensors instead of a sum.
  Returns:
    A reduced tensor on the current device.
  """
  summed = tf.add_n(tensors_across_devices)
  if not use_mean:
    return summed
  # Turn the sum into a mean by scaling with 1 / count.
  return summed * (1 / len(tensors_across_devices))
Example #22
Source File: variable_mgr_util.py    From benchmarks with Apache License 2.0 5 votes vote down vote up
def aggregate_single_gradient_using_copy(grad_and_vars, use_mean,
                                         check_inf_nan):
  """Aggregates the gradient of one shared variable across all towers.

  Note that this function provides a synchronization point across all towers.

  Args:
    grad_and_vars: A list or tuple of (gradient, variable) tuples. Each
      (gradient, variable) pair within the outer list represents the gradient
      of the variable calculated for a single tower, and the number of pairs
      equals the number of towers.
    use_mean: if True, mean is taken, else sum of gradients is taken.
    check_inf_nan: check grads for nans and infs.

  Returns:
    The tuple ((aggregated_gradient, variable), has_nan_or_inf). The variable
      is chosen from the first tower. has_nan_or_inf is a tensor indicating
      whether any gradient is non-finite when `check_inf_nan` is set, and
      None otherwise.
  """
  tower_grads = [grad for grad, _ in grad_and_vars]
  num_towers = len(tower_grads)

  has_indexed_slices = any(
      isinstance(grad, tf.IndexedSlices) for grad in tower_grads)
  if has_indexed_slices:
    # TODO(reedwm): All-reduce IndexedSlices more effectively.
    aggregated = aggregate_indexed_slices_gradients(tower_grads)
  else:
    aggregated = tf.add_n(tower_grads)

  if use_mean and num_towers > 1:
    aggregated = tf.scalar_mul(1.0 / num_towers, aggregated)

  first_tower_var = grad_and_vars[0][1]
  if not check_inf_nan:
    return (aggregated, first_tower_var), None

  with tf.name_scope('check_for_inf_and_nan'):
    all_finite = tf.reduce_all(tf.is_finite(tower_grads))
    has_nan_or_inf = tf.logical_not(all_finite)
  return (aggregated, first_tower_var), has_nan_or_inf


# This class is copied from
# https://github.com/tensorflow/tensorflow/blob/590d6eef7e91a6a7392c8ffffb7b58f2e0c8bc6b/tensorflow/contrib/training/python/training/device_setter.py#L56.
# We copy it since contrib has been removed from TensorFlow. 
Example #23
Source File: t2t_model.py    From tensor2tensor with Apache License 2.0 5 votes vote down vote up
def loss(self, logits, features):
    """Computes the loss for a single logits tensor or a dict of them.

    For non-dict logits the result of `self._loss_single` on the "targets"
    feature is returned as is. For a dict, `self._loss_single` is called per
    key and its (numerator, denominator) result is optionally summarized;
    the sum of the per-key quotients is returned.
    """
    if not isinstance(logits, dict):
      return self._loss_single(
          logits,
          "targets",
          features["targets"],
          weights=features.get("targets_mask"))

    losses = {}
    for key, key_logits in six.iteritems(logits):
      losses[key] = self._loss_single(
          key_logits,
          key,
          features[key],
          weights=features.get(key + "_mask"))
      numerator, denominator = losses[key]

      if not common_layers.should_generate_summaries():
        continue
      tf.summary.scalar(key + "_loss", numerator / denominator)
      tf.summary.scalar(key + "_loss_num", numerator)
      tf.summary.scalar(key + "_loss_den", denominator)
      if getattr(self.hparams, "visualize_logits_histogram", False):
        predictions = tf.argmax(tf.squeeze(key_logits), axis=-1)
        tf.summary.histogram(key + "_predict", predictions)
        tf.summary.histogram(key + "_targets", features[key])

    quotients = [num / den for num, den in losses.values()]
    return tf.add_n(quotients)
Example #24
Source File: expert_utils.py    From tensor2tensor with Apache License 2.0 5 votes vote down vote up
def reduce_by_device(parallelism, data, reduce_fn):
  """Reduces data per device.

  This can be useful, for example, if we want to all-reduce n tensors on k<n
  devices (like during eval when we have only one device).  We call
  reduce_by_device() to first sum the tensors per device, then call our usual
  all-reduce operation to create one sum per device, followed by
  expand_by_device, to create the appropriate number of pointers to these
  results.  See all_reduce_ring() below for an example of how this is used.

  Args:
    parallelism: a expert_utils.Parallelism object
    data: a list of Tensors with length parallelism.n
    reduce_fn: a function taking a list of Tensors.  e.g. tf.add_n

  Returns:
    device_parallelism: a Parallelism object with each device listed only once.
    reduced_data: A list of Tensors, one per device.
  """
  devices_in_order = []  # Unique devices, in order of first appearance.
  data_by_device = {}
  for device, datum in zip(parallelism.devices, data):
    bucket = data_by_device.get(device)
    if bucket is None:
      bucket = data_by_device[device] = []
      devices_in_order.append(device)
    bucket.append(datum)

  device_parallelism = Parallelism(devices_in_order)
  grouped_data = [data_by_device[device] for device in devices_in_order]
  reduced_data = device_parallelism(reduce_fn, grouped_data)
  return device_parallelism, reduced_data
Example #25
Source File: transformer_vae_flow_prior.py    From tensor2tensor with Apache License 2.0 5 votes vote down vote up
def loss_iw(self, logits, features):
    """Computes the loss via `_loss_single_iw` for a tensor or dict of logits.

    For non-dict logits the result of `self._loss_single_iw` on the "targets"
    feature is returned as is. For a dict, `self._loss_single_iw` is called
    per key and its (numerator, denominator) result is optionally summarized;
    the sum of the per-key quotients is returned.
    """
    if not isinstance(logits, dict):
      return self._loss_single_iw(
          logits,
          "targets",
          features["targets"],
          weights=features.get("targets_mask"))

    losses = {}
    for key, key_logits in six.iteritems(logits):
      losses[key] = self._loss_single_iw(
          key_logits,
          key,
          features[key],
          weights=features.get(key + "_mask"))
      numerator, denominator = losses[key]

      if not common_layers.should_generate_summaries():
        continue
      tf.summary.scalar(key + "_loss", numerator / denominator)
      tf.summary.scalar(key + "_loss_num", numerator)
      tf.summary.scalar(key + "_loss_den", denominator)
      if getattr(self.hparams, "visualize_logits_histogram", False):
        predictions = tf.argmax(tf.squeeze(key_logits), axis=-1)
        tf.summary.histogram(key + "_predict", predictions)
        tf.summary.histogram(key + "_targets", features[key])

    quotients = [num / den for num, den in losses.values()]
    return tf.add_n(quotients)
Example #26
Source File: transformer_vae_flow_prior.py    From tensor2tensor with Apache License 2.0 5 votes vote down vote up
def model_fn_sharded(self, sharded_features):
    """Estimator model_fn sharded along batch dimension.

    Runs `self.model_fn` and then `self.maybe_scheduled_sampling` on every
    datashard, averages the losses across shards, and saves the per-key mean
    of the shard monitors as a "monitor" summary.

    Args:
      sharded_features: {str: [Tensor]}. Features sharded along batch dimension.
        Each list is the same length (== number of shards).

    Returns:
      sharded_logits: [Tensor]. Logits for each shard of examples. When the
        per-shard logits are dicts, a dict mapping each key to the list of
        that key's per-shard logits instead.
      losses: {str: 0-D Tensor}. Loss averaged across shards.
    """
    dp = self._data_parallelism

    # [{str: Tensor}]. Transpose of 'sharded_features'.
    datashard_to_features = self._to_features_per_datashard(sharded_features)
    sharded_logits, sharded_losses, sharded_monitors, _ = dp(
        self.model_fn, datashard_to_features)
    sharded_logits, sharded_losses = dp(
        self.maybe_scheduled_sampling,
        datashard_to_features, sharded_logits, sharded_losses)

    if isinstance(sharded_logits[0], dict):
      # Transpose a list of dicts into a dict of lists.
      sharded_logits = {
          key: [shard_logits[key] for shard_logits in sharded_logits]
          for key, _ in six.iteritems(sharded_logits[0])
      }

    losses = t2t_model.average_sharded_losses(sharded_losses)

    num_monitors = len(sharded_monitors)
    monitor = {
        key: tf.add_n([shard[key] for shard in sharded_monitors]) / num_monitors
        for key in list(sharded_monitors[0].keys())
    }
    ops.save_summary(monitor, "monitor")

    return sharded_logits, losses
Example #27
Source File: common_layers.py    From tensor2tensor with Apache License 2.0 5 votes vote down vote up
def tpu_conv1d(inputs, filters, kernel_size, padding="SAME", name="tpu_conv1d"):
  """Version of conv1d that works on TPU (as of 11/2017).

  Implemented as a sum of `kernel_size` dense layers, one per tap, scaled by
  kernel_size ** -0.5. Only the first tap carries a bias.

  Args:
    inputs: a Tensor with shape [batch, length, input_depth].
    filters: an integer.
    kernel_size: an integer.
    padding: a string - "SAME" or "LEFT".
    name: a string.

  Returns:
    a Tensor with shape [batch, length, filters].
  """
  if kernel_size == 1:
    return dense(inputs, filters, name=name, use_bias=True)

  if padding == "SAME":
    assert kernel_size % 2 == 1
    first_offset = -((kernel_size - 1) // 2)
  else:
    assert padding == "LEFT"
    first_offset = -(kernel_size - 1)
  last_offset = first_offset + kernel_size - 1

  padded = tf.pad(inputs, [[0, 0], [-first_offset, last_offset], [0, 0]])
  tap_outputs = []
  for tap in range(kernel_size):
    # NOTE(review): tap 0 uses the unpadded `inputs` rather than `padded` at
    # offset 0, so it does not see the left padding - confirm this is intended.
    if tap:
      shifted = tf.slice(padded, [0, tap, 0], tf.shape(inputs))
    else:
      shifted = inputs
    shifted.set_shape(inputs.get_shape())
    tap_name = name + "_%d" % tap
    tap_outputs.append(
        dense(shifted, filters, use_bias=(tap == 0), name=tap_name))

  ret = tf.add_n(tap_outputs)
  ret *= kernel_size**-0.5
  return ret
Example #28
Source File: modalities_test.py    From tensor2tensor with Apache License 2.0 5 votes vote down vote up
def testSymbolModalityTargets(self):
    """Checks the logits and loss shapes of the SYMBOL modality.

    Random body outputs and targets are split over five CPU datashards and
    passed through the SYMBOL top and loss functions. The concatenated logits
    are expected to have shape (batch, length, height, 1, vocab) and the
    training loss is expected to be a scalar.
    """
    batch_size, num_datashards = 10, 5
    length, height = 6, 7
    hidden_size, vocab_size = 9, 11

    model_hparams = common_hparams.basic_params1()
    model_hparams.hidden_size = hidden_size
    model_hparams.mode = tf.estimator.ModeKeys.TRAIN

    body_output = np.random.randint(
        100, size=(batch_size, length, height, hidden_size))
    targets = np.random.randint(
        vocab_size, size=(batch_size, length, height, 1))
    data_parallelism = expert_utils.Parallelism(
        ["/device:CPU:0"] * num_datashards)

    sharded_body_output = tf.split(tf.to_float(body_output), num_datashards)
    sharded_targets = tf.split(targets, num_datashards)

    top_fn = modalities.get_top(modalities.ModalityType.SYMBOL)
    sharded_logits = data_parallelism(
        top_fn, sharded_body_output, sharded_targets, model_hparams,
        vocab_size)

    loss_fn = modalities.get_loss(modalities.ModalityType.SYMBOL)
    weights_fn = modalities.get_weights_fn(modalities.ModalityType.SYMBOL)
    sharded_loss_num, sharded_loss_den = data_parallelism(
        loss_fn, sharded_logits, sharded_targets, model_hparams, vocab_size,
        weights_fn)

    loss_numerator = tf.add_n(sharded_loss_num)
    loss_denominator = tf.add_n(sharded_loss_den)
    train_loss = loss_numerator / tf.maximum(1.0, loss_denominator)
    logits = tf.concat(sharded_logits, 0)

    self.evaluate(tf.global_variables_initializer())
    logits_value, loss_value = self.evaluate((logits, train_loss))
    expected_logits_shape = (batch_size, length, height, 1, vocab_size)
    self.assertEqual(logits_value.shape, expected_logits_shape)
    self.assertEqual(loss_value.shape, ())
Example #29
Source File: modalities_test.py    From tensor2tensor with Apache License 2.0 5 votes vote down vote up
def testSymbolModalityTargetsFactored(self):
    """Checks SYMBOL modality logits and loss shapes with factored logits.

    Same setup as the non-factored test but with
    `model_hparams.factored_logits = True` and an explicit test session. The
    concatenated logits are expected to have shape
    (batch, length, height, 1, vocab) and the training loss to be a scalar.
    """
    batch_size, num_datashards = 10, 5
    length, height = 6, 7
    hidden_size, vocab_size = 9, 11

    model_hparams = common_hparams.basic_params1()
    model_hparams.factored_logits = True
    model_hparams.hidden_size = hidden_size
    model_hparams.mode = tf.estimator.ModeKeys.TRAIN

    body_output = np.random.randint(
        100, size=(batch_size, length, height, hidden_size))
    targets = np.random.randint(
        vocab_size, size=(batch_size, length, height, 1))
    data_parallelism = expert_utils.Parallelism(
        ["/device:CPU:0"] * num_datashards)

    with self.test_session() as session:
      sharded_body_output = tf.split(tf.to_float(body_output), num_datashards)
      sharded_targets = tf.split(targets, num_datashards)

      top_fn = modalities.get_top(modalities.ModalityType.SYMBOL)
      sharded_logits = data_parallelism(
          top_fn, sharded_body_output, sharded_targets, model_hparams,
          vocab_size)

      loss_fn = modalities.get_loss(modalities.ModalityType.SYMBOL)
      weights_fn = modalities.get_weights_fn(modalities.ModalityType.SYMBOL)
      sharded_loss_num, sharded_loss_den = data_parallelism(
          loss_fn, sharded_logits, sharded_targets, model_hparams, vocab_size,
          weights_fn)

      loss_numerator = tf.add_n(sharded_loss_num)
      loss_denominator = tf.add_n(sharded_loss_den)
      train_loss = loss_numerator / tf.maximum(1.0, loss_denominator)
      logits = tf.concat(sharded_logits, 0)

      session.run(tf.global_variables_initializer())
      logits_value, loss_value = session.run((logits, train_loss))

    expected_logits_shape = (batch_size, length, height, 1, vocab_size)
    self.assertEqual(logits_value.shape, expected_logits_shape)
    self.assertEqual(loss_value.shape, ())
Example #30
Source File: configurable_ops.py    From morph-net with Apache License 2.0 5 votes vote down vote up
def add_n(self, *args, **kwargs):
    """Delegates to `self._pass_through_mask_list` for the 'add_n' op.

    The op name 'add_n' and the argument name 'inputs' are passed first,
    followed by the caller's positional and keyword arguments unchanged.
    """
    op_name = 'add_n'
    inputs_arg_name = 'inputs'
    return self._pass_through_mask_list(
        op_name, inputs_arg_name, *args, **kwargs)