Python tensorflow.compat.v1.add_n() Examples
The following are 30 code examples of tensorflow.compat.v1.add_n().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module tensorflow.compat.v1, or try the search function.
Example #1
Source File: model.py From benchmarks with Apache License 2.0 | 6 votes |
def loss_function(self, inputs, build_network_result):
    """Builds the op that measures the model's loss.

    Args:
        inputs: A two-element sequence; only the second element (the labels)
            is used.
        build_network_result: Object exposing `logits` and `extra_info`.
            A non-None `extra_info` is treated as auxiliary logits.

    Returns:
        The mean sparse softmax cross-entropy of the logits. When auxiliary
        logits are present, 0.4 times their mean cross-entropy is added.
    """
    main_logits = build_network_result.logits
    _, labels = inputs
    # TODO(laigd): consider putting the aux logit in the Inception model,
    # which could call super.loss_function twice, once with the normal logits
    # and once with the aux logits.
    aux_logits = build_network_result.extra_info

    with tf.name_scope('xentropy'):
        mlperf.logger.log(
            key=mlperf.tags.MODEL_HP_LOSS_FN, value=mlperf.tags.CCE)
        xent = tf.losses.sparse_softmax_cross_entropy(
            logits=main_logits, labels=labels)
        total_loss = tf.reduce_mean(xent, name='xentropy_mean')

    # Without auxiliary logits the main loss is the whole loss.
    if aux_logits is None:
        return total_loss

    with tf.name_scope('aux_xentropy'):
        aux_xent = tf.losses.sparse_softmax_cross_entropy(
            logits=aux_logits, labels=labels)
        weighted_aux = 0.4 * tf.reduce_mean(aux_xent, name='aux_loss')
        total_loss = tf.add_n([total_loss, weighted_aux])
    return total_loss
Example #2
Source File: tf_modules.py From tensor2robot with Apache License 2.0 | 6 votes |
def add_context(net, context):
    """Adds a context vector to every spatial position of a feature map.

    The context is reshaped to match the depth of `net`, tiled across the
    H and W extent, and summed element-wise with the (tiled) feature map.

    Args:
        net: Tensor of shape [batch_size, H, W, C].
        context: Tensor of shape [batch_size * num_examples, C].

    Returns:
        Tensor with shape [batch_size * num_examples, H, W, C].
    """
    batch_dim = tf.shape(net)[0]
    _, height, width, net_depth = net.get_shape().as_list()
    _, context_depth = context.get_shape().as_list()
    assert net_depth == context_depth

    grouped_context = tf.reshape(context, [batch_dim, -1, context_depth])
    tiled_net = tile_to_match_context(net, grouped_context)

    # Flatten first two dimensions.
    flat_net = tf.reshape(tiled_net, [-1, height, width, net_depth])
    flat_context = tf.reshape(grouped_context, [-1, 1, 1, context_depth])
    spatial_context = tf.tile(flat_context, [1, height, width, 1])
    return tf.add_n([flat_net, spatial_context])
Example #3
Source File: op_regularizer_manager_test.py From morph-net with Apache License 2.0 | 6 votes |
def __init__(self, regularizers_to_group):
    """Initializes the grouped regularizer.

    The regularization vector is the element-wise sum of the members'
    regularization vectors; the alive vector is all True, with length taken
    from the last dimension of that sum.

    Args:
        regularizers_to_group: A list of generic_regularizers.OpRegularizer
            objects. Their regularization_vector (alive_vector) are expected
            to be of the same length.

    Raises:
        ValueError: regularizers_to_group is not of length at least 2.
    """
    if len(regularizers_to_group) < 2:
        raise ValueError('Groups must be of at least size 2.')

    member_vectors = [
        regularizer.regularization_vector
        for regularizer in regularizers_to_group
    ]
    self._regularization_vector = tf.add_n(member_vectors)

    vector_length = self._regularization_vector.get_shape()[-1]
    self._alive_vector = tf.cast(tf.ones(vector_length), tf.bool)
Example #4
Source File: op_regularizer_manager_test.py From morph-net with Apache License 2.0 | 6 votes |
def testCorrectSourceOpsWithSkipConnection(self):
    """Checks the source ops of the group built around a skip connection."""
    inputs = tf.zeros([2, 4, 4, 3])
    conv0 = layers.conv2d(
        inputs,
        num_outputs=8,
        kernel_size=3,
        activation_fn=None,
        scope='conv0')
    branch = tf.nn.relu(layers.batch_norm(conv0, scale=True, scope='bn0'))
    branch = layers.conv2d(
        branch,
        num_outputs=8,
        kernel_size=3,
        activation_fn=None,
        scope='conv1')
    # Skip connection: conv0 output is added to the conv1 output.
    merged = tf.add_n([conv0, branch], name='add')
    final_op = tf.nn.relu(layers.batch_norm(merged, scale=True, scope='bn1'))

    op_handler_dict = self._default_op_handler_dict
    op_reg_manager = orm.OpRegularizerManager([final_op.op], op_handler_dict)

    # All ops are in the same group
    group = list(op_reg_manager._op_group_dict.values())[0]
    actual_sources = {op_slice.op.name for op_slice in group.source_op_slices}
    expected_sources = {'bn0/FusedBatchNormV3', 'bn1/FusedBatchNormV3'}
    self.assertSetEqual(expected_sources, actual_sources)
Example #5
Source File: op_regularizer_manager_test.py From morph-net with Apache License 2.0 | 6 votes |
def testAddN_Duplicates(self):
    """Checks grouping when add_n receives the same tensor four times."""
    inputs = tf.zeros([2, 4, 4, 3])
    identity = tf.identity(inputs)
    add_n = tf.add_n([identity] * 4)
    batch_norm = layers.batch_norm(add_n)

    manager = orm.OpRegularizerManager(
        [batch_norm.op], op_handler_dict=self._default_op_handler_dict)

    identity_slices = manager.get_op_slices(identity.op)
    self.assertLen(identity_slices, 1)
    expected_group = manager.get_op_group(identity_slices[0]).op_slices

    # Verify all ops are in the same group.
    ops_to_check = (identity.op, add_n.op, batch_norm.op)
    for op in ops_to_check:
        slices = manager.get_op_slices(op)
        self.assertLen(slices, 1)
        self.assertIn(slices[0], expected_group)
Example #6
Source File: op_regularizer_manager_test.py From morph-net with Apache License 2.0 | 6 votes |
def testAddN(self):
    """Checks that four add_n inputs, add_n and batch norm share a group."""
    inputs = tf.zeros([2, 4, 4, 3])
    identities = [tf.identity(inputs) for _ in range(4)]
    add_n = tf.add_n(identities)
    batch_norm = layers.batch_norm(add_n)

    manager = orm.OpRegularizerManager(
        [batch_norm.op], op_handler_dict=self._default_op_handler_dict)

    first_slices = manager.get_op_slices(identities[0].op)
    self.assertLen(first_slices, 1)
    expected_group = manager.get_op_group(first_slices[0]).op_slices

    # Verify all ops are in the same group.
    ops_to_check = [tensor.op for tensor in identities]
    ops_to_check += [add_n.op, batch_norm.op]
    for op in ops_to_check:
        slices = manager.get_op_slices(op)
        self.assertLen(slices, 1)
        self.assertIn(slices[0], expected_group)
Example #7
Source File: yellowfin.py From tensor2tensor with Apache License 2.0 | 6 votes |
def _grad_sparsity(self):
    """Tracks the fraction of gradient entries that are non-zero.

    Stores the ratio of non-zero entries to total entries over `self._grad`
    in `self._sparsity`, and its moving average in `self._sparsity_avg`.

    Returns:
        The op that applies the moving average to the sparsity value.
    """
    # If the sparse minibatch gradient has 10 percent of its entries
    # non-zero, its sparsity is 0.1.
    # The norm of dense gradient averaged from full dataset
    # are roughly estimated norm of minibatch
    # sparse gradient norm * sqrt(sparsity)
    # An extension maybe only correct the sparse blob.
    nonzero_total = tf.add_n([tf.count_nonzero(grad) for grad in self._grad])
    entry_total = tf.add_n([tf.size(grad) for grad in self._grad])

    numerator = tf.cast(nonzero_total, self._grad[0].dtype)
    denominator = tf.cast(entry_total, self._grad[0].dtype)
    self._sparsity = numerator / denominator

    avg_op = self._moving_averager.apply([self._sparsity])
    # Read the average only after the averaging op has been built as a
    # control dependency.
    with tf.control_dependencies([avg_op]):
        self._sparsity_avg = self._moving_averager.average(self._sparsity)
    return avg_op
Example #8
Source File: t2t_model.py From tensor2tensor with Apache License 2.0 | 6 votes |
def average_sharded_losses(sharded_losses):
    """Averages each named loss over all datashards.

    Args:
        sharded_losses: list<dict<str loss_name, Tensor loss>>. The loss can
            be a single Tensor or a 2-tuple (numerator and denominator).

    Returns:
        losses: dict<str loss_name, Tensor avg_loss>
    """
    averaged = {}
    for loss_name in sorted(sharded_losses[0]):
        per_shard = [shard[loss_name] for shard in sharded_losses]
        if not isinstance(per_shard[0], tuple):
            averaged[loss_name] = tf.reduce_mean(per_shard)
            continue

        # (numerator, denominator) pairs: sum each side over the shards and
        # divide, with the denominator floored at 1.
        numerators, denominators = zip(*per_shard)
        total_numerator = tf.add_n(numerators)
        one = tf.cast(1.0, denominators[0].dtype)
        total_denominator = tf.maximum(one, tf.add_n(denominators))
        averaged[loss_name] = total_numerator / total_denominator
    return averaged
Example #9
Source File: optimize.py From tensor2tensor with Apache License 2.0 | 6 votes |
def weight_decay(decay_rate, var_list, skip_biases=True):
    """Apply weight decay to vars in var_list.

    Args:
        decay_rate: Multiplier applied to the summed L2 losses. A falsy value
            (0, None) disables weight decay.
        var_list: Variables to penalize.
        skip_biases: When true, variables detected as biases (rank 1 with a
            name ending in "bias:0") are left out.

    Returns:
        `decay_rate` times the sum of `tf.nn.l2_loss` over the selected
        variables, or 0. when decay is disabled or no variable is selected.
    """
    if not decay_rate:
        return 0.

    tf.logging.info("Applying weight decay, decay_rate: %0.5f", decay_rate)

    weight_decays = []
    for v in var_list:
        # Weight decay.
        # This is a heuristic way to detect biases that works for main
        # tf.layers.
        is_bias = len(v.shape.as_list()) == 1 and v.name.endswith("bias:0")
        if skip_biases and is_bias:
            continue
        # Compute each loss on the device that holds the variable.
        with tf.device(v.device):
            v_loss = tf.nn.l2_loss(v)
        weight_decays.append(v_loss)

    # tf.add_n raises ValueError on an empty list, which happens when
    # var_list is empty or every variable was skipped as a bias.
    if not weight_decays:
        return 0.
    return tf.add_n(weight_decays) * decay_rate
Example #10
Source File: networks.py From magenta with Apache License 2.0 | 5 votes |
def blend_images(x, progress, resolution_schedule, num_blocks):
    """Blends images of different resolutions according to `progress`.

    When training `progress` is at a stable stage for resolution r, returns
    image `x` downscaled to resolution r and then upscaled to
    `final_resolutions`, call it x'(r).

    Otherwise when training `progress` is at a transition stage from
    resolution r to 2r, returns a linear combination of x'(r) and x'(2r).

    Args:
        x: An image `Tensor` of NHWC format with resolution
            `final_resolutions`.
        progress: A scalar float `Tensor` of training progress.
        resolution_schedule: An object of `ResolutionSchedule`.
        num_blocks: An integer of number of blocks.

    Returns:
        An image `Tensor` which is a blend of images of different
        resolutions.
    """
    weighted_images = []
    for block_id in range(1, num_blocks + 1):
        weight = _generator_alpha(block_id, progress)
        factor = resolution_schedule.scale_factor(block_id)
        # Round-trip through the block's resolution.
        downscaled = resolution_schedule.downscale(x, factor)
        restored = resolution_schedule.upscale(downscaled, factor)
        weighted_images.append(weight * restored)
    return tf.add_n(weighted_images)
Example #11
Source File: test_forward.py From incubator-tvm with Apache License 2.0 | 5 votes |
def _test_forward_add_n(inputs):
    """Compares TensorFlow and TVM results for `tf.add_n` over `inputs`.

    Args:
        inputs: Iterable of arrays; one placeholder is created per array
            using its `shape` and `dtype`.
    """
    tf.reset_default_graph()
    with tf.Graph().as_default():
        placeholders = [
            tf.placeholder(shape=array.shape, dtype=array.dtype)
            for array in inputs
        ]
        summed = tf.add_n(placeholders)
        placeholder_names = [ph.name for ph in placeholders]
        compare_tf_with_tvm(list(inputs), placeholder_names, summed.name)
Example #12
Source File: test_forward.py From incubator-tvm with Apache License 2.0 | 5 votes |
def _test_forward_add_n(inputs):
    """Compares TFLite and TVM results for `tf.add_n` over `inputs`.

    Args:
        inputs: Iterable of arrays; one placeholder is created per array
            using its `shape` and `dtype`.
    """
    tf.reset_default_graph()
    with tf.Graph().as_default():
        placeholders = [
            tf.placeholder(shape=array.shape, dtype=array.dtype)
            for array in inputs
        ]
        summed = tf.add_n(placeholders)
        placeholder_names = [ph.name for ph in placeholders]
        compare_tflite_with_tvm(
            list(inputs), placeholder_names, list(placeholders), [summed])
Example #13
Source File: model_deploy.py From models with Apache License 2.0 | 5 votes |
def _sum_clones_gradients(clone_grads): """Calculate the sum gradient for each shared variable across all clones. This function assumes that the clone_grads has been scaled appropriately by 1 / num_clones. Args: clone_grads: A List of List of tuples (gradient, variable), one list per `Clone`. Returns: List of tuples of (gradient, variable) where the gradient has been summed across all clones. """ sum_grads = [] for grad_and_vars in zip(*clone_grads): # Note that each grad_and_vars looks like the following: # ((grad_var0_clone0, var0), ... (grad_varN_cloneN, varN)) grads = [] var = grad_and_vars[0][1] for g, v in grad_and_vars: assert v == var if g is not None: grads.append(g) if grads: if len(grads) > 1: sum_grad = tf.add_n(grads, name=var.op.name + '/sum_grads') else: sum_grad = grads[0] sum_grads.append((sum_grad, var)) return sum_grads
Example #14
Source File: model_deploy.py From models with Apache License 2.0 | 5 votes |
def _gather_clone_loss(clone, num_clones, regularization_losses):
    """Builds the total loss tensor for one clone.

    Args:
        clone: A Clone namedtuple.
        num_clones: The number of clones being deployed.
        regularization_losses: Possibly empty list of regularization_losses
            to add to the clone losses.

    Returns:
        A tensor for the total loss for the clone. Can be None.
    """
    # Individual components of the loss that will need summaries.
    clone_loss = None
    regularization_loss = None

    # Compute and aggregate losses on the clone device.
    with tf.device(clone.device):
        components = []
        collected = tf.get_collection(tf.GraphKeys.LOSSES, clone.scope)
        if collected:
            clone_loss = tf.add_n(collected, name='clone_loss')
            if num_clones > 1:
                clone_loss = tf.div(
                    clone_loss, 1.0 * num_clones, name='scaled_clone_loss')
            components.append(clone_loss)
        if regularization_losses:
            regularization_loss = tf.add_n(
                regularization_losses, name='regularization_loss')
            components.append(regularization_loss)
        # The return value stays None when there is nothing to sum.
        sum_loss = tf.add_n(components) if components else None

    # Add the summaries out of the clone device block.
    if clone_loss is not None:
        summary_name = '/'.join(
            filter(None, ['Losses', clone.scope, 'clone_loss']))
        tf.summary.scalar(summary_name, clone_loss)
    if regularization_loss is not None:
        tf.summary.scalar('Losses/regularization_loss', regularization_loss)
    return sum_loss
Example #15
Source File: run_dualencoder_lsf.py From language with Apache License 2.0 | 5 votes |
def _get_bert_embeddings(model, layers_to_use, aggregation_fn, name="bert"):
    """Extracts and aggregates hidden layers from a BERT model.

    Args:
        model: Object providing `get_all_encoder_layers()`.
        layers_to_use: Indices into the encoder layers to select.
        aggregation_fn: One of "concat", "average" or "attention". Ignored
            when exactly one layer is selected.
        name: Prefix for the mixing-weights variable created by "attention".

    Returns:
        A (hidden_emb, hidden_size) pair.

    Raises:
        ValueError: If several layers are selected and `aggregation_fn` is
            not recognized.
    """
    all_hidden = model.get_all_encoder_layers()
    selected = [all_hidden[i] for i in layers_to_use]
    shapes = [
        modeling.get_shape_list(layer, expected_rank=3)
        for layer in all_hidden
    ]
    num_selected = len(selected)

    # A single layer is returned as-is, whatever the aggregation function.
    if num_selected == 1:
        return selected[0], shapes[0][2]

    if aggregation_fn == "concat":
        hidden_emb = tf.concat(selected, 2)
        hidden_size = sum([shapes[i][2] for i in layers_to_use])
        return hidden_emb, hidden_size

    if aggregation_fn == "average":
        hidden_size = shapes[0][2]
        assert all([shape[2] == hidden_size for shape in shapes]), shapes
        return tf.add_n(selected) / num_selected, hidden_size

    if aggregation_fn == "attention":
        hidden_size = shapes[0][2]
        mixing_weights = tf.get_variable(
            name + "/mixing/weights", [num_selected],
            initializer=tf.zeros_initializer())
        mixing_scores = tf.nn.softmax(mixing_weights)
        # Weighted sum over the stacked (last) layer axis.
        stacked = tf.stack(selected, axis=-1)
        hidden_emb = tf.tensordot(stacked, mixing_scores, [[-1], [0]])
        return hidden_emb, hidden_size

    raise ValueError("Unrecognized aggregation function %s." % aggregation_fn)
Example #16
Source File: run_dualencoder_qa.py From language with Apache License 2.0 | 5 votes |
def _get_bert_embeddings(model, layers_to_use, aggregation_fn, name="bert"):
    """Extracts and aggregates hidden layers from a BERT model.

    Args:
        model: Object providing `get_all_encoder_layers()`.
        layers_to_use: Indices into the encoder layers to select.
        aggregation_fn: One of "concat", "average" or "attention". Ignored
            when exactly one layer is selected.
        name: Prefix for the mixing-weights variable created by "attention".

    Returns:
        A (hidden_emb, hidden_size) pair.

    Raises:
        ValueError: If several layers are selected and `aggregation_fn` is
            not recognized.
    """
    all_hidden = model.get_all_encoder_layers()
    selected = [all_hidden[i] for i in layers_to_use]
    shapes = [
        modeling.get_shape_list(layer, expected_rank=3)
        for layer in all_hidden
    ]
    num_selected = len(selected)

    # A single layer is returned as-is, whatever the aggregation function.
    if num_selected == 1:
        return selected[0], shapes[0][2]

    if aggregation_fn == "concat":
        hidden_emb = tf.concat(selected, 2)
        hidden_size = sum([shapes[i][2] for i in layers_to_use])
        return hidden_emb, hidden_size

    if aggregation_fn == "average":
        hidden_size = shapes[0][2]
        assert all([shape[2] == hidden_size for shape in shapes]), shapes
        return tf.add_n(selected) / num_selected, hidden_size

    if aggregation_fn == "attention":
        hidden_size = shapes[0][2]
        mixing_weights = tf.get_variable(
            name + "/mixing/weights", [num_selected],
            initializer=tf.zeros_initializer())
        mixing_scores = tf.nn.softmax(mixing_weights)
        # Weighted sum over the stacked (last) layer axis.
        stacked = tf.stack(selected, axis=-1)
        hidden_emb = tf.tensordot(stacked, mixing_scores, [[-1], [0]])
        return hidden_emb, hidden_size

    raise ValueError("Unrecognized aggregation function %s." % aggregation_fn)
Example #17
Source File: model_fns.py From language with Apache License 2.0 | 5 votes |
def _get_bert_embeddings(model, layers_to_use, aggregation_fn, name="bert"):
    """Extracts and aggregates hidden layers from a BERT model.

    Args:
        model: Object providing `get_all_encoder_layers()`.
        layers_to_use: Indices into the encoder layers to select.
        aggregation_fn: One of "concat", "average" or "attention". Ignored
            when exactly one layer is selected.
        name: Prefix for the mixing-weights variable created by "attention".

    Returns:
        A (hidden_emb, hidden_size) pair.

    Raises:
        ValueError: If several layers are selected and `aggregation_fn` is
            not recognized.
    """
    all_hidden = model.get_all_encoder_layers()
    selected = [all_hidden[i] for i in layers_to_use]
    shapes = [
        modeling.get_shape_list(layer, expected_rank=3)
        for layer in all_hidden
    ]
    num_selected = len(selected)

    # A single layer is returned as-is, whatever the aggregation function.
    if num_selected == 1:
        return selected[0], shapes[0][2]

    if aggregation_fn == "concat":
        hidden_emb = tf.concat(selected, 2)
        hidden_size = sum([shapes[i][2] for i in layers_to_use])
        return hidden_emb, hidden_size

    if aggregation_fn == "average":
        hidden_size = shapes[0][2]
        assert all([shape[2] == hidden_size for shape in shapes]), shapes
        return tf.add_n(selected) / num_selected, hidden_size

    if aggregation_fn == "attention":
        hidden_size = shapes[0][2]
        mixing_weights = tf.get_variable(
            name + "/mixing/weights", [num_selected],
            initializer=tf.zeros_initializer())
        mixing_scores = tf.nn.softmax(mixing_weights)
        # Weighted sum over the stacked (last) layer axis.
        stacked = tf.stack(selected, axis=-1)
        hidden_emb = tf.tensordot(stacked, mixing_scores, [[-1], [0]])
        return hidden_emb, hidden_size

    raise ValueError("Unrecognized aggregation function %s." % aggregation_fn)
Example #18
Source File: deep_cnn.py From privacy with Apache License 2.0 | 5 votes |
def loss_fun(logits, labels):
    """Computes the total loss from the 'losses' collection.

    The batch-mean cross entropy is added to the 'losses' collection, and
    everything in that collection is summed.

    Args:
        logits: Logits from inference().
        labels: Labels from distorted_inputs or inputs(). 1-D tensor of shape
            [batch_size].

    Returns:
        Loss tensor of type float.
    """
    # Calculate the cross entropy between labels and predictions
    int_labels = tf.cast(labels, tf.int64)
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=int_labels, name='cross_entropy_per_example')

    # Calculate the average cross entropy loss across the batch.
    batch_mean = tf.reduce_mean(per_example, name='cross_entropy')

    # Add to TF collection for losses
    tf.add_to_collection('losses', batch_mean)

    # The total loss is defined as the cross entropy loss plus all of the
    # weight decay terms (L2 loss).
    all_losses = tf.get_collection('losses')
    return tf.add_n(all_losses, name='total_loss')
Example #19
Source File: t2t_model.py From tensor2tensor with Apache License 2.0 | 5 votes |
def _normalize_body_output(self, body_out): if isinstance(body_out, tuple): output, losses = body_out if isinstance(losses, (list, tuple)): losses = {"extra": tf.add_n([tf.reduce_mean(l) for l in losses])} elif isinstance(losses, dict): pass else: losses = {"extra": tf.reduce_mean(losses)} else: output = body_out losses = {"extra": 0.0} return output, losses
Example #20
Source File: modalities.py From magenta with Apache License 2.0 | 5 votes |
def bottom_simple(x, model_hparams, vocab_size, name, reuse):
    """Internal bottom transformation.

    Gathers one embedding per tuple position of `x`, masks out positions
    whose id is 0, and sums the results.

    Args:
        x: Id tensor indexed as x[:, :, :, i] for tuple position i.
        model_hparams: Hyperparameters; reads `symbol_dropout`,
            `multiply_embedding_mode` and `hidden_size`.
        vocab_size: Sequence of vocabulary sizes, one per tuple position.
        name: Variable scope name.
        reuse: Passed to `tf.variable_scope`.

    Returns:
        The summed embeddings, scaled by hidden_size**0.5 when
        `multiply_embedding_mode` is 'sqrt_depth'.
    """
    with tf.variable_scope(name, reuse=reuse):
        embedding_table = _get_weights(model_hparams, vocab_size)
        keep_prob = 1.0 - model_hparams.symbol_dropout
        x = common_layers.dropout_no_scaling(x, keep_prob)

        # Add together the embeddings for each tuple position.
        per_position = []
        for i in range(len(vocab_size)):
            # Ids at position i index into the table after the vocabularies
            # of the earlier positions.
            embedded = tf.gather(
                embedding_table, x[:, :, :, i] + sum(vocab_size[:i]))
            nonzero_mask = tf.expand_dims(
                tf.to_float(tf.not_equal(x[:, :, :, i], 0)), -1)
            per_position.append(embedded * nonzero_mask)
        ret = tf.add_n(per_position)

        if model_hparams.multiply_embedding_mode == 'sqrt_depth':
            ret *= model_hparams.hidden_size**0.5
        return ret
Example #21
Source File: batch_allreduce.py From benchmarks with Apache License 2.0 | 5 votes |
def _all_reduce_using_copy(tensors_across_devices, use_mean):
    """All-reduces a list of tensors by copying them to the current device.

    The tensors are copied to the current device and then reduced.

    Args:
        tensors_across_devices: A list of tensors, each on a different
            device.
        use_mean: Whether to take the mean of the tensors instead of a sum.

    Returns:
        A reduced tensor on the current device.
    """
    total = tf.add_n(tensors_across_devices)
    if not use_mean:
        return total
    # Turn the sum into a mean.
    total *= 1 / len(tensors_across_devices)
    return total
Example #22
Source File: variable_mgr_util.py From benchmarks with Apache License 2.0 | 5 votes |
def aggregate_single_gradient_using_copy(grad_and_vars, use_mean,
                                         check_inf_nan):
    """Aggregates one shared variable's gradient across all towers.

    Note that this function provides a synchronization point across all
    towers.

    Args:
        grad_and_vars: A list or tuple of (gradient, variable) tuples. Each
            (gradient, variable) pair within the outer list represents the
            gradient of the variable calculated for a single tower, and the
            number of pairs equals the number of towers.
        use_mean: if True, mean is taken, else sum of gradients is taken.
        check_inf_nan: check grads for nans and infs.

    Returns:
        The tuple ((aggregated_gradient, variable), has_nan_or_inf). The
        variable is chosen from the first tower. has_nan_or_inf indicates
        whether the grads contain nan or inf; it is None when check_inf_nan
        is false.
    """
    tower_grads = [grad for grad, _ in grad_and_vars]
    num_towers = len(tower_grads)

    has_sparse = any(isinstance(g, tf.IndexedSlices) for g in tower_grads)
    if has_sparse:
        # TODO(reedwm): All-reduce IndexedSlices more effectively.
        aggregated = aggregate_indexed_slices_gradients(tower_grads)
    else:
        aggregated = tf.add_n(tower_grads)

    if use_mean and num_towers > 1:
        aggregated = tf.scalar_mul(1.0 / num_towers, aggregated)

    first_tower_var = grad_and_vars[0][1]
    if not check_inf_nan:
        return (aggregated, first_tower_var), None

    with tf.name_scope('check_for_inf_and_nan'):
        all_finite = tf.reduce_all(tf.is_finite(tower_grads))
        has_nan_or_inf = tf.logical_not(all_finite)
    return (aggregated, first_tower_var), has_nan_or_inf


# This class is copied from
# https://github.com/tensorflow/tensorflow/blob/590d6eef7e91a6a7392c8ffffb7b58f2e0c8bc6b/tensorflow/contrib/training/python/training/device_setter.py#L56.
# We copy it since contrib has been removed from TensorFlow.
Example #23
Source File: t2t_model.py From tensor2tensor with Apache License 2.0 | 5 votes |
def loss(self, logits, features):
    """Computes the training loss from logits and features.

    Args:
        logits: Either a single logits value, or a dict mapping feature names
            to logits.
        features: Dict of features. For each logits key `k` it supplies the
            targets under `k` and optional weights under `k + "_mask"`; the
            single-logits case uses "targets" and "targets_mask".

    Returns:
        For a logits dict, the sum over keys of numerator / denominator from
        `_loss_single`. Otherwise, whatever `_loss_single` returns for the
        "targets" feature.
    """
    if not isinstance(logits, dict):
        return self._loss_single(
            logits,
            "targets",
            features["targets"],
            weights=features.get("targets_mask"))

    losses = {}
    for key, value in six.iteritems(logits):
        losses[key] = self._loss_single(
            value, key, features[key], weights=features.get(key + "_mask"))
        numerator, denominator = losses[key]

        if not common_layers.should_generate_summaries():
            continue
        tf.summary.scalar(key + "_loss", numerator / denominator)
        tf.summary.scalar(key + "_loss_num", numerator)
        tf.summary.scalar(key + "_loss_den", denominator)
        if getattr(self.hparams, "visualize_logits_histogram", False):
            hist = tf.summary.histogram
            hist(key + "_predict", tf.argmax(tf.squeeze(value), axis=-1))
            hist(key + "_targets", features[key])

    return tf.add_n([num / den for num, den in losses.values()])
Example #24
Source File: expert_utils.py From tensor2tensor with Apache License 2.0 | 5 votes |
def reduce_by_device(parallelism, data, reduce_fn):
    """Reduces data per device.

    This can be useful, for example, if we want to all-reduce n tensors on
    k<n devices (like during eval when we have only one device). We call
    reduce_by_device() to first sum the tensors per device, then call our
    usual all-reduce operation to create one sum per device, followed by
    expand_by_device, to create the appropriate number of pointers to these
    results. See all_reduce_ring() below for an example of how this is used.

    Args:
        parallelism: a expert_utils.Parallelism object
        data: a list of Tensors with length parallelism.n
        reduce_fn: a function taking a list of Tensors. e.g. tf.add_n

    Returns:
        device_parallelism: a Parallelism object with each device listed only
            once.
        reduced_data: A list of Tensors, one per device.
    """
    devices_in_order = []
    data_by_device = {}
    for device, datum in zip(parallelism.devices, data):
        # Record each device the first time it is seen.
        if device not in data_by_device:
            devices_in_order.append(device)
        data_by_device.setdefault(device, []).append(datum)

    device_parallelism = Parallelism(devices_in_order)
    grouped = [data_by_device[device] for device in devices_in_order]
    reduced_data = device_parallelism(reduce_fn, grouped)
    return device_parallelism, reduced_data
Example #25
Source File: transformer_vae_flow_prior.py From tensor2tensor with Apache License 2.0 | 5 votes |
def loss_iw(self, logits, features):
    """Computes the loss from logits and features via `_loss_single_iw`.

    Args:
        logits: Either a single logits value, or a dict mapping feature names
            to logits.
        features: Dict of features. For each logits key `k` it supplies the
            targets under `k` and optional weights under `k + "_mask"`; the
            single-logits case uses "targets" and "targets_mask".

    Returns:
        For a logits dict, the sum over keys of numerator / denominator from
        `_loss_single_iw`. Otherwise, whatever `_loss_single_iw` returns for
        the "targets" feature.
    """
    if not isinstance(logits, dict):
        return self._loss_single_iw(
            logits,
            "targets",
            features["targets"],
            weights=features.get("targets_mask"))

    losses = {}
    for key, value in six.iteritems(logits):
        losses[key] = self._loss_single_iw(
            value, key, features[key], weights=features.get(key + "_mask"))
        numerator, denominator = losses[key]

        if not common_layers.should_generate_summaries():
            continue
        tf.summary.scalar(key + "_loss", numerator / denominator)
        tf.summary.scalar(key + "_loss_num", numerator)
        tf.summary.scalar(key + "_loss_den", denominator)
        if getattr(self.hparams, "visualize_logits_histogram", False):
            hist = tf.summary.histogram
            hist(key + "_predict", tf.argmax(tf.squeeze(value), axis=-1))
            hist(key + "_targets", features[key])

    return tf.add_n([num / den for num, den in losses.values()])
Example #26
Source File: transformer_vae_flow_prior.py From tensor2tensor with Apache License 2.0 | 5 votes |
def model_fn_sharded(self, sharded_features):
    """Estimator model_fn sharded along batch dimension.

    Besides computing logits and losses, this averages every monitor value
    across shards and saves the result as a "monitor" summary.

    Args:
        sharded_features: {str: [Tensor]}. Features sharded along batch
            dimension. Each list is the same length (== number of shards).

    Returns:
        sharded_logits: [Tensor]. Logits for each shard of examples. When
            the per-shard logits are dicts, this is instead a dict mapping
            each key to the list of per-shard values.
        losses: {str: 0-D Tensor}. Loss averaged across shards.
    """
    dp = self._data_parallelism

    # [{str: Tensor}]. Transpose of 'sharded_features'.
    features_per_shard = self._to_features_per_datashard(sharded_features)
    sharded_logits, sharded_losses, sharded_monitors, _ = dp(
        self.model_fn, features_per_shard)
    sharded_logits, sharded_losses = dp(
        self.maybe_scheduled_sampling,
        features_per_shard,
        sharded_logits,
        sharded_losses)

    if isinstance(sharded_logits[0], dict):
        # Transpose a list of dicts into a dict of lists.
        sharded_logits = {
            key: [shard_logits[key] for shard_logits in sharded_logits]
            for key in sharded_logits[0]
        }

    losses = t2t_model.average_sharded_losses(sharded_losses)

    num_shards = len(sharded_monitors)
    monitor = {
        key: tf.add_n([shard[key] for shard in sharded_monitors]) / num_shards
        for key in list(sharded_monitors[0].keys())
    }
    ops.save_summary(monitor, "monitor")

    return sharded_logits, losses
Example #27
Source File: common_layers.py From tensor2tensor with Apache License 2.0 | 5 votes |
def tpu_conv1d(inputs, filters, kernel_size, padding="SAME",
               name="tpu_conv1d"):
    """Version of conv1d that works on TPU (as of 11/2017).

    The convolution is built as a sum of dense layers, one per kernel
    position, each applied to a shifted slice of the padded input.

    Args:
        inputs: a Tensor with shape [batch, length, input_depth].
        filters: an integer.
        kernel_size: an integer.
        padding: a string - "SAME" or "LEFT".
        name: a string.

    Returns:
        a Tensor with shape [batch, length, filters].
    """
    if kernel_size == 1:
        return dense(inputs, filters, name=name, use_bias=True)

    if padding == "SAME":
        assert kernel_size % 2 == 1
        first_offset = -((kernel_size - 1) // 2)
    else:
        assert padding == "LEFT"
        first_offset = -(kernel_size - 1)
    last_offset = first_offset + kernel_size - 1

    padded = tf.pad(inputs, [[0, 0], [-first_offset, last_offset], [0, 0]])
    per_tap_outputs = []
    for tap in range(kernel_size):
        if tap:
            window = tf.slice(padded, [0, tap, 0], tf.shape(inputs))
        else:
            window = inputs
        window.set_shape(inputs.get_shape())
        # Only the first tap carries a bias.
        per_tap_outputs.append(
            dense(
                window,
                filters,
                use_bias=(tap == 0),
                name=name + "_%d" % tap))

    ret = tf.add_n(per_tap_outputs)
    ret *= kernel_size**-0.5
    return ret
Example #28
Source File: modalities_test.py From tensor2tensor with Apache License 2.0 | 5 votes |
def testSymbolModalityTargets(self):
    """Checks logits and loss shapes of the symbol modality."""
    batch_size = 10
    num_datashards = 5
    length = 6
    height = 7
    hidden_size = 9
    vocab_size = 11

    model_hparams = common_hparams.basic_params1()
    model_hparams.hidden_size = hidden_size
    model_hparams.mode = tf.estimator.ModeKeys.TRAIN

    body_output = np.random.randint(
        100, size=(batch_size, length, height, hidden_size))
    targets = np.random.randint(
        vocab_size, size=(batch_size, length, height, 1))
    data_parallelism = expert_utils.Parallelism(
        ["/device:CPU:0"] * num_datashards)

    sharded_body_output = tf.split(tf.to_float(body_output), num_datashards)
    sharded_targets = tf.split(targets, num_datashards)
    sharded_logits = data_parallelism(
        modalities.get_top(modalities.ModalityType.SYMBOL),
        sharded_body_output,
        sharded_targets,
        model_hparams,
        vocab_size)
    sharded_loss_num, sharded_loss_den = data_parallelism(
        modalities.get_loss(modalities.ModalityType.SYMBOL),
        sharded_logits,
        sharded_targets,
        model_hparams,
        vocab_size,
        modalities.get_weights_fn(modalities.ModalityType.SYMBOL))

    loss_numerator = tf.add_n(sharded_loss_num)
    loss_denominator = tf.maximum(1.0, tf.add_n(sharded_loss_den))
    train_loss = loss_numerator / loss_denominator
    logits = tf.concat(sharded_logits, 0)

    self.evaluate(tf.global_variables_initializer())
    logits_value, loss_value = self.evaluate((logits, train_loss))

    expected_logits_shape = (batch_size, length, height, 1, vocab_size)
    self.assertEqual(logits_value.shape, expected_logits_shape)
    self.assertEqual(loss_value.shape, ())
Example #29
Source File: modalities_test.py From tensor2tensor with Apache License 2.0 | 5 votes |
def testSymbolModalityTargetsFactored(self):
    """Checks symbol modality output shapes with factored logits enabled."""
    batch_size = 10
    num_datashards = 5
    length = 6
    height = 7
    hidden_size = 9
    vocab_size = 11

    model_hparams = common_hparams.basic_params1()
    model_hparams.factored_logits = True
    model_hparams.hidden_size = hidden_size
    model_hparams.mode = tf.estimator.ModeKeys.TRAIN

    body_output = np.random.randint(
        100, size=(batch_size, length, height, hidden_size))
    targets = np.random.randint(
        vocab_size, size=(batch_size, length, height, 1))
    data_parallelism = expert_utils.Parallelism(
        ["/device:CPU:0"] * num_datashards)

    with self.test_session() as session:
        sharded_body_output = tf.split(
            tf.to_float(body_output), num_datashards)
        sharded_targets = tf.split(targets, num_datashards)
        sharded_logits = data_parallelism(
            modalities.get_top(modalities.ModalityType.SYMBOL),
            sharded_body_output,
            sharded_targets,
            model_hparams,
            vocab_size)
        sharded_loss_num, sharded_loss_den = data_parallelism(
            modalities.get_loss(modalities.ModalityType.SYMBOL),
            sharded_logits,
            sharded_targets,
            model_hparams,
            vocab_size,
            modalities.get_weights_fn(modalities.ModalityType.SYMBOL))

        loss_numerator = tf.add_n(sharded_loss_num)
        loss_denominator = tf.maximum(1.0, tf.add_n(sharded_loss_den))
        train_loss = loss_numerator / loss_denominator
        logits = tf.concat(sharded_logits, 0)

        session.run(tf.global_variables_initializer())
        logits_value, loss_value = session.run((logits, train_loss))

    expected_logits_shape = (batch_size, length, height, 1, vocab_size)
    self.assertEqual(logits_value.shape, expected_logits_shape)
    self.assertEqual(loss_value.shape, ())
Example #30
Source File: configurable_ops.py From morph-net with Apache License 2.0 | 5 votes |
def add_n(self, *args, **kwargs):
    """Forwards an `add_n` call to `_pass_through_mask_list`.

    The call is delegated with the op name 'add_n' and the argument name
    'inputs', followed by the caller's positional and keyword arguments.

    Returns:
        Whatever `self._pass_through_mask_list` returns.
    """
    result = self._pass_through_mask_list('add_n', 'inputs', *args, **kwargs)
    return result