Python tensorflow.add_n() Examples

The following are 30 code examples of tensorflow.add_n(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow , or try the search function .
Example #1
Source File: tfutil.py    From disentangling_conditional_gans with MIT License 6 votes vote down vote up
def finalize_autosummaries():
    global _autosummary_finalized
    if _autosummary_finalized:
        return
    _autosummary_finalized = True
    init_uninited_vars([var for vars in _autosummary_vars.values() for var in vars])
    with tf.device(None), tf.control_dependencies(None):
        for name, vars in _autosummary_vars.items():
            id = name.replace('/', '_')
            with absolute_name_scope('Autosummary/' + id):
                sum = tf.add_n(vars)
                avg = sum[0] / sum[1]
                with tf.control_dependencies([avg]): # read before resetting
                    reset_ops = [tf.assign(var, tf.zeros(2)) for var in vars]
                    with tf.name_scope(None), tf.control_dependencies(reset_ops): # reset before reporting
                        tf.summary.scalar(name, avg)

# Internal helper for creating autosummary accumulators. 
Example #2
Source File: critic.py    From reinforcement_learning with MIT License 6 votes vote down vote up
def __init__(self, state_size, action_size, lr, n_h1=400, n_h2=300, tau=0.001):
    self.state_size = state_size
    self.action_size = action_size
    self.optimizer = tf.train.AdamOptimizer(lr)
    self.tau = tau

    self.n_h1 = n_h1
    self.n_h2 = n_h2

    self.input_s, self.action, self.critic_variables, self.q_value = self._build_network("critic")
    self.input_s_target, self.action_target, self.critic_variables_target, self.q_value_target = self._build_network("critic_target")

    self.target = tf.placeholder(tf.float32, [None])
    self.l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in self.critic_variables])
    self.loss = tf.reduce_mean(tf.square(self.target - self.q_value)) + 0.01*self.l2_loss
    self.optimize = self.optimizer.minimize(self.loss)
    self.update_target_op = [self.critic_variables_target[i].assign(tf.multiply(self.critic_variables[i], self.tau) + tf.multiply(self.critic_variables_target[i], 1 - self.tau)) for i in range(len(self.critic_variables))]
    self.action_gradients = tf.gradients(self.q_value, self.action) 
Example #3
Source File: component.py    From DOTA_models with Apache License 2.0 6 votes vote down vote up
def add_regularizer(self, cost):
    """Adds L2 regularization for parameters which have it turned on.

    Args:
      cost: float cost before regularization.

    Returns:
      Updated cost optionally including regularization.
    """
    if self.network is None:
      return cost
    regularized_weights = self.network.get_l2_regularized_weights()
    if not regularized_weights:
      return cost
    l2_coeff = self.master.hyperparams.l2_regularization_coefficient
    if l2_coeff == 0.0:
      return cost
    tf.logging.info('[%s] Regularizing parameters: %s', self.name,
                    [w.name for w in regularized_weights])
    l2_costs = [tf.nn.l2_loss(p) for p in regularized_weights]
    return tf.add(cost, l2_coeff * tf.add_n(l2_costs), name='regularizer') 
Example #4
Source File: fcn8s_tensorflow.py    From fcn8s_tensorflow with GNU General Public License v3.0 6 votes vote down vote up
def _build_optimizer(self):
        '''
        Builds the training-relevant part of the graph.
        '''

        with tf.name_scope('optimizer'):
            # Create a training step counter.
            global_step = tf.Variable(0, trainable=False, name='global_step')
            # Create placeholder for the learning rate.
            learning_rate = tf.placeholder(dtype=tf.float32, shape=[], name='learning_rate')
            # Compute the regularizatin loss.
            regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) # This is a list of the individual loss values, so we still need to sum them up.
            regularization_loss = tf.add_n(regularization_losses, name='regularization_loss') # Scalar
            # Compute the total loss.
            approximation_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=self.labels, logits=self.fcn8s_output), name='approximation_loss') # Scalar
            total_loss = tf.add(approximation_loss, regularization_loss, name='total_loss')
            # Compute the gradients and apply them.
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, name='adam_optimizer')
            train_op = optimizer.minimize(total_loss, global_step=global_step, name='train_op')

        return total_loss, train_op, learning_rate, global_step 
Example #5
Source File: reinforce_w_baseline.py    From reinforcement_learning with MIT License 6 votes vote down vote up
def _build_policy_net(self):
    """Build policy network"""
    with tf.variable_scope(self.scope):
      self.state_input = tf.placeholder(tf.float32, [None, self.state_size])
      self.action = tf.placeholder(tf.int32, [None])
      self.target = tf.placeholder(tf.float32, [None])

      layer_1 = tf_utils.fc(self.state_input, self.n_hidden_1, tf.nn.relu)
      layer_2 = tf_utils.fc(layer_1, self.n_hidden_2, tf.nn.relu)

      self.value = tf_utils.fc(layer_2, 1)

      self.action_values = tf_utils.fc(layer_2, self.action_size)
      action_mask = tf.one_hot(self.action, self.action_size, 1.0, 0.0)
      self.action_value_pred = tf.reduce_sum(tf.nn.softmax(self.action_values) * action_mask, 1)
      
      self.action_probs = tf.nn.softmax(self.action_values)
      self.value_loss = tf.reduce_mean(tf.square(self.target - self.value))
      self.pg_loss = tf.reduce_mean(-tf.log(self.action_value_pred) * (self.target - self.value))
      self.l2_loss = tf.add_n([ tf.nn.l2_loss(v) for v in tf.trainable_variables()  ]) 
      self.loss = self.pg_loss + 5*self.value_loss + 0.002 * self.l2_loss
      
      self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)
      self.train_op = self.optimizer.minimize(self.loss, global_step=tf.contrib.framework.get_global_step()) 
Example #6
Source File: reinforce.py    From reinforcement_learning with MIT License 6 votes vote down vote up
def _build_policy_net(self):
    """Build policy network"""
    with tf.variable_scope(self.scope):
      self.state_input = tf.placeholder(tf.float32, [None, self.state_size])
      self.action = tf.placeholder(tf.int32, [None])
      self.target = tf.placeholder(tf.float32, [None])

      layer_1 = tf_utils.fc(self.state_input, self.n_hidden_1, tf.nn.relu)
      layer_2 = tf_utils.fc(layer_1, self.n_hidden_2, tf.nn.relu)

      self.action_values = tf_utils.fc(layer_2, self.action_size)
      action_mask = tf.one_hot(self.action, self.action_size, 1.0, 0.0)
      self.action_prob = tf.nn.softmax(self.action_values)
      self.action_value_pred = tf.reduce_sum(self.action_prob * action_mask, 1)

      # l2 regularization
      self.l2_loss = tf.add_n([ tf.nn.l2_loss(v) for v in tf.trainable_variables()  ]) 
      self.pg_loss = tf.reduce_mean(-tf.log(self.action_value_pred) * self.target)

      self.loss = self.pg_loss + 0.002 * self.l2_loss
      self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)
      self.train_op = self.optimizer.minimize(self.loss, global_step=tf.contrib.framework.get_global_step()) 
Example #7
Source File: blocks_std.py    From DOTA_models with Apache License 2.0 6 votes vote down vote up
def _Apply(self, *args):
    if not self._matrices:
      self._matrices = [
          self._linear_block_factory(self._depth, self._initializer)
          for _ in args]

    if len(self._matrices) != len(args):
      raise ValueError('{} expected {} inputs, but observed {} inputs'.format(
          self.name, len(self._matrices), len(args)))

    if len(args) > 1:
      y = tf.add_n([m(x) for m, x in zip(self._matrices, args)])
    else:
      y = self._matrices[0](args[0])

    return self._act(self._bias(y)) 
Example #8
Source File: ac_net.py    From reinforcement_learning with MIT License 6 votes vote down vote up
def __init__(self, state_size, action_size, lr,
               name, n_h1=400, n_h2=300, global_name='global'):

    self.state_size = state_size
    self.action_size = action_size
    self.name = name
    self.n_h1 = n_h1
    self.n_h2 = n_h2

    self.optimizer = tf.train.AdamOptimizer(lr)
    self.input_s, self.input_a, self.advantage, self.target_v, self.policy, self.value, self.action_est, self.model_variables = self._build_network(
        name)

    # 0.5, 0.2, 1.0
    self.value_loss = 0.5 * tf.reduce_sum(tf.square(self.target_v - tf.reshape(self.value, [-1])))
    self.entropy_loss = 1.0 * tf.reduce_sum(self.policy * tf.log(self.policy))
    self.policy_loss = 1.0 * tf.reduce_sum(-tf.log(self.action_est) * self.advantage)
    self.l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in self.model_variables])
    # self.loss = 0.5 * self.value_loss + self.policy_loss + 0.2 * self.entropy_loss
    self.loss = self.value_loss + self.policy_loss + self.entropy_loss
    self.gradients = tf.gradients(self.loss, self.model_variables)
    if name != global_name:
      self.var_norms = tf.global_norm(self.model_variables)
      global_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, global_name)
      self.apply_gradients = self.optimizer.apply_gradients(zip(self.gradients, global_variables)) 
Example #9
Source File: optimize.py    From fine-lm with MIT License 6 votes vote down vote up
def weight_decay(decay_rate, var_list, skip_biases=True):
  """Apply weight decay to vars in var_list."""
  if not decay_rate:
    return 0.

  tf.logging.info("Applying weight decay, decay_rate: %0.5f", decay_rate)

  weight_decays = []
  for v in var_list:
    # Weight decay.
    # This is a heuristic way to detect biases that works for main tf.layers.
    is_bias = len(v.shape.as_list()) == 1 and v.name.endswith("bias:0")
    if not (skip_biases and is_bias):
      with tf.device(v.device):
        v_loss = tf.nn.l2_loss(v)
      weight_decays.append(v_loss)

  return tf.add_n(weight_decays) * decay_rate 
Example #10
Source File: imagenet_utils.py    From ghostnet with Apache License 2.0 6 votes vote down vote up
def build_graph(self, image, label):
        image = ImageNetModel.image_preprocess(image, bgr=self.image_bgr)
        assert self.data_format in ['NCHW', 'NHWC']
        if self.data_format == 'NCHW':
            image = tf.transpose(image, [0, 3, 1, 2])

        logits = self.get_logits(image)
        print('self.label_smoothing', self.label_smoothing)
        loss = ImageNetModel.compute_loss_and_error(logits, label, self.label_smoothing)

        if self.weight_decay > 0:
            wd_loss = regularize_cost(self.weight_decay_pattern,
                                      tf.contrib.layers.l2_regularizer(self.weight_decay),
                                      name='l2_regularize_loss')
            add_moving_summary(loss, wd_loss)
            total_cost = tf.add_n([loss, wd_loss], name='cost')
        else:
            total_cost = tf.identity(loss, name='cost')
            add_moving_summary(total_cost)

        if self.loss_scale != 1.:
            logger.info("Scaling the total loss by {} ...".format(self.loss_scale))
            return total_cost * self.loss_scale
        else:
            return total_cost 
Example #11
Source File: decoder.py    From neural-combinatorial-optimization-rl-tensorflow with MIT License 6 votes vote down vote up
def loop_decode(self):
        # decoder_initial_state: Tuple Tensor (c,h) of size [batch_size x cell.state_size]
        # decoder_first_input: Tensor [batch_size x cell.state_size]

        # Loop the decoding process and collect results
        s,i = self.decoder_initial_state,  tf.cast(self.decoder_first_input,tf.float32)
        for step in range(self.seq_length):
            s, i = self.decode(s,i,step)

        # Return to start
        self.positions.append(self.first_city)

        # Stack visited indices
        self.positions=tf.stack(self.positions,axis=1)  # [Batch,seq_length+1]

        # Sum log_softmax over output steps
        self.log_softmax=tf.add_n(self.log_softmax)  # [Batch,seq_length]

        # Stack attending & pointing distribution
        self.attending=tf.stack(self.attending,axis=1) # [Batch,seq_length,seq_length]
        self.pointing=tf.stack(self.pointing,axis=1) # [Batch,seq_length,seq_length]
        
        # Return stacked lists of visited_indices and log_softmax for backprop
        return self.positions,self.log_softmax 
Example #12
Source File: t2t_model.py    From fine-lm with MIT License 6 votes vote down vote up
def average_sharded_losses(sharded_losses):
  """Average losses across datashards.

  Args:
    sharded_losses: list<dict<str loss_name, Tensor loss>>. The loss
      can be a single Tensor or a 2-tuple (numerator and denominator).

  Returns:
    losses: dict<str loss_name, Tensor avg_loss>
  """
  losses = {}
  for loss_name in sorted(sharded_losses[0]):
    all_shards = [shard_losses[loss_name] for shard_losses in sharded_losses]
    if isinstance(all_shards[0], tuple):
      sharded_num, sharded_den = zip(*all_shards)
      mean_loss = (
          tf.add_n(sharded_num) / tf.maximum(
              tf.cast(1.0, sharded_den[0].dtype), tf.add_n(sharded_den)))
    else:
      mean_loss = tf.reduce_mean(all_shards)

    losses[loss_name] = mean_loss
  return losses 
Example #13
Source File: yellowfin.py    From fine-lm with MIT License 6 votes vote down vote up
def _grad_sparsity(self):
    """Gradient sparsity."""
    # If the sparse minibatch gradient has 10 percent of its entries
    # non-zero, its sparsity is 0.1.
    # The norm of dense gradient averaged from full dataset
    # are roughly estimated norm of minibatch
    # sparse gradient norm * sqrt(sparsity)
    # An extension maybe only correct the sparse blob.
    non_zero_cnt = tf.add_n([tf.count_nonzero(g) for g in self._grad])
    all_entry_cnt = tf.add_n([tf.size(g) for g in self._grad])
    self._sparsity = tf.cast(non_zero_cnt, self._grad[0].dtype)
    self._sparsity /= tf.cast(all_entry_cnt, self._grad[0].dtype)
    avg_op = self._moving_averager.apply([self._sparsity,])
    with tf.control_dependencies([avg_op]):
      self._sparsity_avg = self._moving_averager.average(self._sparsity)
    return avg_op 
Example #14
Source File: value_functions.py    From HardRLWithYoutube with MIT License 6 votes vote down vote up
def __init__(self, ob_dim, ac_dim): #pylint: disable=W0613
        X = tf.placeholder(tf.float32, shape=[None, ob_dim*2+ac_dim*2+2]) # batch of observations
        vtarg_n = tf.placeholder(tf.float32, shape=[None], name='vtarg')
        wd_dict = {}
        h1 = tf.nn.elu(dense(X, 64, "h1", weight_init=U.normc_initializer(1.0), bias_init=0, weight_loss_dict=wd_dict))
        h2 = tf.nn.elu(dense(h1, 64, "h2", weight_init=U.normc_initializer(1.0), bias_init=0, weight_loss_dict=wd_dict))
        vpred_n = dense(h2, 1, "hfinal", weight_init=U.normc_initializer(1.0), bias_init=0, weight_loss_dict=wd_dict)[:,0]
        sample_vpred_n = vpred_n + tf.random_normal(tf.shape(vpred_n))
        wd_loss = tf.get_collection("vf_losses", None)
        loss = tf.reduce_mean(tf.square(vpred_n - vtarg_n)) + tf.add_n(wd_loss)
        loss_sampled = tf.reduce_mean(tf.square(vpred_n - tf.stop_gradient(sample_vpred_n)))
        self._predict = U.function([X], vpred_n)
        optim = kfac.KfacOptimizer(learning_rate=0.001, cold_lr=0.001*(1-0.9), momentum=0.9, \
                                    clip_kl=0.3, epsilon=0.1, stats_decay=0.95, \
                                    async=1, kfac_update=2, cold_iter=50, \
                                    weight_decay_dict=wd_dict, max_grad_norm=None)
        vf_var_list = []
        for var in tf.trainable_variables():
            if "vf" in var.name:
                vf_var_list.append(var)

        update_op, self.q_runner = optim.minimize(loss, loss_sampled, var_list=vf_var_list)
        self.do_update = U.function([X, vtarg_n], update_op) #pylint: disable=E1101
        U.initialize() # Initialize uninitialized TF variables 
Example #15
Source File: loss.py    From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def fprop(self, x, y, **kwargs):
        weights, loss_objects = safe_zip(*self.terms)
        for weight in weights:
            if isinstance(weight, float):
                continue
            if hasattr(weight, 'ndim'):
                assert weight.ndim == 0
                continue
            raise TypeError("weight of %s is not a type that this function "
                            "knows it can accept yet" % str(weight))
        losses = [loss.fprop(x, y, **kwargs) for loss in loss_objects]
        for loss, loss_object in safe_zip(losses, loss_objects):
            if len(loss.get_shape()) > 0:
                raise ValueError("%s.fprop returned a non-scalar value" %
                                 str(loss_object))
        terms = [weight * loss for weight, loss in safe_zip(weights, losses)]

        return tf.add_n(terms) 
Example #16
Source File: resnet_tf.py    From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def _decay(self):
        """L2 weight decay loss."""
        if self.decay_cost is not None:
            return self.decay_cost

        costs = []
        if self.device_name is None:
            for var in tf.trainable_variables():
                if var.op.name.find(r'DW') > 0:
                    costs.append(tf.nn.l2_loss(var))
        else:
            for layer in self.layers:
                for var in layer.params_device[self.device_name].values():
                    if (isinstance(var, tf.Variable) and
                            var.op.name.find(r'DW') > 0):
                        costs.append(tf.nn.l2_loss(var))

        self.decay_cost = tf.multiply(self.hps.weight_decay_rate,
                                      tf.add_n(costs))
        return self.decay_cost 
Example #17
Source File: value_functions.py    From lirpg with MIT License 6 votes vote down vote up
def __init__(self, ob_dim, ac_dim): #pylint: disable=W0613
        X = tf.placeholder(tf.float32, shape=[None, ob_dim*2+ac_dim*2+2]) # batch of observations
        vtarg_n = tf.placeholder(tf.float32, shape=[None], name='vtarg')
        wd_dict = {}
        h1 = tf.nn.elu(dense(X, 64, "h1", weight_init=U.normc_initializer(1.0), bias_init=0, weight_loss_dict=wd_dict))
        h2 = tf.nn.elu(dense(h1, 64, "h2", weight_init=U.normc_initializer(1.0), bias_init=0, weight_loss_dict=wd_dict))
        vpred_n = dense(h2, 1, "hfinal", weight_init=U.normc_initializer(1.0), bias_init=0, weight_loss_dict=wd_dict)[:,0]
        sample_vpred_n = vpred_n + tf.random_normal(tf.shape(vpred_n))
        wd_loss = tf.get_collection("vf_losses", None)
        loss = tf.reduce_mean(tf.square(vpred_n - vtarg_n)) + tf.add_n(wd_loss)
        loss_sampled = tf.reduce_mean(tf.square(vpred_n - tf.stop_gradient(sample_vpred_n)))
        self._predict = U.function([X], vpred_n)
        optim = kfac.KfacOptimizer(learning_rate=0.001, cold_lr=0.001*(1-0.9), momentum=0.9, \
                                    clip_kl=0.3, epsilon=0.1, stats_decay=0.95, \
                                    async=1, kfac_update=2, cold_iter=50, \
                                    weight_decay_dict=wd_dict, max_grad_norm=None)
        vf_var_list = []
        for var in tf.trainable_variables():
            if "vf" in var.name:
                vf_var_list.append(var)

        update_op, self.q_runner = optim.minimize(loss, loss_sampled, var_list=vf_var_list)
        self.do_update = U.function([X, vtarg_n], update_op) #pylint: disable=E1101
        U.initialize() # Initialize uninitialized TF variables 
Example #18
Source File: enas_search_space.py    From deep_architect with MIT License 6 votes vote down vote up
def concatenate_skip_layers(h_connects, weight_sharer):

    def compile_fn(di, dh):

        def fn(di, is_training=True):
            inputs = [
                di['in' + str(i)]
                for i in range(len(dh))
                if dh['select_' + str(i)]
            ]
            inputs.append(di['in' + str(len(dh))])
            with tf.device('/gpu:0'):
                out = tf.add_n(inputs)
                return {'out': tf.add_n(inputs)}

        return fn

    return TFEM(
        'SkipConcat',
        {'select_' + str(i): h_connects[i] for i in range(len(h_connects))},
        compile_fn, ['in' + str(i) for i in range(len(h_connects) + 1)],
        ['out']).get_io() 
Example #19
Source File: distributions.py    From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 5 votes vote down vote up
def entropy(self):
        return tf.add_n([p.entropy() for p in self.categoricals]) 
Example #20
Source File: distributions.py    From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 5 votes vote down vote up
def kl(self, other):
        return tf.add_n([p.kl(q) for p, q in zip(self.categoricals, other.categoricals)]) 
Example #21
Source File: distributions.py    From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 5 votes vote down vote up
def neglogp(self, x):
        return tf.add_n([p.neglogp(px) for p, px in zip(self.categoricals, tf.unstack(x, axis=-1))]) 
Example #22
Source File: distributions.py    From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 5 votes vote down vote up
def entropy(self):
        return tf.add_n([p.entropy() for p in self.categoricals]) 
Example #23
Source File: distributions.py    From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 5 votes vote down vote up
def neglogp(self, x):
        return tf.add_n([p.neglogp(px) for p, px in zip(self.categoricals, tf.unstack(x, axis=-1))]) 
Example #24
Source File: model_deploy.py    From STORK with MIT License 5 votes vote down vote up
def _sum_clones_gradients(clone_grads):
  """Calculate the sum gradient for each shared variable across all clones.

  This function assumes that the clone_grads has been scaled appropriately by
  1 / num_clones.

  Args:
    clone_grads: A List of List of tuples (gradient, variable), one list per
    `Clone`.

  Returns:
     List of tuples of (gradient, variable) where the gradient has been summed
     across all clones.
  """
  sum_grads = []
  for grad_and_vars in zip(*clone_grads):
    # Note that each grad_and_vars looks like the following:
    #   ((grad_var0_clone0, var0), ... (grad_varN_cloneN, varN))
    grads = []
    var = grad_and_vars[0][1]
    for g, v in grad_and_vars:
      assert v == var
      if g is not None:
        grads.append(g)
    if grads:
      if len(grads) > 1:
        sum_grad = tf.add_n(grads, name=var.op.name + '/sum_grads')
      else:
        sum_grad = grads[0]
      sum_grads.append((sum_grad, var))
  return sum_grads 
Example #25
Source File: distributions.py    From lirpg with MIT License 5 votes vote down vote up
def neglogp(self, x):
        return tf.add_n([p.neglogp(px) for p, px in zip(self.categoricals, tf.unstack(x, axis=-1))]) 
Example #26
Source File: distributions.py    From lirpg with MIT License 5 votes vote down vote up
def kl(self, other):
        return tf.add_n([p.kl(q) for p, q in zip(self.categoricals, other.categoricals)]) 
Example #27
Source File: distributions.py    From lirpg with MIT License 5 votes vote down vote up
def entropy(self):
        return tf.add_n([p.entropy() for p in self.categoricals]) 
Example #28
Source File: distributions.py    From HardRLWithYoutube with MIT License 5 votes vote down vote up
def entropy(self):
        return tf.add_n([p.entropy() for p in self.categoricals]) 
Example #29
Source File: distributions.py    From HardRLWithYoutube with MIT License 5 votes vote down vote up
def kl(self, other):
        return tf.add_n([p.kl(q) for p, q in zip(self.categoricals, other.categoricals)]) 
Example #30
Source File: distributions.py    From HardRLWithYoutube with MIT License 5 votes vote down vote up
def neglogp(self, x):
        return tf.add_n([p.neglogp(px) for p, px in zip(self.categoricals, tf.unstack(x, axis=-1))])