Python tensorflow.log_sigmoid() Examples

The following are 17 code examples of tensorflow.log_sigmoid(). The project and source file for each example are listed above it, and you may also want to check out the other available functions and classes of the tensorflow module.
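tf.log_sigmoid(x) computes log(1 / (1 + exp(-x))), the log of the sigmoid, in a numerically stable way (internally it is evaluated as -softplus(-x)). The minimal sketch below, which is not taken from any of the projects listed here, shows why it is preferred over composing tf.log and tf.sigmoid:

import tensorflow as tf  # TensorFlow 1.x, as in the examples below

x = tf.constant([-100.0, 0.0, 100.0])
stable = tf.log_sigmoid(x)         # [-100., -0.693..., ~0.]; stays finite for large negative x
naive = tf.log(tf.sigmoid(x))      # sigmoid(-100.) underflows to 0 in float32, so the log becomes -inf
with tf.Session() as sess:
    print(sess.run([stable, naive]))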
Example #1
Source File: routing.py    From CapsLayer with Apache License 2.0
def M_step(log_R, log_activation, vote, lambda_val=0.01):
    R_shape = tf.shape(log_R)
    # Weight the routing assignments by the input capsules' activations (log domain).
    log_R = log_R + log_activation

    R_sum_i = cl.reduce_sum(tf.exp(log_R), axis=-3, keepdims=True)
    log_normalized_R = log_R - tf.reduce_logsumexp(log_R, axis=-3, keepdims=True)

    # Pose is the assignment-weighted mean of the votes; log_var is its log-variance.
    pose = cl.reduce_sum(vote * tf.exp(log_normalized_R), axis=-3, keepdims=True)
    log_var = tf.reduce_logsumexp(log_normalized_R + cl.log(tf.square(vote - pose)), axis=-3, keepdims=True)

    beta_v = tf.get_variable('beta_v',
                             shape=[1 for i in range(len(pose.shape) - 2)] + [pose.shape[-2], 1],
                             initializer=tf.truncated_normal_initializer(mean=15., stddev=3.))
    cost = R_sum_i * (beta_v + 0.5 * log_var)

    beta_a = tf.get_variable('beta_a',
                             shape=[1 for i in range(len(pose.shape) - 2)] + [pose.shape[-2], 1],
                             initializer=tf.truncated_normal_initializer(mean=100.0, stddev=10))
    cost_sum_h = cl.reduce_sum(cost, axis=-1, keepdims=True)
    # The output capsule's activation is sigmoid(lambda * (beta_a - cost)); keep it in log space.
    logit = lambda_val * (beta_a - cost_sum_h)
    log_activation = tf.log_sigmoid(logit)

    return pose, log_var, log_activation
Example #2
Source File: discrim_net.py    From imitation with MIT License
def build_graph(self):
        phs, inps = networks.build_inputs(
            self._observation_space, self._action_space, scale=self._scale
        )
        self._obs_ph, self._act_ph, self._next_obs_ph, self._done_ph = phs
        self.obs_input, self.act_input, _, self.done_input = inps

        with tf.variable_scope("discrim_network"):
            self._disc_logits_gen_is_high, self._disc_mlp = self._build_discrim_net(
                [self.obs_input, self.act_input], **self._build_discrim_net_kwargs
            )
        self._policy_test_reward = self._policy_train_reward = -tf.log_sigmoid(
            self._disc_logits_gen_is_high
        )

        self._disc_loss = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=self._disc_logits_gen_is_high,
            labels=tf.cast(self.labels_gen_is_one_ph, tf.float32),
        ) 
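Since -log(sigmoid(x)) equals softplus(-x), the reward defined above could equivalently be written with tf.nn.softplus; a quick equivalence sketch (hypothetical inputs, not project code):

logits = tf.constant([-3.0, 0.0, 3.0])
reward_a = -tf.log_sigmoid(logits)     # as in build_graph() above
reward_b = tf.nn.softplus(-logits)     # log(1 + exp(-logits)), the same values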
Example #3
Source File: loss_ops.py    From TensorflowFramework with BSD 3-Clause "New" or "Revised" License
def gan_loss(x, gz, discriminator):
  """Original GAN loss.

  Args:
    x: Batch of real samples.
    gz: Batch of generated samples.
    discriminator: Discriminator function.
  Returns:
    d_loss: Discriminator loss.
    g_loss: Generator loss.
  """
  dx = discriminator(x)
  with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    dgz = discriminator(gz)
  d_loss = -tf.reduce_mean(tf.log_sigmoid(dx) + tf.log_sigmoid(1 - dgz))
  g_loss = -tf.reduce_mean(tf.log_sigmoid(dgz))
  return d_loss, g_loss 
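Note that the discriminator term above pairs tf.log_sigmoid(dx) with tf.log_sigmoid(1 - dgz). If discriminator() returns raw logits, the textbook minimax objective log D(x) + log(1 - D(G(z))) is usually expressed with a negated logit instead, since log(1 - sigmoid(d)) = log_sigmoid(-d). A hedged sketch of that formulation (an assumption about the intended convention, not a correction of the project):

  d_loss = -tf.reduce_mean(tf.log_sigmoid(dx) + tf.log_sigmoid(-dgz))   # -[log D(x) + log(1 - D(G(z)))]
  g_loss = -tf.reduce_mean(tf.log_sigmoid(dgz))                         # non-saturating generator loss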
Example #4
Source File: logistic.py    From flowpp with MIT License
def logistic_logcdf(*, x, mean, logscale):
    """
    log cdf of logistic distribution
    this operates elementwise
    """
    z = (x - mean) * tf.exp(-logscale)
    return tf.log_sigmoid(z) 
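Because the CDF of a logistic distribution with location mean and scale exp(logscale) is sigmoid((x - mean) / exp(logscale)), its log-CDF is exactly tf.log_sigmoid(z). If TensorFlow Probability is installed (an extra dependency, not something flowpp uses here), the result can be cross-checked:

import tensorflow_probability as tfp

x, mean, logscale = tf.constant(0.3), tf.constant(-0.1), tf.constant(0.5)
manual = logistic_logcdf(x=x, mean=mean, logscale=logscale)
reference = tfp.distributions.Logistic(loc=mean, scale=tf.exp(logscale)).log_cdf(x)  # same value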
Example #5
Source File: train.py    From fine-lm with MIT License
def discriminator(encodings,
                  sequence_lengths,
                  lang_ids,
                  num_layers=3,
                  hidden_size=1024,
                  dropout=0.3):
  """Discriminates the encoder outputs against lang_ids.

  Args:
    encodings: The encoder outputs of shape [batch_size, max_time, hidden_size].
    sequence_lengths: The length of each sequence of shape [batch_size].
    lang_ids: The true lang id of each sequence of shape [batch_size].
    num_layers: The number of layers of the discriminator.
    hidden_size: The hidden size of the discriminator.
    dropout: The dropout to apply on each discriminator layer output.

  Returns:
    A tuple with: the discriminator loss (L_d) and the adversarial loss (L_adv).
  """
  x = encodings
  for _ in range(num_layers):
    x = tf.nn.dropout(x, 1.0 - dropout)
    x = tf.layers.dense(x, hidden_size, activation=tf.nn.leaky_relu)
  x = tf.nn.dropout(x, 1.0 - dropout)
  y = tf.layers.dense(x, 1)

  mask = tf.sequence_mask(
      sequence_lengths, maxlen=tf.shape(encodings)[1], dtype=tf.float32)
  mask = tf.expand_dims(mask, -1)

  y = tf.log_sigmoid(y) * mask
  y = tf.reduce_sum(y, axis=1)
  y = tf.exp(y)

  l_d = binary_cross_entropy(y, lang_ids, smoothing=0.1)
  l_adv = binary_cross_entropy(y, 1 - lang_ids)

  return l_d, l_adv 
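The masked sum of per-timestep log-sigmoid values followed by tf.exp turns the discriminator output into a product of per-step probabilities over the valid (non-padded) positions of each sequence. A toy illustration with made-up shapes and values (not project code):

y = tf.constant([[[0.2], [1.5], [3.0]]])        # [batch=1, max_time=3, 1] logits
mask = tf.constant([[[1.0], [1.0], [0.0]]])     # last timestep is padding
seq_prob = tf.exp(tf.reduce_sum(tf.log_sigmoid(y) * mask, axis=1))   # == sigmoid(0.2) * sigmoid(1.5)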
Example #6
Source File: GEM.py    From DGFraud with Apache License 2.0
def forward_propagation(self):
        with tf.variable_scope('gem_embedding'):
            h = tf.get_variable(name='init_embedding', shape=[self.nodes, self.encoding],
                                initializer=tf.contrib.layers.xavier_initializer())
            for i in range(0, self.hop):
                f = GEMLayer(self.placeholders, self.nodes, self.meta, self.embedding, self.encoding)
                gem_out = f(inputs=h)
                h = tf.reshape(gem_out, [self.nodes, self.encoding])
            print('GEM embedding over!')

        with tf.variable_scope('classification'):
            batch_data = tf.matmul(tf.one_hot(self.placeholders['batch_index'], self.nodes), h)
            W = tf.get_variable(name='weights',
                                shape=[self.encoding, self.class_size],
                                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.get_variable(name='bias', shape=[1, self.class_size], initializer=tf.zeros_initializer())
            tf.transpose(batch_data, perm=[0, 1])
            logits = tf.matmul(batch_data, W) + b

            u = tf.get_variable(name='u',
                                shape=[1, self.encoding],
                                initializer=tf.contrib.layers.xavier_initializer())

            loss = tf.losses.sigmoid_cross_entropy(multi_class_labels=self.placeholders['t'], logits=logits)

            # TODO
            # loss = -tf.reduce_sum(
            #     tf.log_sigmoid(self.placeholders['t'] * tf.matmul(u, tf.transpose(batch_data, perm=[1, 0]))))

        # return loss, logits
        return loss, tf.nn.sigmoid(logits) 
Example #7
Source File: binary.py    From sonic_contest with MIT License
def kl_divergence(self, param_batch_1, param_batch_2):
        probs_on = tf.sigmoid(param_batch_1)
        probs_off = tf.sigmoid(-param_batch_1)
        log_diff_on = tf.log_sigmoid(param_batch_1) - tf.log_sigmoid(param_batch_2)
        log_diff_off = tf.log_sigmoid(-param_batch_1) - tf.log_sigmoid(-param_batch_2)
        kls = probs_on*log_diff_on + probs_off*log_diff_off
        return tf.reduce_sum(kls, axis=-1) 
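This is the KL divergence between factorized Bernoulli distributions whose logits are param_batch_1 and param_batch_2: sigmoid(a) * [log sigmoid(a) - log sigmoid(b)] + sigmoid(-a) * [log sigmoid(-a) - log sigmoid(-b)], summed over the last axis. With TensorFlow Probability available (an assumption, not a sonic_contest dependency), the per-element terms can be cross-checked against the built-in Bernoulli KL:

import tensorflow_probability as tfp

a = tf.constant([0.5, -1.0])
b = tf.constant([2.0, 0.3])
manual = (tf.sigmoid(a) * (tf.log_sigmoid(a) - tf.log_sigmoid(b))
          + tf.sigmoid(-a) * (tf.log_sigmoid(-a) - tf.log_sigmoid(-b)))
reference = tfp.distributions.kl_divergence(tfp.distributions.Bernoulli(logits=a),
                                            tfp.distributions.Bernoulli(logits=b))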
Example #8
Source File: binary.py    From sonic_contest with MIT License
def entropy(self, param_batch):
        ent_on = tf.log_sigmoid(param_batch) * tf.sigmoid(param_batch)
        ent_off = tf.log_sigmoid(-param_batch) * tf.sigmoid(-param_batch)
        return tf.negative(tf.reduce_sum(ent_on + ent_off, axis=-1)) 
Example #9
Source File: binary.py    From sonic_contest with MIT License
def log_prob(self, param_batch, sample_vecs):
        sample_vecs = tf.cast(sample_vecs, param_batch.dtype)
        log_probs_on = tf.log_sigmoid(param_batch) * sample_vecs
        log_probs_off = tf.log_sigmoid(-param_batch) * (1-sample_vecs)
        return tf.reduce_sum(log_probs_on + log_probs_off, axis=-1) 
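The expression above, sample * log sigmoid(x) + (1 - sample) * log sigmoid(-x), is the Bernoulli log-likelihood and is exactly the negative of TensorFlow's built-in sigmoid cross-entropy; a short equivalence sketch (hypothetical inputs):

params = tf.constant([[0.5, -2.0]])
samples = tf.constant([[1.0, 0.0]])
manual = tf.log_sigmoid(params) * samples + tf.log_sigmoid(-params) * (1 - samples)
builtin = -tf.nn.sigmoid_cross_entropy_with_logits(labels=samples, logits=params)   # same values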
Example #10
Source File: loss_functions.py    From AmpliGraph with Apache License 2.0
def _apply(self, y_true, y_pred):
        """ Apply the loss function.

        Parameters
        ----------
        y_true : tf.Tensor
            A tensor of true values.
        y_pred : tf.Tensor
            A tensor of predicted values.

        Returns
        -------
        loss : tf.Tensor
            The loss value that must be minimized.

        """

        if self._loss_parameters['label_smoothing'] is not None:
            y_true = tf.add((1 - self._loss_parameters['label_smoothing']) * y_true,
                            (self._loss_parameters['label_smoothing']) / self._loss_parameters['num_entities'])

        if self._loss_parameters['label_weighting']:

            eps = 1e-6
            wt = tf.reduce_mean(y_true)
            loss = -tf.reduce_sum((1 - wt) * y_true * tf.log_sigmoid(y_pred)
                                  + wt * (1 - y_true) * tf.log(1 - tf.sigmoid(y_pred) + eps))

        else:
            loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(labels=y_true, logits=y_pred))

        return loss 
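In the weighted branch, the term tf.log(1 - tf.sigmoid(y_pred) + eps) can lose precision for large positive scores; since log(1 - sigmoid(s)) = log_sigmoid(-s), an equivalent eps-free rewrite would be (a sketch of an alternative, not the library's implementation):

loss = -tf.reduce_sum((1 - wt) * y_true * tf.log_sigmoid(y_pred)
                      + wt * (1 - y_true) * tf.log_sigmoid(-y_pred))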
Example #11
Source File: loss_functions.py    From AmpliGraph with Apache License 2.0
def _apply(self, scores_pos, scores_neg):
        """Apply the loss function.

        Parameters
        ----------
        scores_pos : tf.Tensor, shape [n, 1]
            A tensor of scores assigned to positive statements.
        scores_neg : tf.Tensor, shape [n*negative_count, 1]
            A tensor of scores assigned to negative statements.

        Returns
        -------
        loss : tf.Tensor
            The loss value that must be minimized.

        """
        margin = tf.constant(self._loss_parameters['margin'], dtype=tf.float32, name='margin')
        alpha = tf.constant(self._loss_parameters['alpha'], dtype=tf.float32, name='alpha')

        # Compute p(neg_samples) based on eq 4
        scores_neg_reshaped = tf.reshape(scores_neg, [self._loss_parameters['eta'], tf.shape(scores_pos)[0]])
        p_neg = tf.nn.softmax(alpha * scores_neg_reshaped, axis=0)

        # Compute loss based on eq 5
        loss = tf.reduce_sum(-tf.log_sigmoid(margin - tf.negative(scores_pos))) - tf.reduce_sum(
            tf.multiply(p_neg, tf.log_sigmoid(tf.negative(scores_neg_reshaped) - margin)))

        return loss 
Example #12
Source File: CenterNet.py    From CenterNet-tensorflow with MIT License
def _keypoints_loss(self, keypoints, gbbox_yx, gbbox_y, gbbox_x, gbbox_h, gbbox_w,
                        classid, meshgrid_y, meshgrid_x, pshape):
        sigma = self._gaussian_radius(gbbox_h, gbbox_w, 0.7)
        gbbox_y = tf.reshape(gbbox_y, [-1, 1, 1])
        gbbox_x = tf.reshape(gbbox_x, [-1, 1, 1])
        sigma = tf.reshape(sigma, [-1, 1, 1])

        num_g = tf.shape(gbbox_y)[0]
        meshgrid_y = tf.expand_dims(meshgrid_y, 0)
        meshgrid_y = tf.tile(meshgrid_y, [num_g, 1, 1])
        meshgrid_x = tf.expand_dims(meshgrid_x, 0)
        meshgrid_x = tf.tile(meshgrid_x, [num_g, 1, 1])

        keyp_penalty_reduce = tf.exp(-((gbbox_y-meshgrid_y)**2 + (gbbox_x-meshgrid_x)**2)/(2*sigma**2))
        zero_like_keyp = tf.expand_dims(tf.zeros(pshape, dtype=tf.float32), axis=-1)
        reduction = []
        gt_keypoints = []
        for i in range(self.num_classes):
            exist_i = tf.equal(classid, i)
            reduce_i = tf.boolean_mask(keyp_penalty_reduce, exist_i, axis=0)
            reduce_i = tf.cond(
                tf.equal(tf.shape(reduce_i)[0], 0),
                lambda: zero_like_keyp,
                lambda: tf.expand_dims(tf.reduce_max(reduce_i, axis=0), axis=-1)
            )
            reduction.append(reduce_i)

            gbbox_yx_i = tf.boolean_mask(gbbox_yx, exist_i)
            gt_keypoints_i = tf.cond(
                tf.equal(tf.shape(gbbox_yx_i)[0], 0),
                lambda: zero_like_keyp,
                lambda: tf.expand_dims(tf.sparse.to_dense(tf.sparse.SparseTensor(gbbox_yx_i, tf.ones_like(gbbox_yx_i[..., 0], tf.float32), dense_shape=pshape), validate_indices=False),
                                       axis=-1)
            )
            gt_keypoints.append(gt_keypoints_i)
        reduction = tf.concat(reduction, axis=-1)
        gt_keypoints = tf.concat(gt_keypoints, axis=-1)
        keypoints_pos_loss = -tf.pow(1.-tf.sigmoid(keypoints), 2.) * tf.log_sigmoid(keypoints) * gt_keypoints
        keypoints_neg_loss = -tf.pow(1.-reduction, 4) * tf.pow(tf.sigmoid(keypoints), 2.) * (-keypoints+tf.log_sigmoid(keypoints)) * (1.-gt_keypoints)
        keypoints_loss = tf.reduce_sum(keypoints_pos_loss) / tf.cast(num_g, tf.float32) + tf.reduce_sum(keypoints_neg_loss) / tf.cast(num_g, tf.float32)
        return keypoints_loss

    # from cornernet 
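In the negative-sample term above, (-keypoints + tf.log_sigmoid(keypoints)) is the numerically stable form of log(1 - sigmoid(keypoints)), using the identity log(1 - sigmoid(k)) = -k + log_sigmoid(k); a small check with made-up values:

k = tf.constant([-10.0, 0.0, 40.0])
stable = -k + tf.log_sigmoid(k)        # finite everywhere
direct = tf.log(1.0 - tf.sigmoid(k))   # hits -inf once sigmoid(k) rounds to 1.0 in float32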
Example #13
Source File: classifier_adapter.py    From BERT with Apache License 2.0
def siamese_classifier(config, pooled_output, num_labels,
						labels, dropout_prob,
						ratio_weight=None):

	if config.get("output_layer", "interaction") == "interaction":
		print("==apply interaction layer==")
		repres_a = pooled_output[0]
		repres_b = pooled_output[1]

		output_layer = tf.concat([repres_a, repres_b, tf.abs(repres_a-repres_b), repres_a*repres_b], axis=-1)
		hidden_size = output_layer.shape[-1].value

		output_weights = tf.get_variable(
			"output_weights", [num_labels, hidden_size],
			initializer=tf.truncated_normal_initializer(stddev=0.02))

		output_bias = tf.get_variable(
			"output_bias", [num_labels], initializer=tf.zeros_initializer())

		output_layer = tf.nn.dropout(output_layer, keep_prob=1 - dropout_prob)

		logits = tf.matmul(output_layer, output_weights, transpose_b=True)
		logits = tf.nn.bias_add(logits, output_bias)

		print("==logits shape==", logits.get_shape())

		if config.get("label_type", "single_label") == "single_label":
			if config.get("loss", "entropy") == "entropy":
				per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
													logits=logits, 
													labels=tf.stop_gradient(labels))
			elif config.get("loss", "entropy") == "focal_loss":
				per_example_loss, _ = loss_utils.focal_loss_multi_v1(config,
															logits=logits, 
															labels=labels)
			print("==per_example_loss shape==", per_example_loss.get_shape())
			loss = tf.reduce_mean(per_example_loss)

			return (loss, per_example_loss, logits)
		elif config.get("label_type", "single_label") == "multi_label":
			logits = tf.log_sigmoid(logits)
			per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
													logits=logits, 
													labels=tf.stop_gradient(labels))
			per_example_loss = tf.reduce_mean(per_example_loss, axis=-1)
			loss = tf.reduce_mean(per_example_loss)
			return (loss, per_example_loss, logits)
		else:
			raise NotImplementedError() 
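For the multi_label branch, this adapter passes tf.log_sigmoid(logits) into tf.nn.sigmoid_cross_entropy_with_logits; the same pattern appears in the next example, while the classifier.py variants (Examples #16 and #17 below) keep that line commented out and feed the raw logits. The conventional multi-label formulation uses the raw logits directly, roughly like this (an illustrative sketch, not the repository's chosen behaviour):

per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                                        logits=logits,
                                        labels=tf.stop_gradient(labels))
per_example_loss = tf.reduce_mean(per_example_loss, axis=-1)
loss = tf.reduce_mean(per_example_loss)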
Example #14
Source File: classifier_adapter.py    From BERT with Apache License 2.0
def distributed_classifier(config, pooled_output, 
						num_labels, labels,
						dropout_prob,
						ratio_weight=None):

	output_layer = pooled_output

	hidden_size = output_layer.shape[-1].value

	output_weights = tf.get_variable(
			"output_weights", [num_labels, hidden_size],
			initializer=tf.truncated_normal_initializer(stddev=0.02))

	output_bias = tf.get_variable(
			"output_bias", [num_labels], initializer=tf.zeros_initializer())

	output_layer = tf.nn.dropout(output_layer, keep_prob=1 - dropout_prob)

	logits = tf.matmul(output_layer, output_weights, transpose_b=True)
	logits = tf.nn.bias_add(logits, output_bias)

	if config.get("label_type", "single_label") == "single_label":
		if config.get("loss", "entropy") == "entropy":
			per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
												logits=logits, 
												labels=tf.stop_gradient(labels))
		elif config.get("loss", "entropy") == "focal_loss":
			per_example_loss = loss_utils.focal_loss_multi_v1(config,
														logits=logits, 
														labels=labels)
		loss = tf.reduce_mean(per_example_loss)

		return (loss, per_example_loss, logits)
	elif config.get("label_type", "single_label") == "multi_label":
		logits = tf.log_sigmoid(logits)
		per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
												logits=logits, 
												labels=tf.stop_gradient(labels))
		per_example_loss = tf.reduce_mean(per_example_loss, axis=-1)
		loss = tf.reduce_mean(per_example_loss)
		return (loss, per_example_loss, logits)
	else:
		raise NotImplementedError() 
Example #15
Source File: CenterNet.py    From Object-Detection-API-Tensorflow with MIT License
def _keypoints_loss(self, keypoints, gbbox_yx, gbbox_y, gbbox_x, gbbox_h, gbbox_w,
                        classid, meshgrid_y, meshgrid_x, pshape):
        sigma = self._gaussian_radius(gbbox_h, gbbox_w, 0.7)
        gbbox_y = tf.reshape(gbbox_y, [-1, 1, 1])
        gbbox_x = tf.reshape(gbbox_x, [-1, 1, 1])
        sigma = tf.reshape(sigma, [-1, 1, 1])

        num_g = tf.shape(gbbox_y)[0]
        meshgrid_y = tf.expand_dims(meshgrid_y, 0)
        meshgrid_y = tf.tile(meshgrid_y, [num_g, 1, 1])
        meshgrid_x = tf.expand_dims(meshgrid_x, 0)
        meshgrid_x = tf.tile(meshgrid_x, [num_g, 1, 1])

        keyp_penalty_reduce = tf.exp(-((gbbox_y-meshgrid_y)**2 + (gbbox_x-meshgrid_x)**2)/(2*sigma**2))
        zero_like_keyp = tf.expand_dims(tf.zeros(pshape, dtype=tf.float32), axis=-1)
        reduction = []
        gt_keypoints = []
        for i in range(self.num_classes):
            exist_i = tf.equal(classid, i)
            reduce_i = tf.boolean_mask(keyp_penalty_reduce, exist_i, axis=0)
            reduce_i = tf.cond(
                tf.equal(tf.shape(reduce_i)[0], 0),
                lambda: zero_like_keyp,
                lambda: tf.expand_dims(tf.reduce_max(reduce_i, axis=0), axis=-1)
            )
            reduction.append(reduce_i)

            gbbox_yx_i = tf.boolean_mask(gbbox_yx, exist_i)
            gt_keypoints_i = tf.cond(
                tf.equal(tf.shape(gbbox_yx_i)[0], 0),
                lambda: zero_like_keyp,
                lambda: tf.expand_dims(tf.sparse.to_dense(tf.sparse.SparseTensor(gbbox_yx_i, tf.ones_like(gbbox_yx_i[..., 0], tf.float32), dense_shape=pshape), validate_indices=False),
                                       axis=-1)
            )
            gt_keypoints.append(gt_keypoints_i)
        reduction = tf.concat(reduction, axis=-1)
        gt_keypoints = tf.concat(gt_keypoints, axis=-1)
        keypoints_pos_loss = -tf.pow(1.-tf.sigmoid(keypoints), 2.) * tf.log_sigmoid(keypoints) * gt_keypoints
        keypoints_neg_loss = -tf.pow(1.-reduction, 4) * tf.pow(tf.sigmoid(keypoints), 2.) * (-keypoints+tf.log_sigmoid(keypoints)) * (1.-gt_keypoints)
        keypoints_loss = tf.reduce_sum(keypoints_pos_loss) / tf.cast(num_g, tf.float32) + tf.reduce_sum(keypoints_neg_loss) / tf.cast(num_g, tf.float32)
        return keypoints_loss

    # from cornernet 
Example #16
Source File: classifier.py    From BERT with Apache License 2.0
def siamese_classifier(config, pooled_output, num_labels,
						labels, dropout_prob,
						ratio_weight=None):

	if config.get("output_layer", "interaction") == "interaction":
		print("==apply interaction layer==")
		repres_a = pooled_output[0]
		repres_b = pooled_output[1]

		output_layer = tf.concat([repres_a, repres_b, tf.abs(repres_a-repres_b), repres_a*repres_b], axis=-1)
		hidden_size = output_layer.shape[-1].value

		output_weights = tf.get_variable(
			"output_weights", [num_labels, hidden_size],
			initializer=tf.truncated_normal_initializer(stddev=0.02))

		output_bias = tf.get_variable(
			"output_bias", [num_labels], initializer=tf.zeros_initializer())

		output_layer = tf.nn.dropout(output_layer, keep_prob=1 - dropout_prob)

		logits = tf.matmul(output_layer, output_weights, transpose_b=True)
		logits = tf.nn.bias_add(logits, output_bias)

		print("==logits shape==", logits.get_shape())

		if config.get("label_type", "single_label") == "single_label":
			if config.get("loss", "entropy") == "entropy":
				# per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
				# 									logits=logits, 
				# 									labels=tf.stop_gradient(labels))

				one_hot_labels = tf.one_hot(labels, num_labels)
				per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
								logits=logits,
								labels=tf.stop_gradient(one_hot_labels),
								)

			elif config.get("loss", "entropy") == "focal_loss":
				per_example_loss, _ = loss_utils.focal_loss_multi_v1(config,
															logits=logits, 
															labels=labels)
			print("==per_example_loss shape==", per_example_loss.get_shape())
			loss = tf.reduce_mean(per_example_loss)

			return (loss, per_example_loss, logits)
		elif config.get("label_type", "single_label") == "multi_label":
			# logits = tf.log_sigmoid(logits)
			per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
													logits=logits, 
													labels=tf.stop_gradient(labels))
			per_example_loss = tf.reduce_mean(per_example_loss, axis=-1)
			loss = tf.reduce_mean(per_example_loss)
			return (loss, per_example_loss, logits)
		else:
			raise NotImplementedError() 
Example #17
Source File: classifier.py    From BERT with Apache License 2.0
def distributed_classifier(config, pooled_output, 
						num_labels, labels,
						dropout_prob,
						ratio_weight=None):

	output_layer = pooled_output

	hidden_size = output_layer.shape[-1].value

	output_weights = tf.get_variable(
			"output_weights", [num_labels, hidden_size],
			initializer=tf.truncated_normal_initializer(stddev=0.02))

	output_bias = tf.get_variable(
			"output_bias", [num_labels], initializer=tf.zeros_initializer())

	output_layer = tf.nn.dropout(output_layer, keep_prob=1 - dropout_prob)

	logits = tf.matmul(output_layer, output_weights, transpose_b=True)
	logits = tf.nn.bias_add(logits, output_bias)

	if config.get("label_type", "single_label") == "single_label":
		if config.get("loss", "entropy") == "entropy":
			# per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
			# 									logits=logits, 
			# 									labels=tf.stop_gradient(labels))

			one_hot_labels = tf.one_hot(labels, num_labels)
			per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
								logits=logits,
								labels=tf.stop_gradient(one_hot_labels),
								)
			
		elif config.get("loss", "entropy") == "focal_loss":
			per_example_loss = loss_utils.focal_loss_multi_v1(config,
														logits=logits, 
														labels=labels)
		loss = tf.reduce_mean(per_example_loss)

		return (loss, per_example_loss, logits)
	elif config.get("label_type", "single_label") == "multi_label":
		# logits = tf.log_sigmoid(logits)
		per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
												logits=logits, 
												labels=tf.stop_gradient(labels))
		per_example_loss = tf.reduce_mean(per_example_loss, axis=-1)
		loss = tf.reduce_mean(per_example_loss)
		return (loss, per_example_loss, logits)
	else:
		raise NotImplementedError()