Python tensorflow.log_sigmoid() Examples

The following are 17 code examples of tensorflow.log_sigmoid(). The project and source file for each example are listed above it, and you may also want to check out the other available functions and classes of the tensorflow module.
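tf.log_sigmoid(x) computes log(1 / (1 + exp(-x))), the log of the sigmoid, in a numerically stable way (internally it is evaluated as -softplus(-x)). The minimal sketch below, which is not taken from any of the projects listed here, shows why it is preferred over composing tf.log and tf.sigmoid:

import tensorflow as tf  # TensorFlow 1.x, as in the examples below

x = tf.constant([-100.0, 0.0, 100.0])
stable = tf.log_sigmoid(x)         # [-100., -0.693..., ~0.]; stays finite for large negative x
naive = tf.log(tf.sigmoid(x))      # sigmoid(-100.) underflows to 0 in float32, so the log becomes -inf
with tf.Session() as sess:
    print(sess.run([stable, naive]))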
Example #1
Source File: routing.py    From CapsLayer with Apache License 2.0
def M_step(log_R, log_activation, vote, lambda_val=0.01):
    R_shape = tf.shape(log_R)
    # Weight the routing assignments by the input capsules' activations (log domain).
    log_R = log_R + log_activation

    R_sum_i = cl.reduce_sum(tf.exp(log_R), axis=-3, keepdims=True)
    log_normalized_R = log_R - tf.reduce_logsumexp(log_R, axis=-3, keepdims=True)

    # Pose is the assignment-weighted mean of the votes; log_var is its log-variance.
    pose = cl.reduce_sum(vote * tf.exp(log_normalized_R), axis=-3, keepdims=True)
    log_var = tf.reduce_logsumexp(log_normalized_R + cl.log(tf.square(vote - pose)), axis=-3, keepdims=True)

    beta_v = tf.get_variable('beta_v',
                             shape=[1 for i in range(len(pose.shape) - 2)] + [pose.shape[-2], 1],
                             initializer=tf.truncated_normal_initializer(mean=15., stddev=3.))
    cost = R_sum_i * (beta_v + 0.5 * log_var)

    beta_a = tf.get_variable('beta_a',
                             shape=[1 for i in range(len(pose.shape) - 2)] + [pose.shape[-2], 1],
                             initializer=tf.truncated_normal_initializer(mean=100.0, stddev=10))
    cost_sum_h = cl.reduce_sum(cost, axis=-1, keepdims=True)
    # The output capsule's activation is sigmoid(lambda * (beta_a - cost)); keep it in log space.
    logit = lambda_val * (beta_a - cost_sum_h)
    log_activation = tf.log_sigmoid(logit)

    return pose, log_var, log_activation
Example #2
Source File: discrim_net.py    From imitation with MIT License
def build_graph(self):
        phs, inps = networks.build_inputs(
            self._observation_space, self._action_space, scale=self._scale
        )
        self._obs_ph, self._act_ph, self._next_obs_ph, self._done_ph = phs
        self.obs_input, self.act_input, _, self.done_input = inps

        with tf.variable_scope("discrim_network"):
            self._disc_logits_gen_is_high, self._disc_mlp = self._build_discrim_net(
                [self.obs_input, self.act_input], **self._build_discrim_net_kwargs
            )
        self._policy_test_reward = self._policy_train_reward = -tf.log_sigmoid(
            self._disc_logits_gen_is_high
        )

        self._disc_loss = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=self._disc_logits_gen_is_high,
            labels=tf.cast(self.labels_gen_is_one_ph, tf.float32),
        ) 
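Since -log(sigmoid(x)) equals softplus(-x), the reward defined above could equivalently be written with tf.nn.softplus; a quick equivalence sketch (hypothetical inputs, not project code):

logits = tf.constant([-3.0, 0.0, 3.0])
reward_a = -tf.log_sigmoid(logits)     # as in build_graph() above
reward_b = tf.nn.softplus(-logits)     # log(1 + exp(-logits)), the same values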
Example #3
Source File: loss_ops.py    From TensorflowFramework with BSD 3-Clause "New" or "Revised" License
def gan_loss(x, gz, discriminator):
  """Original GAN loss.

  Args:
    x: Batch of real samples.
    gz: Batch of generated samples.
    discriminator: Discriminator function.
  Returns:
    d_loss: Discriminator loss.
    g_loss: Generator loss.
  """
  dx = discriminator(x)
  with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    dgz = discriminator(gz)
  d_loss = -tf.reduce_mean(tf.log_sigmoid(dx) + tf.log_sigmoid(1 - dgz))
  g_loss = -tf.reduce_mean(tf.log_sigmoid(dgz))
  return d_loss, g_loss 
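Note that the discriminator term above pairs tf.log_sigmoid(dx) with tf.log_sigmoid(1 - dgz). If discriminator() returns raw logits, the textbook minimax objective log D(x) + log(1 - D(G(z))) is usually expressed with a negated logit instead, since log(1 - sigmoid(d)) = log_sigmoid(-d). A hedged sketch of that formulation (an assumption about the intended convention, not a correction of the project):

  d_loss = -tf.reduce_mean(tf.log_sigmoid(dx) + tf.log_sigmoid(-dgz))   # -[log D(x) + log(1 - D(G(z)))]
  g_loss = -tf.reduce_mean(tf.log_sigmoid(dgz))                         # non-saturating generator loss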
Example #4
Source File: logistic.py    From flowpp with MIT License
def logistic_logcdf(*, x, mean, logscale):
    """
    log cdf of logistic distribution
    this operates elementwise
    """
    z = (x - mean) * tf.exp(-logscale)
    return tf.log_sigmoid(z) 
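Because the CDF of a logistic distribution with location mean and scale exp(logscale) is sigmoid((x - mean) / exp(logscale)), its log-CDF is exactly tf.log_sigmoid(z). If TensorFlow Probability is installed (an extra dependency, not something flowpp uses here), the result can be cross-checked:

import tensorflow_probability as tfp

x, mean, logscale = tf.constant(0.3), tf.constant(-0.1), tf.constant(0.5)
manual = logistic_logcdf(x=x, mean=mean, logscale=logscale)
reference = tfp.distributions.Logistic(loc=mean, scale=tf.exp(logscale)).log_cdf(x)  # same value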
Example #5
Source File: train.py    From fine-lm with MIT License
def discriminator(encodings,
                  sequence_lengths,
                  lang_ids,
                  num_layers=3,
                  hidden_size=1024,
                  dropout=0.3):
  """Discriminates the encoder outputs against lang_ids.

  Args:
    encodings: The encoder outputs of shape [batch_size, max_time, hidden_size].
    sequence_lengths: The length of each sequence of shape [batch_size].
    lang_ids: The true lang id of each sequence of shape [batch_size].
    num_layers: The number of layers of the discriminator.
    hidden_size: The hidden size of the discriminator.
    dropout: The dropout to apply on each discriminator layer output.

  Returns:
    A tuple with: the discriminator loss (L_d) and the adversarial loss (L_adv).
  """
  x = encodings
  for _ in range(num_layers):
    x = tf.nn.dropout(x, 1.0 - dropout)
    x = tf.layers.dense(x, hidden_size, activation=tf.nn.leaky_relu)
  x = tf.nn.dropout(x, 1.0 - dropout)
  y = tf.layers.dense(x, 1)

  mask = tf.sequence_mask(
      sequence_lengths, maxlen=tf.shape(encodings)[1], dtype=tf.float32)
  mask = tf.expand_dims(mask, -1)

  y = tf.log_sigmoid(y) * mask
  y = tf.reduce_sum(y, axis=1)
  y = tf.exp(y)

  l_d = binary_cross_entropy(y, lang_ids, smoothing=0.1)
  l_adv = binary_cross_entropy(y, 1 - lang_ids)

  return l_d, l_adv 
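The masked sum of per-timestep log-sigmoid values followed by tf.exp turns the discriminator output into a product of per-step probabilities over the valid (non-padded) positions of each sequence. A toy illustration with made-up shapes and values (not project code):

y = tf.constant([[[0.2], [1.5], [3.0]]])        # [batch=1, max_time=3, 1] logits
mask = tf.constant([[[1.0], [1.0], [0.0]]])     # last timestep is padding
seq_prob = tf.exp(tf.reduce_sum(tf.log_sigmoid(y) * mask, axis=1))   # == sigmoid(0.2) * sigmoid(1.5)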
Example #6
Source File: GEM.py    From DGFraud with Apache License 2.0
def forward_propagation(self):
        with tf.variable_scope('gem_embedding'):
            h = tf.get_variable(name='init_embedding', shape=[self.nodes, self.encoding],
                                initializer=tf.contrib.layers.xavier_initializer())
            for i in range(0, self.hop):
                f = GEMLayer(self.placeholders, self.nodes, self.meta, self.embedding, self.encoding)
                gem_out = f(inputs=h)
                h = tf.reshape(gem_out, [self.nodes, self.encoding])
            print('GEM embedding over!')

        with tf.variable_scope('classification'):
            batch_data = tf.matmul(tf.one_hot(self.placeholders['batch_index'], self.nodes), h)
            W = tf.get_variable(name='weights',
                                shape=[self.encoding, self.class_size],
                                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.get_variable(name='bias', shape=[1, self.class_size], initializer=tf.zeros_initializer())
            tf.transpose(batch_data, perm=[0, 1])
            logits = tf.matmul(batch_data, W) + b

            u = tf.get_variable(name='u',
                                shape=[1, self.encoding],
                                initializer=tf.contrib.layers.xavier_initializer())

            loss = tf.losses.sigmoid_cross_entropy(multi_class_labels=self.placeholders['t'], logits=logits)

            # TODO
            # loss = -tf.reduce_sum(
            #     tf.log_sigmoid(self.placeholders['t'] * tf.matmul(u, tf.transpose(batch_data, perm=[1, 0]))))

        # return loss, logits
        return loss, tf.nn.sigmoid(logits) 
Example #7
Source File: binary.py    From sonic_contest with MIT License
def kl_divergence(self, param_batch_1, param_batch_2):
        probs_on = tf.sigmoid(param_batch_1)
        probs_off = tf.sigmoid(-param_batch_1)
        log_diff_on = tf.log_sigmoid(param_batch_1) - tf.log_sigmoid(param_batch_2)
        log_diff_off = tf.log_sigmoid(-param_batch_1) - tf.log_sigmoid(-param_batch_2)
        kls = probs_on*log_diff_on + probs_off*log_diff_off
        return tf.reduce_sum(kls, axis=-1) 
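This is the KL divergence between factorized Bernoulli distributions whose logits are param_batch_1 and param_batch_2: sigmoid(a) * [log sigmoid(a) - log sigmoid(b)] + sigmoid(-a) * [log sigmoid(-a) - log sigmoid(-b)], summed over the last axis. With TensorFlow Probability available (an assumption, not a sonic_contest dependency), the per-element terms can be cross-checked against the built-in Bernoulli KL:

import tensorflow_probability as tfp

a = tf.constant([0.5, -1.0])
b = tf.constant([2.0, 0.3])
manual = (tf.sigmoid(a) * (tf.log_sigmoid(a) - tf.log_sigmoid(b))
          + tf.sigmoid(-a) * (tf.log_sigmoid(-a) - tf.log_sigmoid(-b)))
reference = tfp.distributions.kl_divergence(tfp.distributions.Bernoulli(logits=a),
                                            tfp.distributions.Bernoulli(logits=b))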
Example #8
Source File: binary.py    From sonic_contest with MIT License
def entropy(self, param_batch):
        ent_on = tf.log_sigmoid(param_batch) * tf.sigmoid(param_batch)
        ent_off = tf.log_sigmoid(-param_batch) * tf.sigmoid(-param_batch)
        return tf.negative(tf.reduce_sum(ent_on + ent_off, axis=-1)) 
Example #9
Source File: binary.py    From sonic_contest with MIT License
def log_prob(self, param_batch, sample_vecs):
        sample_vecs = tf.cast(sample_vecs, param_batch.dtype)
        log_probs_on = tf.log_sigmoid(param_batch) * sample_vecs
        log_probs_off = tf.log_sigmoid(-param_batch) * (1-sample_vecs)
        return tf.reduce_sum(log_probs_on + log_probs_off, axis=-1) 
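The expression above, sample * log sigmoid(x) + (1 - sample) * log sigmoid(-x), is the Bernoulli log-likelihood and is exactly the negative of TensorFlow's built-in sigmoid cross-entropy; a short equivalence sketch (hypothetical inputs):

params = tf.constant([[0.5, -2.0]])
samples = tf.constant([[1.0, 0.0]])
manual = tf.log_sigmoid(params) * samples + tf.log_sigmoid(-params) * (1 - samples)
builtin = -tf.nn.sigmoid_cross_entropy_with_logits(labels=samples, logits=params)   # same values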
Example #10
Source File: loss_functions.py    From AmpliGraph with Apache License 2.0
def _apply(self, y_true, y_pred):
        """ Apply the loss function.

        Parameters
        ----------
        y_true : tf.Tensor
            A tensor of true values.
        y_pred : tf.Tensor
            A tensor of predicted values.

        Returns
        -------
        loss : tf.Tensor
            The loss value that must be minimized.

        """

        if self._loss_parameters['label_smoothing'] is not None:
            y_true = tf.add((1 - self._loss_parameters['label_smoothing']) * y_true,
                            (self._loss_parameters['label_smoothing']) / self._loss_parameters['num_entities'])

        if self._loss_parameters['label_weighting']:

            eps = 1e-6
            wt = tf.reduce_mean(y_true)
            loss = -tf.reduce_sum((1 - wt) * y_true * tf.log_sigmoid(y_pred)
                                  + wt * (1 - y_true) * tf.log(1 - tf.sigmoid(y_pred) + eps))

        else:
            loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(labels=y_true, logits=y_pred))

        return loss 
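In the weighted branch, the term tf.log(1 - tf.sigmoid(y_pred) + eps) can lose precision for large positive scores; since log(1 - sigmoid(s)) = log_sigmoid(-s), an equivalent eps-free rewrite would be (a sketch of an alternative, not the library's implementation):

loss = -tf.reduce_sum((1 - wt) * y_true * tf.log_sigmoid(y_pred)
                      + wt * (1 - y_true) * tf.log_sigmoid(-y_pred))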
Example #11
Source File: loss_functions.py    From AmpliGraph with Apache License 2.0
def _apply(self, scores_pos, scores_neg):
        """Apply the loss function.

        Parameters
        ----------
        scores_pos : tf.Tensor, shape [n, 1]
            A tensor of scores assigned to positive statements.
        scores_neg : tf.Tensor, shape [n*negative_count, 1]
            A tensor of scores assigned to negative statements.

        Returns
        -------
        loss : tf.Tensor
            The loss value that must be minimized.

        """
        margin = tf.constant(self._loss_parameters['margin'], dtype=tf.float32, name='margin')
        alpha = tf.constant(self._loss_parameters['alpha'], dtype=tf.float32, name='alpha')

        # Compute p(neg_samples) based on eq 4
        scores_neg_reshaped = tf.reshape(scores_neg, [self._loss_parameters['eta'], tf.shape(scores_pos)[0]])
        p_neg = tf.nn.softmax(alpha * scores_neg_reshaped, axis=0)

        # Compute loss based on eq 5
        loss = tf.reduce_sum(-tf.log_sigmoid(margin - tf.negative(scores_pos))) - tf.reduce_sum(
            tf.multiply(p_neg, tf.log_sigmoid(tf.negative(scores_neg_reshaped) - margin)))

        return loss 
Example #12
Source File: CenterNet.py    From CenterNet-tensorflow with MIT License
def _keypoints_loss(self, keypoints, gbbox_yx, gbbox_y, gbbox_x, gbbox_h, gbbox_w,
                        classid, meshgrid_y, meshgrid_x, pshape):
        sigma = self._gaussian_radius(gbbox_h, gbbox_w, 0.7)
        gbbox_y = tf.reshape(gbbox_y, [-1, 1, 1])
        gbbox_x = tf.reshape(gbbox_x, [-1, 1, 1])
        sigma = tf.reshape(sigma, [-1, 1, 1])

        num_g = tf.shape(gbbox_y)[0]
        meshgrid_y = tf.expand_dims(meshgrid_y, 0)
        meshgrid_y = tf.tile(meshgrid_y, [num_g, 1, 1])
        meshgrid_x = tf.expand_dims(meshgrid_x, 0)
        meshgrid_x = tf.tile(meshgrid_x, [num_g, 1, 1])

        keyp_penalty_reduce = tf.exp(-((gbbox_y-meshgrid_y)**2 + (gbbox_x-meshgrid_x)**2)/(2*sigma**2))
        zero_like_keyp = tf.expand_dims(tf.zeros(pshape, dtype=tf.float32), axis=-1)
        reduction = []
        gt_keypoints = []
        for i in range(self.num_classes):
            exist_i = tf.equal(classid, i)
            reduce_i = tf.boolean_mask(keyp_penalty_reduce, exist_i, axis=0)
            reduce_i = tf.cond(
                tf.equal(tf.shape(reduce_i)[0], 0),
                lambda: zero_like_keyp,
                lambda: tf.expand_dims(tf.reduce_max(reduce_i, axis=0), axis=-1)
            )
            reduction.append(reduce_i)

            gbbox_yx_i = tf.boolean_mask(gbbox_yx, exist_i)
            gt_keypoints_i = tf.cond(
                tf.equal(tf.shape(gbbox_yx_i)[0], 0),
                lambda: zero_like_keyp,
                lambda: tf.expand_dims(tf.sparse.to_dense(tf.sparse.SparseTensor(gbbox_yx_i, tf.ones_like(gbbox_yx_i[..., 0], tf.float32), dense_shape=pshape), validate_indices=False),
                                       axis=-1)
            )
            gt_keypoints.append(gt_keypoints_i)
        reduction = tf.concat(reduction, axis=-1)
        gt_keypoints = tf.concat(gt_keypoints, axis=-1)
        keypoints_pos_loss = -tf.pow(1.-tf.sigmoid(keypoints), 2.) * tf.log_sigmoid(keypoints) * gt_keypoints
        keypoints_neg_loss = -tf.pow(1.-reduction, 4) * tf.pow(tf.sigmoid(keypoints), 2.) * (-keypoints+tf.log_sigmoid(keypoints)) * (1.-gt_keypoints)
        keypoints_loss = tf.reduce_sum(keypoints_pos_loss) / tf.cast(num_g, tf.float32) + tf.reduce_sum(keypoints_neg_loss) / tf.cast(num_g, tf.float32)
        return keypoints_loss

    # from cornernet 
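In the negative-sample term above, (-keypoints + tf.log_sigmoid(keypoints)) is the numerically stable form of log(1 - sigmoid(keypoints)), using the identity log(1 - sigmoid(k)) = -k + log_sigmoid(k); a small check with made-up values:

k = tf.constant([-10.0, 0.0, 40.0])
stable = -k + tf.log_sigmoid(k)        # finite everywhere
direct = tf.log(1.0 - tf.sigmoid(k))   # hits -inf once sigmoid(k) rounds to 1.0 in float32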
Example #13
Source File: classifier_adapter.py    From BERT with Apache License 2.0
def siamese_classifier(config, pooled_output, num_labels,
						labels, dropout_prob,
						ratio_weight=None):

	if config.get("output_layer", "interaction") == "interaction":
		print("==apply interaction layer==")
		repres_a = pooled_output[0]
		repres_b = pooled_output[1]

		output_layer = tf.concat([repres_a, repres_b, tf.abs(repres_a-repres_b), repres_a*repres_b], axis=-1)
		hidden_size = output_layer.shape[-1].value

		output_weights = tf.get_variable(
			"output_weights", [num_labels, hidden_size],
			initializer=tf.truncated_normal_initializer(stddev=0.02))

		output_bias = tf.get_variable(
			"output_bias", [num_labels], initializer=tf.zeros_initializer())

		output_layer = tf.nn.dropout(output_layer, keep_prob=1 - dropout_prob)

		logits = tf.matmul(output_layer, output_weights, transpose_b=True)
		logits = tf.nn.bias_add(logits, output_bias)

		print("==logits shape==", logits.get_shape())

		if config.get("label_type", "single_label") == "single_label":
			if config.get("loss", "entropy") == "entropy":
				per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
													logits=logits, 
													labels=tf.stop_gradient(labels))
			elif config.get("loss", "entropy") == "focal_loss":
				per_example_loss, _ = loss_utils.focal_loss_multi_v1(config,
															logits=logits, 
															labels=labels)
			print("==per_example_loss shape==", per_example_loss.get_shape())
			loss = tf.reduce_mean(per_example_loss)

			return (loss, per_example_loss, logits)
		elif config.get("label_type", "single_label") == "multi_label":
			logits = tf.log_sigmoid(logits)
			per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
													logits=logits, 
													labels=tf.stop_gradient(labels))
			per_example_loss = tf.reduce_mean(per_example_loss, axis=-1)
			loss = tf.reduce_mean(per_example_loss)
			return (loss, per_example_loss, logits)
		else:
			raise NotImplementedError() 
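For the multi_label branch, this adapter passes tf.log_sigmoid(logits) into tf.nn.sigmoid_cross_entropy_with_logits; the same pattern appears in the next example, while the classifier.py variants (Examples #16 and #17 below) keep that line commented out and feed the raw logits. The conventional multi-label formulation uses the raw logits directly, roughly like this (an illustrative sketch, not the repository's chosen behaviour):

per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                                        logits=logits,
                                        labels=tf.stop_gradient(labels))
per_example_loss = tf.reduce_mean(per_example_loss, axis=-1)
loss = tf.reduce_mean(per_example_loss)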
Example #14
Source File: classifier_adapter.py    From BERT with Apache License 2.0
def distributed_classifier(config, pooled_output, 
						num_labels, labels,
						dropout_prob,
						ratio_weight=None):

	output_layer = pooled_output

	hidden_size = output_layer.shape[-1].value

	output_weights = tf.get_variable(
			"output_weights", [num_labels, hidden_size],
			initializer=tf.truncated_normal_initializer(stddev=0.02))

	output_bias = tf.get_variable(
			"output_bias", [num_labels], initializer=tf.zeros_initializer())

	output_layer = tf.nn.dropout(output_layer, keep_prob=1 - dropout_prob)

	logits = tf.matmul(output_layer, output_weights, transpose_b=True)
	logits = tf.nn.bias_add(logits, output_bias)

	if config.get("label_type", "single_label") == "single_label":
		if config.get("loss", "entropy") == "entropy":
			per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
												logits=logits, 
												labels=tf.stop_gradient(labels))
		elif config.get("loss", "entropy") == "focal_loss":
			per_example_loss = loss_utils.focal_loss_multi_v1(config,
														logits=logits, 
														labels=labels)
		loss = tf.reduce_mean(per_example_loss)

		return (loss, per_example_loss, logits)
	elif config.get("label_type", "single_label") == "multi_label":
		logits = tf.log_sigmoid(logits)
		per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
												logits=logits, 
												labels=tf.stop_gradient(labels))
		per_example_loss = tf.reduce_mean(per_example_loss, axis=-1)
		loss = tf.reduce_mean(per_example_loss)
		return (loss, per_example_loss, logits)
	else:
		raise NotImplementedError() 
Example #15
Source File: CenterNet.py    From Object-Detection-API-Tensorflow with MIT License
def _keypoints_loss(self, keypoints, gbbox_yx, gbbox_y, gbbox_x, gbbox_h, gbbox_w,
                        classid, meshgrid_y, meshgrid_x, pshape):
        sigma = self._gaussian_radius(gbbox_h, gbbox_w, 0.7)
        gbbox_y = tf.reshape(gbbox_y, [-1, 1, 1])
        gbbox_x = tf.reshape(gbbox_x, [-1, 1, 1])
        sigma = tf.reshape(sigma, [-1, 1, 1])

        num_g = tf.shape(gbbox_y)[0]
        meshgrid_y = tf.expand_dims(meshgrid_y, 0)
        meshgrid_y = tf.tile(meshgrid_y, [num_g, 1, 1])
        meshgrid_x = tf.expand_dims(meshgrid_x, 0)
        meshgrid_x = tf.tile(meshgrid_x, [num_g, 1, 1])

        keyp_penalty_reduce = tf.exp(-((gbbox_y-meshgrid_y)**2 + (gbbox_x-meshgrid_x)**2)/(2*sigma**2))
        zero_like_keyp = tf.expand_dims(tf.zeros(pshape, dtype=tf.float32), axis=-1)
        reduction = []
        gt_keypoints = []
        for i in range(self.num_classes):
            exist_i = tf.equal(classid, i)
            reduce_i = tf.boolean_mask(keyp_penalty_reduce, exist_i, axis=0)
            reduce_i = tf.cond(
                tf.equal(tf.shape(reduce_i)[0], 0),
                lambda: zero_like_keyp,
                lambda: tf.expand_dims(tf.reduce_max(reduce_i, axis=0), axis=-1)
            )
            reduction.append(reduce_i)

            gbbox_yx_i = tf.boolean_mask(gbbox_yx, exist_i)
            gt_keypoints_i = tf.cond(
                tf.equal(tf.shape(gbbox_yx_i)[0], 0),
                lambda: zero_like_keyp,
                lambda: tf.expand_dims(tf.sparse.to_dense(tf.sparse.SparseTensor(gbbox_yx_i, tf.ones_like(gbbox_yx_i[..., 0], tf.float32), dense_shape=pshape), validate_indices=False),
                                       axis=-1)
            )
            gt_keypoints.append(gt_keypoints_i)
        reduction = tf.concat(reduction, axis=-1)
        gt_keypoints = tf.concat(gt_keypoints, axis=-1)
        keypoints_pos_loss = -tf.pow(1.-tf.sigmoid(keypoints), 2.) * tf.log_sigmoid(keypoints) * gt_keypoints
        keypoints_neg_loss = -tf.pow(1.-reduction, 4) * tf.pow(tf.sigmoid(keypoints), 2.) * (-keypoints+tf.log_sigmoid(keypoints)) * (1.-gt_keypoints)
        keypoints_loss = tf.reduce_sum(keypoints_pos_loss) / tf.cast(num_g, tf.float32) + tf.reduce_sum(keypoints_neg_loss) / tf.cast(num_g, tf.float32)
        return keypoints_loss

    # from cornernet 
Example #16
Source File: classifier.py    From BERT with Apache License 2.0
def siamese_classifier(config, pooled_output, num_labels,
						labels, dropout_prob,
						ratio_weight=None):

	if config.get("output_layer", "interaction") == "interaction":
		print("==apply interaction layer==")
		repres_a = pooled_output[0]
		repres_b = pooled_output[1]

		output_layer = tf.concat([repres_a, repres_b, tf.abs(repres_a-repres_b), repres_a*repres_b], axis=-1)
		hidden_size = output_layer.shape[-1].value

		output_weights = tf.get_variable(
			"output_weights", [num_labels, hidden_size],
			initializer=tf.truncated_normal_initializer(stddev=0.02))

		output_bias = tf.get_variable(
			"output_bias", [num_labels], initializer=tf.zeros_initializer())

		output_layer = tf.nn.dropout(output_layer, keep_prob=1 - dropout_prob)

		logits = tf.matmul(output_layer, output_weights, transpose_b=True)
		logits = tf.nn.bias_add(logits, output_bias)

		print("==logits shape==", logits.get_shape())

		if config.get("label_type", "single_label") == "single_label":
			if config.get("loss", "entropy") == "entropy":
				# per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
				# 									logits=logits, 
				# 									labels=tf.stop_gradient(labels))

				one_hot_labels = tf.one_hot(labels, num_labels)
				per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
								logits=logits,
								labels=tf.stop_gradient(one_hot_labels),
								)

			elif config.get("loss", "entropy") == "focal_loss":
				per_example_loss, _ = loss_utils.focal_loss_multi_v1(config,
															logits=logits, 
															labels=labels)
			print("==per_example_loss shape==", per_example_loss.get_shape())
			loss = tf.reduce_mean(per_example_loss)

			return (loss, per_example_loss, logits)
		elif config.get("label_type", "single_label") == "multi_label":
			# logits = tf.log_sigmoid(logits)
			per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
													logits=logits, 
													labels=tf.stop_gradient(labels))
			per_example_loss = tf.reduce_mean(per_example_loss, axis=-1)
			loss = tf.reduce_mean(per_example_loss)
			return (loss, per_example_loss, logits)
		else:
			raise NotImplementedError() 
Example #17
Source File: classifier.py    From BERT with Apache License 2.0
def distributed_classifier(config, pooled_output, 
						num_labels, labels,
						dropout_prob,
						ratio_weight=None):

	output_layer = pooled_output

	hidden_size = output_layer.shape[-1].value

	output_weights = tf.get_variable(
			"output_weights", [num_labels, hidden_size],
			initializer=tf.truncated_normal_initializer(stddev=0.02))

	output_bias = tf.get_variable(
			"output_bias", [num_labels], initializer=tf.zeros_initializer())

	output_layer = tf.nn.dropout(output_layer, keep_prob=1 - dropout_prob)

	logits = tf.matmul(output_layer, output_weights, transpose_b=True)
	logits = tf.nn.bias_add(logits, output_bias)

	if config.get("label_type", "single_label") == "single_label":
		if config.get("loss", "entropy") == "entropy":
			# per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
			# 									logits=logits, 
			# 									labels=tf.stop_gradient(labels))

			one_hot_labels = tf.one_hot(labels, num_labels)
			per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
								logits=logits,
								labels=tf.stop_gradient(one_hot_labels),
								)
			
		elif config.get("loss", "entropy") == "focal_loss":
			per_example_loss = loss_utils.focal_loss_multi_v1(config,
														logits=logits, 
														labels=labels)
		loss = tf.reduce_mean(per_example_loss)

		return (loss, per_example_loss, logits)
	elif config.get("label_type", "single_label") == "multi_label":
		# logits = tf.log_sigmoid(logits)
		per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
												logits=logits, 
												labels=tf.stop_gradient(labels))
		per_example_loss = tf.reduce_mean(per_example_loss, axis=-1)
		loss = tf.reduce_mean(per_example_loss)
		return (loss, per_example_loss, logits)
	else:
		raise NotImplementedError()