Python keras.backend.pow() Examples

The following code examples show how to use keras.backend.pow(). They are drawn from open-source Python projects.

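Before the project examples, here is a minimal sketch of the call itself. K.pow(x, a) raises a tensor element-wise to the power a; the values below are illustrative and assume a TensorFlow backend.

from keras import backend as K

x = K.constant([1., 2., 3.])
y = K.pow(x, 2.)  # element-wise power
print(K.eval(y))  # [1. 4. 9.]
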
Example 1
Project: keras_extension   Author: k1414st   File: activations.py    MIT License
def gelu(x, approximate='tanh'):
    """Gaussian Error Linear Unit.
    (implementation of https://arxiv.org/abs/1606.08415)

    # Arguments
        x: Input tensor.
        approximate: Approximation method of GELU.
            "tanh" or "sigmoid" (defaults to "tanh".)

    # Returns
        The Gaussian error linear unit activation: x * Phi(x)
            where Phi(x) = P(X <= x), X ~ Normal(0, 1)
            (approximated by "tanh" or "sigmoid")
    """
    if approximate not in ('tanh', 'sigmoid'):
        raise ValueError('approximate must be in ("tanh", "sigmoid")')
    if approximate == 'tanh':
        return 0.5*x * (1 + K.tanh(np.sqrt(2/np.pi) * \
                                   (x + 0.044715*K.pow(x, 3))))
    else:
        return x * K.sigmoid(1.702*x) 
Example 2
Project: Trident-Segmentation-CNN   Author: YalongLiu   File: metrics.py    MIT License
def self_adaptive_balance_loss(y_true, y_pred):
    # Hyper parameters
    gamma = 1  # For hard segmentation

    # Original focal loss for segmentation
    pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
    pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))

    loss_0 = -K.pow(pt_0, gamma) * K.log(1. - pt_0 + K.epsilon())
    loss_1 = -K.pow(1. - pt_1, gamma) * K.log(K.epsilon() + pt_1)
    sum_0 = tf.reduce_sum(loss_0)
    sum_1 = tf.reduce_sum(loss_1)

    alpha = (sum_0 / (sum_0 + sum_1)) * 0.4 + 0.5
    b_loss_0 = (1 - alpha) * loss_0  # background loss after balance
    b_loss_1 = alpha * loss_1  # foreground loss after balance
    sum_2 = tf.reduce_sum(b_loss_0)
    sum_3 = tf.reduce_sum(b_loss_1)

    b_loss_1 = tf.Print(b_loss_1, [sum_0, sum_1, alpha, sum_2, sum_3],
                        message='loss_0 loss_1 alpha sum2 sum3:')

    loss = b_loss_0 + b_loss_1
    return loss 
Example 3
Project: SPECT-CT-Seg-ResUNet-Keras   Author: junyuchen245   File: custom_losses.py    MIT License
def exp_dice_loss(exp=1.0):
    """
    :param exp: exponent. 1.0 for no exponential effect, i.e. log Dice.
    """

    def inner(y_true, y_pred):
        """Computes the average exponential log Dice coefficients as the loss function.
        :param y_true: one-hot tensor multiplied by label weights, (batch size, number of pixels, number of labels).
        :param y_pred: softmax probabilities, same shape as y_true. Each probability serves as a partial volume.
        :return: average exponential log Dice coefficient.
        """

        dice = dice_coef(y_true, y_pred)
        #dice = generalized_dice(y_true, y_pred, exp)
        dice = K.clip(dice, K.epsilon(), 1 - K.epsilon())  # As log is used
        dice = K.pow(-K.log(dice), exp)
        if K.ndim(dice) == 2:
            dice = K.mean(dice, axis=-1)
        return dice

    return inner 
Example 4
Project: NTM-Keras   Author: SigmaQuan   File: memory.py    MIT License
def sharpen(_weight_t, scalar_gama_t):
    '''
    The convolution operation in convolutional shift can cause leakage or
    dispersion of weights over time if the shift weighting is not sharp.
    For example, if shifts of -1, 0 and 1 are given weights of 0.1, 0.8,
    and 0.1, the rotation will transform a weighting focused at a single
    point into one slightly blurred over three points. To combat this,
    each head emits one further scalar \gamma >= 1 whose effect is to
    sharpen the final weighting as follows:
    $$w_{i}^{(t)} = \frac{\left(\hat{w}_{i}^{(t)}\right)^{\gamma}}
    {\sum_{j}\left(\hat{w}_{j}^{(t)}\right)^{\gamma}}$$
    :param _weight_t: the weight vector which denotes a memory address.
    :param scalar_gama_t: the sharpening scalar \gamma.
    :return: the sharpened weight.
    '''
    weight_t = K.pow(_weight_t, scalar_gama_t)
    return weight_t / K.sum(weight_t) 
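A quick numeric check of the sharpening above (illustrative values, TensorFlow backend assumed): raising the blurred weighting [0.1, 0.8, 0.1] to the power 3 and renormalizing concentrates nearly all mass at the peak.

from keras import backend as K

weight = K.constant([0.1, 0.8, 0.1])
print(K.eval(sharpen(weight, K.constant(3.0))))  # approx. [0.002 0.996 0.002]
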
Example 5
Project: AutoNlp   Author: upwindflys   File: model.py    MIT License
def categorical_focal_loss_fixed(y_true, y_pred):
    """
        :param y_true: A tensor of the same shape as `y_pred`
        :param y_pred: A tensor resulting from a softmax
        :return: Output tensor.
        """

    gamma = 2.
    alpha = .25
    # Scale predictions so that the class probas of each sample sum to 1
    y_pred /= K.sum(y_pred, axis=-1, keepdims=True)

    # Clip the prediction value to prevent NaN's and Inf's
    epsilon = K.epsilon()
    y_pred = K.clip(y_pred, epsilon, 1. - epsilon)

    # Calculate Cross Entropy
    cross_entropy = -y_true * K.log(y_pred)

    # Calculate Focal Loss
    loss = alpha * K.pow(1 - y_pred, gamma) * cross_entropy

    # Sum the losses in mini_batch
    return K.sum(loss, axis=1) 
Example 6
Project: keras-lookahead   Author: CyberZHG   File: optimizers.py    MIT License
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = self.learning_rate * (K.sqrt(1. - K.pow(self.beta_2, t)) / (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            p_t = lr_t * m_t / (K.sqrt(v_t) + self.epsilon)
            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            self.updates.append(K.update_sub(p, p_t))
        return self.updates 
Example 7
Project: qkeras   Author: google   File: qlayers.py    Apache License 2.0
def stochastic_round_po2(x):
  """Performs stochastic rounding for the power of two."""
  y = K.abs(x)
  eps = K.epsilon()
  log2 = K.log(2.0)
  x_log2 = K.round(K.log(y + eps) / log2)
  sign = K.sign(x)
  po2 = K.cast(K.pow(2.0, K.cast(x_log2, dtype="float32")), dtype="float32")
  left_val = tf.where(po2 > y, x_log2 - 1, x_log2)
  right_val = tf.where(po2 > y, x_log2, x_log2 + 1)
  # sampling in [2**left_val, 2**right_val].
  minval = 2 ** left_val
  maxval = 2 ** right_val
  val = K.random_uniform(K.shape(y), minval=minval, maxval=maxval)
  # use y as a threshold to split the probability between [2**left_val,
  # 2**right_val] so that the mean value of the sample is y
  x_po2 = tf.where(y < val, left_val, right_val)
  return x_po2 
Example 8
Project: qkeras   Author: google   File: qlayers.py    Apache License 2.0
def __call__(self, x):
    """Computes fixedpoint quantization of x."""

    unsigned_bits = self.bits - self.keep_negative

    # quantized_bits with "1" bit becomes a binary implementation.

    if unsigned_bits > 0:
      m = pow(2, unsigned_bits)
      m_i = pow(2, self.integer)
      p = x * m / m_i
      xq = m_i * K.clip(
          _round_through(p, self.use_stochastic_rounding),
          self.keep_negative * (-m + self.symmetric), m - 1) / m
    else:
      xq = K.sign(x)
      xq += (1.0 - K.abs(xq))
      if not self.keep_negative:
        xq = (xq + 1.0) / 2.0
    return x + K.stop_gradient(-x + xq) 
Example 9
Project: qkeras   Author: google   File: qlayers.py    Apache License 2.0
def __call__(self, x):
    non_sign_bits = self.bits - 1
    min_exp = -2**(non_sign_bits - 1)
    max_exp = 2**(non_sign_bits - 1) - 1
    eps = K.epsilon()
    if min_exp < np.log2(eps):
      warnings.warn(
          "QKeras: min_exp in po2 quantizer is smaller than K.epsilon()")

    if self.max_value != -1:
      max_exp = np.round(np.log2(self.max_value + eps))

    x_sign = K.sign(x)
    x_sign += (1.0 - K.abs(x_sign))
    log2 = np.log(2.0)

    if self.use_stochastic_rounding:
      x_log2 = stochastic_round_po2(x)
    else:
      x_log2 = _round_through(K.log(K.abs(x) + eps) / log2)
    return x + K.stop_gradient(
        -x + x_sign * K.pow(2.0, K.clip(x_log2, min_exp, max_exp))) 
Example 10
Project: IrrMapper   Author: dgketchum   File: losses.py    Apache License 2.0
def binary_focal_loss(gamma=2, alpha=0.25):
    ''' 
    Focal loss:

       FL (pt) = -(1-pt)^gamma * log(pt)
       where
       pt = p if y==1
            1-p otherwise
    '''

    def bfl(y_true, y_pred):
        mask = tf.not_equal(y_true, -1) # true where the mask isn't==-1
        y_pred = tf.nn.sigmoid(y_pred)
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
        epsilon = K.epsilon()
        # clip to prevent NaN's and Inf's
        pt_1 = K.clip(pt_1, epsilon, 1. - epsilon)
        pt_0 = K.clip(pt_0, epsilon, 1. - epsilon)
        pt_1 = tf.boolean_mask(pt_1, mask)
        pt_0 = tf.boolean_mask(pt_0, mask)

        return -K.mean(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1)) \
               -K.mean((1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0))
    return bfl 
Example 11
Project: IrrMapper   Author: dgketchum   File: losses.py    Apache License 2.0
def multiclass_focal_loss(gamma=2, alpha=0.25):


    def multiclass_FL(y_true, y_pred):
        y_true_sum = tf.math.reduce_sum(y_true, axis=-1)
        mask = tf.not_equal(y_true_sum, 0)
        # y_true = tf.boolean_mask(y_true, mask)
        # y_pred = tf.boolean_mask(y_pred, mask)
        # probabilities = tf.nn.softmax(y_pred)
        # xen = -y_true * tf.math.log(probabilities) # all 0s where y_true is all 0s
        # loss = alpha*tf.math.pow(1-probabilities, gamma) * xen 
        # return tf.math.reduce_mean(loss)
        probabilities = tf.nn.softmax(y_pred, axis=-1)
        xen = -y_true * tf.math.log(probabilities) # all 0s where y_true is all 0s
        complement = tf.dtypes.cast(tf.equal(y_true, 0), tf.float32)
        # focal term for the negative classes: -(p)^gamma * log(1 - p)
        negative_probabilities = -tf.math.pow(complement*probabilities,
                gamma)*tf.math.log(1. - complement*probabilities)
        masked_xen = tf.boolean_mask(xen, mask)
        masked_complement = tf.boolean_mask(negative_probabilities, mask)
        return tf.reduce_mean(masked_xen) + tf.reduce_mean(masked_complement)

    return multiclass_FL 
Example 12
Project: nlp_toolkit   Author: stevewyl   File: position_embedding.py    MIT License
def call(self, x):
        if (self.size is None) or (self.mode == 'sum'):
            self.size = int(x.shape[-1])
        batch_size, seq_len = K.shape(x)[0], K.shape(x)[1]
        position_j = 1. / K.pow(10000.,
                                2 * K.arange(self.size / 2, dtype='float32'
                                             ) / self.size)
        position_j = K.expand_dims(position_j, 0)
        # K.arange does not support variable length, so the positions are generated this way
        position_i = K.cumsum(K.ones_like(x[:, :, 0]), 1) - 1
        position_i = K.expand_dims(position_i, 2)
        position_ij = K.dot(position_i, position_j)
        position_ij = K.concatenate(
            [K.cos(position_ij), K.sin(position_ij)], 2)
        if self.mode == 'sum':
            return position_ij + x
        elif self.mode == 'concat':
            return K.concatenate([position_ij, x], 2) 
Example 13
Project: Deep-Spectral-Clustering-using-Dual-Autoencoder-Network   Author: xdxuyang   File: costs.py    MIT License
def full_affinity(X, scale):
    '''
    Calculates the symmetrized full Gaussian affinity matrix, scaled
    by a provided scale

    X:              input dataset of size n
    scale:          provided scale

    returns:        n x n affinity matrix
    '''
    sigma = K.variable(scale)
    Dx = squared_distance(X)
    sigma_squared = K.pow(sigma, 2)
    sigma_squared = K.expand_dims(sigma_squared, -1)
    Dx_scaled = Dx / (2 * sigma_squared)
    W = K.exp(-Dx_scaled)
    return W 
Example 14
Project: faceswap   Author: deepfakes   File: losses.py    GNU General Public License v3.0
def generalized_loss(y_true, y_pred, alpha=1.0, beta=1.0/255.0):
    """
    generalized function used to return a large variety of mathematical loss functions
    primary benefit is smooth, differentiable version of L1 loss

    Barron, J. A More General Robust Loss Function
    https://arxiv.org/pdf/1701.03077.pdf
    Parameters:
        alpha: penalty factor. larger number give larger weight to large deviations
        beta: scale factor used to adjust to the input scale (i.e. inputs of mean 1e-4 or 256 )
    Return:
        a loss value from the results of function(y_pred - y_true)
    Example:
        a=1.0, x>>c , c=1.0/255.0 will give a smoothly differentiable version of L1 / MAE loss
        a=1.999999 (lim as a->2), beta=1.0/255.0 will give L2 / RMSE loss
    """
    diff = y_pred - y_true
    second = (K.pow(K.pow(diff/beta, 2.) / K.abs(2.-alpha) + 1., (alpha/2.)) - 1.)
    loss = (K.abs(2.-alpha)/alpha) * second
    loss = K.mean(loss, axis=-1) * beta
    return loss 
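A hedged usage sketch of the two limits named in the docstring (the input tensors are illustrative): alpha=1.0 yields the smooth L1 / MAE behaviour, while alpha close to 2 approaches L2.

from keras import backend as K

y_true = K.constant([[0.0, 0.0]])
y_pred = K.constant([[0.1, 0.2]])
print(K.eval(generalized_loss(y_true, y_pred, alpha=1.0)))       # smooth-L1-like
print(K.eval(generalized_loss(y_true, y_pred, alpha=1.999999)))  # L2-like
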
Example 15
Project: focal-loss-keras   Author: mkocabas   File: focal_loss.py    MIT License
def focal_loss(gamma=2., alpha=.25):
	def focal_loss_fixed(y_true, y_pred):
		pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
		pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
		return -K.mean(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1)) - K.mean((1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0))
	return focal_loss_fixed 
Example 16
Project: keras_radam   Author: forin-xyz   File: test_radam.py    MIT License
def gelu(x):
    return 0.5 * x * (1 + K.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * K.pow(x, 3)))) 
Example 17
Project: keras-lamb   Author: CyberZHG   File: optimizer.py    MIT License
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        t = K.cast(self.iterations, K.floatx()) + 1
        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * t))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))

            mhat_t = m_t / (1. - K.pow(self.beta_1, t))
            vhat_t = v_t / (1. - K.pow(self.beta_2, t))

            u_t = mhat_t / K.sqrt(vhat_t + self.epsilon) + self.weight_decay * p
            trust_ratio = K.sqrt(K.sum(K.square(p)) / K.sum(K.square(u_t)))
            trust_ratio = K.minimum(K.maximum(trust_ratio, self.lower_bound), self.upper_bound)

            lr_p = trust_ratio * lr
            new_p = p - lr_p * u_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates 
Example 18
Project: trVAE   Author: theislab   File: _utils.py    MIT License
def compute_kernel(x, y, kernel='rbf', **kwargs):
    """
        Computes RBF kernel between x and y.
        # Parameters
            x: Tensor
                Tensor with shape [batch_size, z_dim]
            y: Tensor
                Tensor with shape [batch_size, z_dim]
        # Returns
            returns the computed RBF kernel between x and y
    """
    scales = kwargs.get("scales", [])
    if kernel == "rbf":
        x_size = K.shape(x)[0]
        y_size = K.shape(y)[0]
        dim = K.shape(x)[1]
        tiled_x = K.tile(K.reshape(x, K.stack([x_size, 1, dim])), K.stack([1, y_size, 1]))
        tiled_y = K.tile(K.reshape(y, K.stack([1, y_size, dim])), K.stack([x_size, 1, 1]))
        return K.exp(-K.mean(K.square(tiled_x - tiled_y), axis=2) / K.cast(dim, tf.float32))
    elif kernel == 'raphy':
        scales = K.variable(value=np.asarray(scales))
        squared_dist = K.expand_dims(squared_distance(x, y), 0)
        scales = K.expand_dims(K.expand_dims(scales, -1), -1)
        weights = K.eval(K.shape(scales)[0])
        weights = K.variable(value=np.asarray(weights))
        weights = K.expand_dims(K.expand_dims(weights, -1), -1)
        return K.sum(weights * K.exp(-squared_dist / (K.pow(scales, 2))), 0)
    elif kernel == "multi-scale-rbf":
        sigmas = [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 5, 10, 15, 20, 25, 30, 35, 100, 1e3, 1e4, 1e5, 1e6]

        beta = 1. / (2. * (K.expand_dims(sigmas, 1)))
        distances = squared_distance(x, y)
        s = K.dot(beta, K.reshape(distances, (1, -1)))

        return K.reshape(tf.reduce_sum(tf.exp(-s), 0), K.shape(distances)) / len(sigmas) 
Example 19
Project: FaceLandmarks   Author: JACKYLUO1991   File: loss.py    Apache License 2.0
def smoothL1(y_true, y_pred):
    """
    More robust to noise
    """
    THRESHOLD = K.variable(1.0)
    mae = K.abs(y_true - y_pred)
    flag = K.greater(mae, THRESHOLD)
    loss = K.mean(K.switch(flag, (mae - 0.5), K.pow(mae, 2)), axis=-1)

    return loss 
Example 20
Project: BERT   Author: yyht   File: funcs.py    Apache License 2.0
def gelu(x):
    return 0.5 * x * (1 + K.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * K.pow(x, 3))))


# https://stackoverflow.com/a/42194662/2796084 
Example 21
Project: group-ksparse-temporal-cnns   Author: srph25   File: ops.py    MIT License
def n_p(Z, p, axis, epsilon=None):
    if epsilon is None:
        epsilon = K.epsilon()
    return K.pow(K.sum(K.pow(K.abs(Z), p), axis=axis) + epsilon, 1. / p) 
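An illustrative check of the L_p norm helper above (values assumed, TensorFlow backend): with p=2 it reduces to the Euclidean norm, up to the small epsilon term.

from keras import backend as K

Z = K.constant([[3., 4.]])
print(K.eval(n_p(Z, 2., axis=-1)))  # approx. [5.]
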
Example 22
Project: iMIMIC-RCVs   Author: medgift   File: regularizers.py    MIT License
def build_loss(self):
        r"""Implements the N-dim version of function
        $$TV^{\beta}(x) = \sum_{whc} \left ( \left ( x(h, w+1, c) - x(h, w, c) \right )^{2} +
        \left ( x(h+1, w, c) - x(h, w, c) \right )^{2} \right )^{\frac{\beta}{2}}$$
        to return total variation for all images in the batch.
        """
        image_dims = K.ndim(self.img) - 2

        # Constructing slice [1:] + [:-1] * (image_dims - 1) and [:-1] * (image_dims)
        start_slice = [slice(1, None, None)] + [slice(None, -1, None) for _ in range(image_dims - 1)]
        end_slice = [slice(None, -1, None) for _ in range(image_dims)]
        samples_channels_slice = [slice(None, None, None), slice(None, None, None)]

        # Compute pixel diffs by rolling slices to the right per image dim.
        tv = None
        for i in range(image_dims):
            ss = tuple(samples_channels_slice + start_slice)
            es = tuple(samples_channels_slice + end_slice)
            diff_square = K.square(self.img[utils.slicer[ss]] - self.img[utils.slicer[es]])
            tv = diff_square if tv is None else tv + diff_square

            # Roll over to next image dim
            start_slice = np.roll(start_slice, 1).tolist()
            end_slice = np.roll(end_slice, 1).tolist()

        tv = K.sum(K.pow(tv, self.beta / 2.))
        return normalize(self.img, tv) 
Example 23
Project: iMIMIC-RCVs   Author: medgift   File: regularizers.py    MIT License
def build_loss(self):
        # Infinity norm
        if np.isinf(self.p):
            value = K.max(self.img)
        else:
            value = K.pow(K.sum(K.pow(K.abs(self.img), self.p)), 1. / self.p)

        return normalize(self.img, value) 
Example 24
Project: Trident-Segmentation-CNN   Author: YalongLiu   File: metrics.py    MIT License
def focal_loss(y_true, y_pred):
    # Hyper parameters
    gamma = 1  # For hard segmentation
    alpha = 0.9  # For unbalanced samples

    # Original focal loss for segmentation
    pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
    pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
    loss_0 = -K.pow(pt_0, gamma) * K.log(1. - pt_0 + K.epsilon())
    loss_1 = -K.pow(1. - pt_1, gamma) * K.log(K.epsilon() + pt_1)

    sum_0 = tf.reduce_sum(loss_0)
    sum_1 = tf.reduce_sum(loss_1)

    loss_0 = tf.Print(loss_0, [sum_0, sum_1], message='loss_0 loss_1:')
    loss = alpha * loss_1 + (1 - alpha) * loss_0

    # Original focal loss for classification
    # loss = -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1)) - K.sum(
    #     (1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0))

    # Self-defined focal loss
    # loss = -alpha * y_true * K.pow(1.0 - y_pred, gamma) * K.log(K.epsilon() + y_pred) - (1.0 - y_true) * (
    # 1 - alpha) * K.pow(y_pred, gamma) * K.log(1. - y_pred + K.epsilon())

    return K.mean(loss) 
Example 25
Project: wtte-rnn   Author: ragulpr   File: wtte.py    MIT License
def loglik_discrete(y, u, a, b, epsilon=K.epsilon()):
    hazard0 = K.pow((y + epsilon) / a, b)
    hazard1 = K.pow((y + 1.0) / a, b)

    loglikelihoods = u * \
        K.log(K.exp(hazard1 - hazard0) - (1.0 - epsilon)) - hazard1
    return loglikelihoods 
Example 26
Project: wtte-rnn   Author: ragulpr   File: wtte.py    MIT License
def loglik_continuous(y, u, a, b, epsilon=K.epsilon()):
    ya = (y + epsilon) / a
    loglikelihoods = u * (K.log(b) + b * K.log(ya)) - K.pow(ya, b)
    return loglikelihoods 
Example 27
Project: wtte-rnn   Author: ragulpr   File: wtte.py    MIT License
def loglik_continuous_conditional_correction(y, u, a, b, epsilon=K.epsilon()):
    """Integrated conditional excess loss.
        Explanation TODO
    """
    ya = (y + epsilon) / a
    loglikelihoods = y * \
        (u * (K.log(b) + b * K.log(ya)) - (b / (b + 1.)) * K.pow(ya, b))
    return loglikelihoods 
Example 28
Project: ChimeraNet   Author: leichtrhino   File: models.py    MIT License
def loss_mask(T, F, C, D):
    def _loss_mask(y_true, y_pred):
        mixture = K.expand_dims(y_true[:, :, :, C])
        mask_true = y_true[:, :, :, :C]
        return K.sum(
            K.pow((mask_true - y_pred)*mixture, 2), axis=(1, 2, 3)
        )
    return _loss_mask 
Example 29
Project: RecurrentGaze   Author: crisie   File: adamaccum.py    MIT License
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [(self.iterations, self.iterations + 1)]

        t = self.iterations + 1
        lr_t = self.lr * K.sqrt(1. - K.pow(self.beta_2, t)) / (1. - K.pow(self.beta_1, t))

        ms = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
        vs = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
        gs = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
        self.weights = ms + vs

        for p, g, m, v, gg in zip(params, grads, ms, vs, gs):

            flag = K.equal(self.iterations % self.accum_iters, 0)
            flag = K.cast(flag, dtype='float32')

            gg_t = (1 - flag) * (gg + g)
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * (gg + flag * g) / self.accum_iters
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square((gg + flag * g) / self.accum_iters)
            p_t = p - flag * lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append((m, flag * m_t + (1 - flag) * m))
            self.updates.append((v, flag * v_t + (1 - flag) * v))
            self.updates.append((gg, gg_t))

            new_p = p_t
            # apply constraints
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)
            self.updates.append((p, new_p))
        return self.updates 
Example 30
Project: SPECT-CT-Seg-ResUNet-Keras   Author: junyuchen245   File: custom_losses.py    MIT License
def generalized_dice(y_true, y_pred, exp):
    GD =[]
    print(K.shape(y_pred))
    smooth = 1.0
    for i in range(y_pred.shape[2]):
        y_true_per_label = K.cast(K.not_equal(y_true[:, :, i], 0), K.floatx())  # Change to binary
        y_pred_per_label = y_pred[:, :, i]  # K.cast(K.not_equal(y_pred[:, :, i], 0), K.floatx())  # Change to binary
        weight = K.pow(1/K.sum(y_true_per_label, axis=1), exp)
        intersection = K.sum(y_true_per_label * y_pred_per_label, axis=1)  # (batch size, number of labels)
        union = K.sum(y_true_per_label + y_pred_per_label, axis=1)  # (batch size, number of labels)
        GD.append(weight * (2. * intersection + smooth) / (union + smooth))
    GD_out = K.stack(GD)
    return GD_out 
Example 31
Project: Beta-Mish   Author: digantamisra98   File: beta_mish.py    MIT License
def call(self, inputs):
        return inputs*K.tanh(K.log(K.pow((1+K.exp(inputs)),self.beta))) 
Example 32
Project: keras-optimizers   Author: hi-im-ryanli   File: frankenstein.py    MIT License
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        t = K.cast(self.iterations, K.floatx()) + 1

        # Due to the recommendations in [2], i.e. warming momentum schedule
        momentum_cache_t = self.beta_1 * (1. - 0.5 * (K.pow(K.cast_to_floatx(0.96), t * self.schedule_decay)))
        momentum_cache_t_1 = self.beta_1 * (1. - 0.5 * (K.pow(K.cast_to_floatx(0.96), (t + 1) * self.schedule_decay)))
        m_schedule_new = self.m_schedule * momentum_cache_t
        m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1
        self.updates.append((self.m_schedule, m_schedule_new))

        shapes = [K.int_shape(p) for p in params]
        ms = [K.zeros(shape) for shape in shapes]
        vs = [K.zeros(shape) for shape in shapes]

        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            # the following equations given in [1]
            g_prime = g / (1. - m_schedule_new)
            m_t = self.beta_1 * m + (1. - self.beta_1) * g
            m_t_prime = m_t / (1. - m_schedule_next)
            v_t = K.maximum(self.beta_2 * v, K.abs(g))
            v_t_prime = v_t / (1. - K.pow(self.beta_2, t))
            m_t_bar = (1. - momentum_cache_t) * g_prime + momentum_cache_t_1 * m_t_prime

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))

            p_t = p - self.lr * m_t_bar / (K.sqrt(v_t_prime) + self.epsilon)
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates 
Example 33
Project: bert4keras   Author: bojone   File: backend.py    Apache License 2.0
def gelu_tanh(x):
    """基于Tanh近似计算的gelu函数
    """
    cdf = 0.5 * (1.0 + K.tanh(
        (np.sqrt(2 / np.pi) * (x + 0.044715 * K.pow(x, 3)))))
    return x * cdf 
Example 34
Project: qkeras   Author: google   File: example_qoctave.py    Apache License 2.0
def customLoss(y_true,y_pred):
  log1 = 1.5 * y_true * K.log(y_pred + 1e-9) * K.pow(1-y_pred, 2)
  log0 = 0.5 * (1 - y_true) * K.log((1 - y_pred) + 1e-9) * K.pow(y_pred, 2)
  return (- K.sum(K.mean(log0 + log1, axis = 0))) 
Example 35
Project: qkeras   Author: google   File: qlayers.py    Apache License 2.0
def __call__(self, x):
    m = pow(2, self.bits)
    m_i = pow(2, self.integer)

    if self.use_sigmoid:
      p = _sigmoid(x / m_i) * m
      xq = m_i * K.clip(
          2.0 * (_round_through(p, self.use_stochastic_rounding) / m) - 1.0,
          0.0, 1.0 - 1.0 / m)
    else:
      p = x * m / m_i
      xq = m_i * K.clip(
          _round_through(p, self.use_stochastic_rounding) / m,
          0.0, 1.0 - 1.0 / m)
    return xq 
Example 36
Project: qkeras   Author: google   File: qlayers.py    Apache License 2.0
def __call__(self, x):
    non_sign_bits = self.bits - 1
    m = pow(2, non_sign_bits)
    m_i = pow(2, self.integer)
    p = _sigmoid(x / m_i) * m
    rp = 2.0 * (_round_through(p) / m) - 1.0
    u_law_p = K.sign(rp) * K.log(1 + self.u * K.abs(rp)) / K.log(1 + self.u)
    xq = m_i * K.clip(u_law_p, -1.0 + (1.0 * self.symmetric) / m, 1.0 - 1.0 / m)
    return xq 
Example 37
Project: qkeras   Author: google   File: qlayers.py    Apache License 2.0
def __call__(self, x):

    min_exp = -2**(self.bits - 1)
    max_exp = 2**(self.bits - 1) - 1

    eps = K.epsilon()

    if min_exp < np.log2(eps):
      warnings.warn(
          "QKeras: min_exp in relu_po2 quantizer is smaller than K.epsilon()")

    log2 = np.log(2.0)

    if self.max_value != -1:
      max_exp = np.round(np.log2(self.max_value + eps))

    x = K.maximum(x, 0)
    if self.use_stochastic_rounding:
      x_log2 = stochastic_round_po2(x)
    else:
      x_log2 = _round_through(K.log(K.abs(x) + eps) / log2)
    x_clipped = K.clip(x_log2, min_exp, max_exp)
    return x + K.stop_gradient(-x + K.pow(2.0, x_clipped))

#
# Because it may be hard to get serialization from activation functions,
# we may be replacing their instantiation by QActivation in the future.
# 
Example 38
Project: Semantic_Segmentation_Keras   Author: liuph0119   File: loss_utils.py    Apache License 2.0
def focal_loss(y_true, y_pred):
    gamma = 2
    alpha = 0.25
    pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
    pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))

    pt_1 = K.clip(pt_1, 1e-3, .999)
    pt_0 = K.clip(pt_0, 1e-3, .999)

    return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1)) - K.sum(
        (1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0)) 
Example 39
Project: BERT-keras   Author: Separius   File: funcs.py    GNU General Public License v3.0
def gelu(x):
    return 0.5 * x * (1 + K.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * K.pow(x, 3))))


# https://stackoverflow.com/a/42194662/2796084 
Example 40
Project: AWX   Author: lucamasera   File: layers.py    GNU General Public License v3.0
def n_norm(self, x, epsilon=1e-6):
        return K.pow(K.clip(K.sum(K.pow(x, self.n), -1), epsilon, 1-epsilon), 1./self.n) 
Example 41
Project: iust_deep_fuzz   Author: m-zakeri   File: learn_and_fuzz_2.py    MIT License
def perplexity(y_true, y_pred):
    """
    Compute perplexity metric

    :param y_true:
    :param y_pred:
    :return:
    """
    ce = K.categorical_crossentropy(y_true, y_pred)
    # pp = K.pow(np.e, ce)  # Or 2?
    # pp = K.pow(2., ce)  # Or np.e
    pp = K.exp(ce)
    # print('Perplexity value in perplexity function: ', K.eval(pp))
    return pp 
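The commented-out K.pow(np.e, ce) variant above coincides with K.exp(ce); a small sanity check with illustrative values (TensorFlow backend assumed):

import numpy as np
from keras import backend as K

ce = K.constant([0.5, 1.0])
print(K.eval(K.exp(ce)))                    # [1.6487 2.7183]
print(K.eval(K.pow(K.constant(np.e), ce)))  # same values: exp(ce) == e**ce
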
Example 42
Project: iust_deep_fuzz   Author: m-zakeri   File: neural_fuzz_pdf_obj.py    MIT License
def perplexity(y_true, y_pred):
    """
    Compute perplexity metric

    :param y_true:
    :param y_pred:
    :return:
    """
    ce = K.categorical_crossentropy(y_true, y_pred)
    # pp = K.pow(np.e, ce)  # Or 2?
    # pp = K.pow(2., ce)  # Or np.e
    pp = K.exp(ce)
    # print('Perplexity value in perplexity function: ', K.eval(pp))
    return pp 
Example 43
Project: iust_deep_fuzz   Author: m-zakeri   File: metadata_neural_fuzz_pdf_obj.py    MIT License
def perplexity(y_true, y_pred):
    """
    Compute perplexity metric

    :param y_true:
    :param y_pred:
    :return:
    """
    ce = K.categorical_crossentropy(y_true, y_pred)
    # pp = K.pow(np.e, ce)  # Or 2?
    # pp = K.pow(2., ce)  # Or np.e
    pp = K.exp(ce)
    # print('Perplexity value in perplexity function: ', K.eval(pp))
    return pp 
Example 44
Project: iust_deep_fuzz   Author: m-zakeri   File: data_neural_fuzz_pdf_obj.py    MIT License
def perplexity(y_true, y_pred):
    """
    Compute perplexity metric

    :param y_true:
    :param y_pred:
    :return:
    """
    ce = K.categorical_crossentropy(y_true, y_pred)
    # pp = K.pow(np.e, ce)  # Or 2?
    # pp = K.pow(2., ce)  # Or np.e
    pp = K.exp(ce)
    # print('Perplexity value in perplexity function: ', K.eval(pp))
    return pp 
Example 45
Project: iust_deep_fuzz   Author: m-zakeri   File: learn_and_fuzz_3_sample_fuzz.py    MIT License
def perplexity(y_true, y_pred):
    """
    Compute perplexity metric

    :param y_true:
    :param y_pred:
    :return:
    """
    ce = K.categorical_crossentropy(y_true, y_pred)
    # pp = K.pow(np.e, ce)  # Or 2?
    # pp = K.pow(2., ce)  # Or np.e
    pp = K.exp(ce)
    # print('Perplexity value in perplexity function: ', K.eval(pp))
    return pp 
Example 46
Project: MMdnn   Author: microsoft   File: keras2_emitter.py    MIT License
def _layer_LRN(self):
        self.add_body(0, '''
from keras.layers.core import Layer
class LRN(Layer):

    def __init__(self, size=5, alpha=0.0005, beta=0.75, k=2, **kwargs):
        self.n = size
        self.alpha = alpha
        self.beta = beta
        self.k = k
        super(LRN, self).__init__(**kwargs)

    def build(self, input_shape):
        self.shape = input_shape
        super(LRN, self).build(input_shape)

    def call(self, x, mask=None):
        half_n = self.n - 1
        squared = K.square(x)
        scale = self.k
        norm_alpha = self.alpha / (2 * half_n + 1)
        if K.image_dim_ordering() == "th":
            b, f, r, c = self.shape
            squared = K.expand_dims(squared, 0)
            squared = K.spatial_3d_padding(squared, padding=((half_n, half_n), (0, 0), (0,0)))
            squared = K.squeeze(squared, 0)
            for i in range(half_n*2+1):
                scale += norm_alpha * squared[:, i:i+f, :, :]
        else:
            b, r, c, f = self.shape
            squared = K.expand_dims(squared, -1)
            squared = K.spatial_3d_padding(squared, padding=((0, 0), (0,0), (half_n, half_n)))
            squared = K.squeeze(squared, -1)
            for i in range(half_n*2+1):
                scale += norm_alpha * squared[:, :, :, i:i+f]

        scale = K.pow(scale, self.beta)
        return x / scale

    def compute_output_shape(self, input_shape):
        return input_shape''') 
Example 47
Project: Keras-TextClassification   Author: yongzhuo   File: keras_radam.py    MIT License
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        beta_1_t = K.pow(self.beta_1, t)
        beta_2_t = K.pow(self.beta_2, t)
        rho = 2 / (1 - self.beta_2) - 1
        rho_t = rho - 2 * t * beta_2_t / (1 - beta_2_t)
        r_t = K.sqrt(
            K.relu(rho_t - 4) * K.relu(rho_t - 2) * rho / ((rho - 4) * (rho - 2) * rho_t)
        )
        flag = K.cast(rho_t > 4, K.floatx())

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            mhat_t = m_t / (1 - beta_1_t)
            vhat_t = K.sqrt(v_t / (1 - beta_2_t))
            p_t = p - lr * mhat_t * (flag * r_t / (vhat_t + self.epsilon) + (1 - flag))

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates 
Example 48
Project: Keras-TextClassification   Author: yongzhuo   File: triangle_position_embedding.py    MIT License
def call(self, inputs, mask=None):
        input_shape = K.shape(inputs)
        if self.mode == self.MODE_ADD:
            batch_size, seq_len, output_dim = input_shape[0], input_shape[1], input_shape[2]
            pos_input = K.tile(K.expand_dims(K.arange(seq_len), axis=0), [batch_size, 1])
        elif self.mode == self.MODE_CONCAT:
            batch_size, seq_len, output_dim = input_shape[0], input_shape[1], self.output_dim
            pos_input = K.tile(K.expand_dims(K.arange(seq_len), axis=0), [batch_size, 1])
        else:
            output_dim = self.output_dim
            pos_input = inputs
        if K.dtype(pos_input) != K.floatx():
            pos_input = K.cast(pos_input, K.floatx())
        evens = K.arange(output_dim // 2) * 2
        odds = K.arange(output_dim // 2) * 2 + 1
        even_embd = K.sin(
            K.dot(
                K.expand_dims(pos_input, -1),
                K.expand_dims(1.0 / K.pow(
                    10000.0,
                    K.cast(evens, K.floatx()) / K.cast(output_dim, K.floatx())
                ), 0)
            )
        )
        odd_embd = K.cos(
            K.dot(
                K.expand_dims(pos_input, -1),
                K.expand_dims(1.0 / K.pow(
                    10000.0, K.cast((odds - 1), K.floatx()) / K.cast(output_dim, K.floatx())
                ), 0)
            )
        )
        embd = K.stack([even_embd, odd_embd], axis=-1)
        output = K.reshape(embd, [-1, K.shape(inputs)[1], output_dim])
        if self.mode == self.MODE_CONCAT:
            output = K.concatenate([inputs, output], axis=-1)
        if self.mode == self.MODE_ADD:
            output += inputs
        return output 
Example 49
Project: keras-transformer   Author: kpot   File: transformer.py    MIT License
def gelu(x):
    """
    GELU activation, described in paper "Gaussian Error Linear Units (GELUs)"
    https://arxiv.org/pdf/1606.08415.pdf
    """
    c = math.sqrt(2 / math.pi)
    return 0.5 * x * (1 + K.tanh(c * (x + 0.044715 * K.pow(x, 3)))) 
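A brief illustrative evaluation of the gelu above (input values assumed, TensorFlow backend):

from keras import backend as K

print(K.eval(gelu(K.constant([-1., 0., 1.]))))  # approx. [-0.159  0.     0.841]
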
Example 50
Project: DiscriminativeActiveLearning   Author: dsgissin   File: query_methods.py    MIT License
def get_distance_matrix(self, X, Y):

        x_input = K.placeholder((X.shape))
        y_input = K.placeholder(Y.shape)
        dot = K.dot(x_input, K.transpose(y_input))
        x_norm = K.reshape(K.sum(K.pow(x_input, 2), axis=1), (-1, 1))
        y_norm = K.reshape(K.sum(K.pow(y_input, 2), axis=1), (1, -1))
        dist_mat = x_norm + y_norm - 2.0*dot
        sqrt_dist_mat = K.sqrt(K.clip(dist_mat, min_value=0, max_value=10000))
        dist_func = K.function([x_input, y_input], [sqrt_dist_mat])

        return dist_func([X, Y])[0] 
Example 51
Project: image-segmentation   Author: nearthlab   File: image_classifier.py    MIT License
def weighted_focal_loss(weights, gamma):
    weights = K.variable([weights])
    gamma = K.variable([gamma])

    def loss(gt, pr):
        # scale preds so that the class probas of each sample sum to 1
        pr /= tf.reduce_sum(pr, axis=-1, keep_dims=True)
        # manual computation of crossentropy
        pr = tf.clip_by_value(pr, K.epsilon(), 1. - K.epsilon())
        return K.mean(-tf.reduce_sum(gt * K.pow(1. - pr, gamma) * tf.log(pr) * weights, axis=-1))

    return loss 
Example 52
Project: keras-contrib   Author: keras-team   File: yogi.py    MIT License
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            g2 = K.square(g)
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = v - (1. - self.beta_2) * K.sign(v - g2) * g2
            p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates 
Example 53
Project: keras-contrib   Author: keras-team   File: ftml.py    MIT License
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.inital_decay > 0:
            lr *= (1. / (1. + self.decay * self.iterations))

        t = self.iterations + 1

        lr_t = lr / (1. - K.pow(self.beta_1, t))

        shapes = [K.int_shape(p) for p in params]
        zs = [K.zeros(shape) for shape in shapes]
        vs = [K.zeros(shape) for shape in shapes]
        ds = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + zs + vs + ds

        for p, g, z, v, d in zip(params, grads, zs, vs, ds):
            v_t = self.beta_2 * v + (1. - self.beta_2) * K.square(g)
            d_t = (K.sqrt(v_t / (1. - K.pow(self.beta_2, t)))
                   + self.epsilon) / lr_t
            sigma_t = d_t - self.beta_1 * d
            z_t = self.beta_1 * z + (1. - self.beta_1) * g - sigma_t * p

            p_t = - z_t / d_t

            self.updates.append(K.update(z, z_t))
            self.updates.append(K.update(v, v_t))
            self.updates.append(K.update(d, d_t))

            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates 
Example 54
Project: medical_image_segmentation   Author: CVxTz   File: baseline_aug.py    MIT License
def focal_loss(gamma=2., alpha=.25):
    def focal_loss_fixed(y_true, y_pred):
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
        return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1))-K.sum((1-alpha) * K.pow( pt_0, gamma) * K.log(1. - pt_0))
    return focal_loss_fixed 
Example 55
Project: medical_image_segmentation   Author: CVxTz   File: baseline.py    MIT License
def focal_loss(gamma=2., alpha=.25):
    def focal_loss_fixed(y_true, y_pred):
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
        return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1))-K.sum((1-alpha) * K.pow( pt_0, gamma) * K.log(1. - pt_0))
    return focal_loss_fixed 
Example 56
Project: medical_image_segmentation   Author: CVxTz   File: baseline_aug_vgg.py    MIT License
def focal_loss(gamma=2., alpha=.25):
    def focal_loss_fixed(y_true, y_pred):
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
        return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1))-K.sum((1-alpha) * K.pow( pt_0, gamma) * K.log(1. - pt_0))
    return focal_loss_fixed 
Example 57
Project: medical_image_segmentation   Author: CVxTz   File: focal_aug.py    MIT License
def focal_loss(gamma=2., alpha=.25):
    def focal_loss_fixed(y_true, y_pred):
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
        return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1))-K.sum((1-alpha) * K.pow( pt_0, gamma) * K.log(1. - pt_0))
    return focal_loss_fixed 
Example 58
Project: keras-vis   Author: raghakot   File: regularizers.py    MIT License
def build_loss(self):
        r"""Implements the N-dim version of function
        $$TV^{\beta}(x) = \sum_{whc} \left ( \left ( x(h, w+1, c) - x(h, w, c) \right )^{2} +
        \left ( x(h+1, w, c) - x(h, w, c) \right )^{2} \right )^{\frac{\beta}{2}}$$
        to return total variation for all images in the batch.
        """
        image_dims = K.ndim(self.img) - 2

        # Constructing slice [1:] + [:-1] * (image_dims - 1) and [:-1] * (image_dims)
        start_slice = [slice(1, None, None)] + [slice(None, -1, None) for _ in range(image_dims - 1)]
        end_slice = [slice(None, -1, None) for _ in range(image_dims)]
        samples_channels_slice = [slice(None, None, None), slice(None, None, None)]

        # Compute pixel diffs by rolling slices to the right per image dim.
        tv = None
        for i in range(image_dims):
            ss = tuple(samples_channels_slice + start_slice)
            es = tuple(samples_channels_slice + end_slice)
            diff_square = K.square(self.img[utils.slicer[ss]] - self.img[utils.slicer[es]])
            tv = diff_square if tv is None else tv + diff_square

            # Roll over to next image dim
            start_slice = np.roll(start_slice, 1).tolist()
            end_slice = np.roll(end_slice, 1).tolist()

        tv = K.sum(K.pow(tv, self.beta / 2.))
        return normalize(self.img, tv) 
Example 59
Project: keras-vis   Author: raghakot   File: regularizers.py    MIT License
def build_loss(self):
        # Infinity norm
        if np.isinf(self.p):
            value = K.max(self.img)
        else:
            value = K.pow(K.sum(K.pow(K.abs(self.img), self.p)), 1. / self.p)

        return normalize(self.img, value) 
Example 60
Project: DLWP   Author: jweyn   File: custom.py    MIT License
def on_epoch_end(self, epoch, logs=None, beta_1=0.9, beta_2=0.999,):
        optimizer = self.model.optimizer
        it = K.cast(optimizer.iterations, K.floatx())
        lr = K.cast(optimizer.lr, K.floatx())
        decay = K.cast(optimizer.decay, K.floatx())
        t = K.eval(it + 1.)
        new_lr = K.eval(lr * (1. / (1. + decay * it)))
        lr_t = K.eval(new_lr * (K.sqrt(1. - K.pow(beta_2, t)) / (1. - K.pow(beta_1, t))))
        print(' - LR: {:.6f}'.format(lr_t)) 
Example 61
Project: DLWP   Author: jweyn   File: custom.py    MIT License
def latitude_weighted_loss(loss_function=mean_squared_error, lats=None, output_shape=(), axis=-2, weighting='cosine'):
    """
    Create a loss function that weights inputs by a function of latitude before calculating the loss.

    :param loss_function: method: Keras loss function to apply after the weighting
    :param lats: ndarray: 1-dimensional array of latitude coordinates
    :param output_shape: tuple: shape of expected model output
    :param axis: int: latitude axis in model output shape
    :param weighting: str: type of weighting to apply. Options are:
            cosine: weight by the cosine of the latitude (default)
            midlatitude: weight by the cosine of the latitude but also apply a 25% reduction to the equator and boost
                to the mid-latitudes
    :return: callable loss function
    """
    if weighting not in ['cosine', 'midlatitude']:
        raise ValueError("'weighting' must be one of 'cosine' or 'midlatitude'")
    if lats is not None:
        lat_tensor = K.zeros(lats.shape)
        lat_tensor.assign(K.cast_to_floatx(lats[:]))

        weights = K.cos(lat_tensor * np.pi / 180.)
        if weighting == 'midlatitude':
            weights = weights + 0.5 * K.pow(K.sin(lat_tensor * 2 * np.pi / 180.), 2.)

        weight_shape = output_shape[axis:]
        for d in weight_shape[1:]:
            weights = K.expand_dims(weights, axis=-1)
            weights = K.repeat_elements(weights, d, axis=-1)

    else:
        weights = K.ones(output_shape)

    def lat_loss(y_true, y_pred):
        return loss_function(y_true * weights, y_pred * weights)

    return lat_loss 
Example 62
Project: nnet-survival   Author: MGensheimer   File: nnet_survival.py    MIT License
def call(self, x):
        #The conditional probability of surviving each time interval (given that has survived to beginning of interval)
        #is affected by the input data according to eq. 18.13 in Harrell F.,
        #Regression Modeling Strategies 2nd ed. (available free online)
        return K.pow(K.sigmoid(self.kernel), K.exp(x)) 
Example 63
Project: faceswap   Author: deepfakes   File: s3fd.py    GNU General Public License v3.0
def call(self, x, *args):
        return K.pow(*x) 
Example 64
Project: bert_lamb_pretrain   Author: goldenbili   File: keras_lamb.py    Apache License 2.0
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

            m_t_hat = m_t / (1. - K.pow(self.beta_1, t))
            v_t_hat = v_t / (1. - K.pow(self.beta_2, t))

            p_dash = m_t_hat / (K.sqrt(v_t_hat + self.epsilon))

            if self.weight_decay > 0.:
                wd = self.weight_decay * p
                p_dash = p_dash + wd

            r1 = K.sqrt(K.sum(K.square(p)))
            r2 = K.sqrt(K.sum(K.square(p_dash)))

            r = tf.where(tf.greater(r1, 0.),
                         tf.where(tf.greater(r2, 0.),
                                  r1 / r2,
                                  1.0),
                         1.0)
            # r = r1 / r2
            eta = r * lr

            p_t = p - eta * p_dash

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates 
Example 65
Project: keras_radam   Author: forin-xyz   File: radam.py    MIT License
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay:
            lr = lr * (1. / (1. + self.decay * K.cast(
                self.iterations, K.dtype(self.decay)
            )))

        t = K.cast(self.iterations, K.floatx()) + 1.
        beta_1 = self.beta_1
        beta_2 = self.beta_2
        beta_1_t = K.pow(beta_1, t)
        beta_2_t = K.pow(beta_2, t)
        rho_inf = 2. / (1. - beta_2) - 1.
        rho_t = rho_inf - 2. * t * beta_2_t / (1. - beta_2_t)
        r_t = K.sqrt(
            K.relu(rho_t - 4.) * (rho_t - 2.) * rho_inf / (
                (rho_inf - 4.) * (rho_inf - 2.) * rho_t )
        )
        flag = K.cast(rho_t > 4., K.floatx())

        ms = [K.zeros(K.int_shape(p)) for p in params]
        vs = [K.zeros(K.int_shape(p)) for p in params]

        self.weights = [self.iterations] + ms + vs
        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = beta_1 * m + (1. - beta_1) * g
            v_t = beta_2 * v + (1. - beta_2) * K.square(g)

            m_hat_t = m_t / (1. - beta_1_t)
            v_hat_t = K.sqrt(v_t / (1. - beta_2_t))
            new_p = p - lr * (r_t / (v_hat_t + self.epsilon) + flag - 1.)* m_hat_t

            if getattr(p, "constraint", None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
        return self.updates 
Example 66
Project: keras-adamw   Author: OverLordGoldDragon   File: optimizers_225.py    MIT License
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]
        self.updates.append(K.update_add(self.t_cur, 1))

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]

        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        total_iterations = self.total_iterations
        # Cosine annealing
        if self.use_cosine_annealing and total_iterations != 0:
            self.eta_t = _compute_eta_t(self)
        self.lr_t = lr_t * self.eta_t  # for external tracking
        lr_t_premult = lr_t

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            # Learning rate multipliers
            if self.lr_multipliers is not None:
                lr_t = _apply_lr_multiplier(self, lr_t_premult, p)

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                p_t = p - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))

            # Weight decays
            if p.name in self.weight_decays.keys() and total_iterations != 0:
                p_t = _apply_weight_decays(self, p, p_t)
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))

        self._init_notified = True
        return self.updates 
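The helper _compute_eta_t is not shown in this excerpt; assuming the `# Cosine annealing` comment above refers to the SGDR schedule of Loshchilov & Hutter, a minimal standalone sketch of the multiplier it would produce:

# Standalone sketch (an assumption about _compute_eta_t, not the project's
# actual helper): cosine decay of the lr multiplier from 1 to 0.
import numpy as np

def eta_t(t_cur, total_iterations, eta_min=0., eta_max=1.):
    frac = t_cur / float(total_iterations)
    return eta_min + 0.5 * (eta_max - eta_min) * (1. + np.cos(np.pi * frac))

print(eta_t(0, 100), eta_t(50, 100), eta_t(100, 100))  # 1.0, 0.5, ~0.0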
Example 68
Project: keras-adamw   Author: OverLordGoldDragon   File: optimizers_225.py    MIT License 4 votes vote down vote up
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]
        self.updates.append(K.update_add(self.t_cur, 1))

        t = K.cast(self.iterations, K.floatx()) + 1

        # Due to the recommendations in [2], i.e. warming momentum schedule
        momentum_cache_t = self.beta_1 * (1. - 0.5 * (
            K.pow(K.cast_to_floatx(0.96), t * self.schedule_decay)))
        momentum_cache_t_1 = self.beta_1 * (1. - 0.5 * (
            K.pow(K.cast_to_floatx(0.96), (t + 1) * self.schedule_decay)))
        m_schedule_new = self.m_schedule * momentum_cache_t
        m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1
        self.updates.append((self.m_schedule, m_schedule_new))

        shapes = [K.int_shape(p) for p in params]
        ms = [K.zeros(shape) for shape in shapes]
        vs = [K.zeros(shape) for shape in shapes]

        self.weights = [self.iterations] + ms + vs

        total_iterations = self.total_iterations
        # Cosine annealing
        if self.use_cosine_annealing and total_iterations != 0:
            self.eta_t = _compute_eta_t(self)
        self.lr_t = self.lr * self.eta_t  # for external tracking

        for p, g, m, v in zip(params, grads, ms, vs):
            # Learning rate multipliers
            lr_t = self.lr
            if self.lr_multipliers is not None:
                lr_t = _apply_lr_multiplier(self, lr_t, p)

            # the following equations given in [1]
            g_prime = g / (1. - m_schedule_new)
            m_t = self.beta_1 * m + (1. - self.beta_1) * g
            m_t_prime = m_t / (1. - m_schedule_next)
            v_t = self.beta_2 * v + (1. - self.beta_2) * K.square(g)
            v_t_prime = v_t / (1. - K.pow(self.beta_2, t))
            m_t_bar = (1. - momentum_cache_t) * g_prime + (
                momentum_cache_t_1 * m_t_prime)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            p_t = p - self.eta_t * lr_t * m_t_bar / (
                    K.sqrt(v_t_prime) + self.epsilon)

            # Weight decays
            if p.name in self.weight_decays.keys() and total_iterations != 0:
                p_t = _apply_weight_decays(self, p, p_t)
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))

        self._init_notified = True
        return self.updates 
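A standalone sketch of the warming momentum schedule above (Nadam's mu_t); schedule_decay = 0.004 is the usual default, assumed here:

# Pure-Python sketch of momentum_cache_t and the running m_schedule product
# that the bias corrections above divide by.
beta_1, schedule_decay = 0.9, 0.004
m_schedule = 1.
for t in range(1, 6):
    momentum_cache_t = beta_1 * (1. - 0.5 * 0.96 ** (t * schedule_decay))
    m_schedule *= momentum_cache_t  # running product used for bias correction
    print(t, round(momentum_cache_t, 6), round(m_schedule, 6))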
Example 69
Project: keras_extension   Author: k1414st   File: optimizers.py    MIT License 4 votes vote down vote up
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

            eta_l_t = self.terminal_bound - \
                (self.terminal_bound - self.lower_bound) / \
                ((1. - self.beta_2) * t + 1)
            eta_u_t = self.terminal_bound + \
                (self.upper_bound - self.terminal_bound) / \
                ((1. - self.beta_2) * t)

            clipped_lr_t = K.minimum(
                K.maximum(lr_t / (K.sqrt(v_t) + self.epsilon), eta_l_t), eta_u_t)
            p_t = p - clipped_lr_t * m_t

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates 
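A standalone sketch of the bound schedules above: eta_l_t rises from lower_bound and eta_u_t falls toward terminal_bound as t grows, pinching the clipped step size; the three bound values here are assumptions for illustration:

beta_2 = 0.999
lower_bound, upper_bound, terminal_bound = 0., 0.5, 0.1
for t in (1., 100., 10000., 1000000.):
    eta_l = terminal_bound - (terminal_bound - lower_bound) / ((1. - beta_2) * t + 1)
    eta_u = terminal_bound + (upper_bound - terminal_bound) / ((1. - beta_2) * t)
    print(t, eta_l, eta_u)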
Example 71
Project: Coloring-greyscale-images   Author: emilwallner   File: AdamAccumulate.py    MIT License 4 votes vote down vote up
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr

        completed_updates = K.cast(K.tf.floor(self.iterations / self.accum_iters), K.floatx())

        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * completed_updates))

        t = completed_updates + 1

        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) / (1. - K.pow(self.beta_1, t)))

        # self.iterations incremented after processing a batch
        # batch:              1 2 3 4 5 6 7 8 9
        # self.iterations:    0 1 2 3 4 5 6 7 8
        # update_switch = 1:        x       x    (if accum_iters=4)  
        update_switch = K.equal((self.iterations + 1) % self.accum_iters, 0)
        update_switch = K.cast(update_switch, K.floatx())

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        gs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]

        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]

        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat, tg in zip(params, grads, ms, vs, vhats, gs):

            sum_grad = tg + g
            avg_grad = sum_grad / self.accum_iters_float

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * avg_grad
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(avg_grad)

            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                p_t = p - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, (1 - update_switch) * vhat + update_switch * vhat_t))
            else:
                p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, (1 - update_switch) * m + update_switch * m_t))
            self.updates.append(K.update(v, (1 - update_switch) * v + update_switch * v_t))
            self.updates.append(K.update(tg, (1 - update_switch) * sum_grad))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, (1 - update_switch) * p + update_switch * new_p))
        return self.updates 
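A quick standalone check of the update_switch logic above: with accum_iters = 4, the moments and weights change only on every 4th batch, while gradients accumulate in between:

accum_iters = 4
for iterations in range(8):                 # value before K.update_add runs
    update_switch = int((iterations + 1) % accum_iters == 0)
    print('batch', iterations + 1, 'switch', update_switch)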
Example 72
Project: keras-optimizers   Author: hi-im-ryanli   File: frankenstein.py    MIT License 4 votes vote down vote up
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        t = K.cast(self.iterations, K.floatx()) + 1

        # Due to the recommendations in [2], i.e. warming momentum schedule
        momentum_cache_t = self.beta_1 * (1. - 0.5 * (K.pow(K.cast_to_floatx(0.96), t * self.schedule_decay)))
        momentum_cache_t_1 = self.beta_1 * (1. - 0.5 * (K.pow(K.cast_to_floatx(0.96), (t + 1) * self.schedule_decay)))
        m_schedule_new = self.m_schedule * momentum_cache_t
        m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1
        self.updates.append((self.m_schedule, m_schedule_new))

        shapes = [K.int_shape(p) for p in params]
        ms = [K.zeros(shape) for shape in shapes]
        vs = [K.zeros(shape) for shape in shapes]

        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            # the following equations given in [1]
            g_prime = g / (1. - m_schedule_new)
            m_t = self.beta_1 * m + (1. - self.beta_1) * g
            m_t_prime = m_t / (1. - m_schedule_next)
            if np.random.choice([1, -1]) == 1:
                v_t = self.beta_2 * v + (1. - self.beta_2) * K.square(g)
            else:
                v_t = K.maximum(self.beta_2 * v, K.abs(g))
            v_t_prime = v_t / (1. - K.pow(self.beta_2, t))
            m_t_bar = (1. - momentum_cache_t) * g_prime + momentum_cache_t_1 * m_t_prime

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))

            p_t = p - self.lr * m_t_bar / (K.sqrt(v_t_prime) + self.epsilon)
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates 
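Note that np.random.choice above executes once, at graph-construction time, so a given compile uses one second-moment rule for every training step. A standalone sketch of the two candidate rules (values assumed):

beta_2, v, g = 0.999, 0.04, 0.3
v_adam   = beta_2 * v + (1. - beta_2) * g ** 2   # Adam: EMA of squared gradients
v_adamax = max(beta_2 * v, abs(g))               # Adamax-style infinity norm
print(v_adam, v_adamax)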
Example 73
Project: keras-optimizers   Author: hi-im-ryanli   File: frankenstein.py    MIT License 4 votes vote down vote up
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        shapes = [K.int_shape(p) for p in params]
        accumulators = [K.zeros(shape) for shape in shapes]
        delta_accumulators = [K.zeros(shape) for shape in shapes]

        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * K.cast(self.iterations,
                                                  K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]

        self.weights = [self.iterations] + ms + vs

        for p, g, m, v, a, d_a in zip(params, grads, ms, vs, accumulators, delta_accumulators):
            # update accumulator
            new_a = self.rho * a + (1. - self.rho) * K.square(g)
            self.updates.append(K.update(a, new_a))

            # use the new accumulator and the *old* delta_accumulator
            update = g * K.sqrt(d_a + self.epsilon) / K.sqrt(new_a + self.epsilon)

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * update
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(update)
            p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates 
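A standalone sketch of the Adadelta-style preconditioning above (rho and epsilon values assumed). Since d_a is never written back in this excerpt, its RMS term stays at sqrt(epsilon) and the step reduces to an RMS-normalized gradient fed into Adam:

import numpy as np

rho, epsilon = 0.95, 1e-7
g, a, d_a = 0.3, 0.0, 0.0
new_a = rho * a + (1. - rho) * g ** 2
update = g * np.sqrt(d_a + epsilon) / np.sqrt(new_a + epsilon)
print(update)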
Example 74
Project: FormicID   Author: naturalis   File: optimizer.py    MIT License 4 votes vote down vote up
def get_updates(self, params, constraints, loss):
        grads = self.get_gradients(loss, params)

        self.updates = [K.update_add(self.iterations, 1)]
        t = self.iterations + 1

        shapes = [K.get_variable_shape(p) for p in params]
        ms = [K.zeros(shape) for shape in shapes]
        vs = [K.zeros(shape) for shape in shapes]

        loss_prev = K.variable(0)
        self.updates.append(K.update(loss_prev, loss))

        # Calculate the numerator of the Eve coefficient
        d_num_t = K.abs(loss_prev - loss)
        self.updates.append(K.update(self.d_num, d_num_t))

        # Calculate the denominator of the Eve coefficient
        d_den_t = K.abs(K.minimum(loss_prev, loss) - self.loss_min)
        self.updates.append(K.update(self.d_den, d_den_t))

        # Calculate the Eve coefficient. At the first iteration, it is 1.
        d_tilde_t = K.clip(
            (d_num_t + self.fmin_pos) / (d_den_t + self.fmin_pos),
            1. / self.c,
            self.c,
        )
        d_t = (self.beta_3 * self.d) + (1. - self.beta_3) * d_tilde_t
        d_t = K.switch(K.greater(t, 1), d_t, K.constant(1))
        self.updates.append(K.update(self.d, d_t))

        # Calculate the effective learning rate: lr / (d_t * (1 + iterations * decay))
        lr_eff_t = self.lr / (d_t * (1. + (self.iterations * self.decay)))
        self.updates.append(K.update(self.lr_eff, lr_eff_t))

        # Apply bias correction to the learning rate
        lr_hat_t = (
            lr_eff_t
            * K.sqrt(1. - K.pow(self.beta_2, t))
            / (1. - K.pow(self.beta_1, t))
        )

        # Update per parameter
        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            self.updates.append(K.update(m, m_t))

            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            self.updates.append(K.update(v, v_t))

            p_t = p - lr_hat_t * m_t / (K.sqrt(v_t) + self.epsilon)
            new_p = p_t
            # Apply constraints
            if p in constraints:
                c = constraints[p]
                new_p = c(new_p)
            self.updates.append(K.update(p, new_p))
        return self.updates 
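A standalone sketch of the Eve coefficient above: the relative loss change is clipped to [1/c, c] and smoothed with beta_3 (all values here are assumed for illustration):

import numpy as np

c, beta_3, fmin_pos = 10., 0.999, 1e-8
loss_prev, loss, loss_min, d = 0.70, 0.60, 0.55, 1.0
d_num = abs(loss_prev - loss)                 # |f_{t-1} - f_t|
d_den = abs(min(loss_prev, loss) - loss_min)  # distance to the best loss seen
d_tilde = np.clip((d_num + fmin_pos) / (d_den + fmin_pos), 1. / c, c)
d_t = beta_3 * d + (1. - beta_3) * d_tilde
print(d_tilde, d_t)                           # 2.0, 1.001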
Example 75
Project: keras-adabound   Author: titu1994   File: adabound.py    MIT License 4 votes vote down vote up
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1

        # Applies bounds on actual learning rate
        step_size = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                          (1. - K.pow(self.beta_1, t)))

        final_lr = self.final_lr * lr / self.base_lr
        lower_bound = final_lr * (1. - 1. / (self.gamma * t + 1.))
        upper_bound = final_lr * (1. + 1. / (self.gamma * t))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsbound:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            # apply weight decay
            if self.weight_decay != 0.:
                g += self.weight_decay * K.stop_gradient(p)

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

            if self.amsbound:
                vhat_t = K.maximum(vhat, v_t)
                denom = (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                denom = (K.sqrt(v_t) + self.epsilon)

            # Compute the bounds
            step_size_p = step_size * K.ones_like(denom)
            step_size_p_bound = step_size_p / denom
            bounded_lr_t = m_t * K.minimum(K.maximum(step_size_p_bound,
                                                     lower_bound), upper_bound)

            p_t = p - bounded_lr_t

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates 
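A standalone check of the bounds above: lower_bound and upper_bound pinch the per-element step size toward final_lr as t grows (final_lr = 0.1 and gamma = 1e-3 assumed, matching the AdaBound paper's defaults):

final_lr, gamma = 0.1, 1e-3
for t in (1., 1e3, 1e6):
    lower = final_lr * (1. - 1. / (gamma * t + 1.))
    upper = final_lr * (1. + 1. / (gamma * t))
    print(t, lower, upper)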
Example 76
Project: keras-contrib   Author: keras-team   File: padam.py    MIT License 4 votes vote down vote up
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                denom = (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                denom = (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))

            # Partial momentum adaption.
            new_p = p - (lr_t * (m_t / (denom ** (self.partial * 2))))

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates 
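A standalone sketch of the partial adaption above (the next example applies the same rule): with partial = p in (0, 0.5], the denominator is raised to 2p, interpolating between SGD-like behavior (p -> 0) and Adam (p = 0.5):

import numpy as np

partial, epsilon = 0.125, 1e-8   # 1/8 is the Padam paper's suggested value
v_t = 0.04
denom = np.sqrt(v_t) + epsilon
print(denom ** (partial * 2))    # 0.2 ** 0.25 ~= 0.669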
Example 77
Project: keras-padam   Author: titu1994   File: padam.py    MIT License 4 votes vote down vote up
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * K.cast(self.iterations,
                                                  K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                denom = (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                denom = (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))

            # Partial momentum adaption
            new_p = p - (lr_t * (m_t / (denom ** (self.partial * 2))))

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))

        return self.updates 
Example 78
Project: faceswap   Author: deepfakes   File: optimizers.py    GNU General Public License v3.0 4 votes vote down vote up
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        # Pass off to CPU if requested
        if self.cpu_mode:
            with K.tf.device("/cpu:0"):
                ms, vs, vhats = self.update_1(params)
        else:
            ms, vs, vhats = self.update_1(params)

        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                p_t = p - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates 
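A minimal sketch of what the cpu_mode path above does: variable creation is wrapped in an explicit device scope so the moment tensors live in host memory (TF 1.x style, matching the K.tf usage in this example; the shape is arbitrary):

import tensorflow as tf

with tf.device('/cpu:0'):
    m = tf.Variable(tf.zeros((1024, 1024)))  # slot variable pinned to the CPU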
Example 79
Project: StyleGAN-Keras   Author: manicman1999   File: adamlr.py    MIT License 4 votes vote down vote up
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * K.cast(self.iterations,
                                                  K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):

            # Learning rate multipliers
            if self.multipliers:
                multiplier = [mult for mult in self.multipliers if mult in p.name]
            else:
                multiplier = None
            if multiplier:
                new_lr_t = lr_t * self.multipliers[multiplier[0]]
                if self.debug_verbose:
                    print('Setting {} to learning rate {}'.format(multiplier[0], new_lr_t))
                    print(K.get_value(new_lr_t))
            else:
                new_lr_t = lr_t
                if self.debug_verbose:
                    print('No change in learning rate {}'.format(p.name))
                    print(K.get_value(new_lr_t))
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                p_t = p - new_lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                p_t = p - new_lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates 
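A standalone sketch of the multiplier lookup above: dictionary keys are matched as substrings of each weight's name, so one entry can cover a whole layer (the names below are made up for illustration):

multipliers = {'dense_1': 0.1, 'conv2d': 0.5}
for name in ('dense_1/kernel:0', 'conv2d_3/bias:0', 'dense_2/kernel:0'):
    match = [k for k in multipliers if k in name]
    scale = multipliers[match[0]] if match else 1.0
    print(name, scale)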
Example 80
Project: BERT_with_keras   Author: miroozyx   File: optimization.py    MIT License 4 votes vote down vote up
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = tf.train.polynomial_decay(
            self.lr,
            self.iterations,
            self.num_train_steps,
            end_learning_rate=0.0,
            power=1.0,
            cycle=False
        )

        # Implements linear warmup. I.e., if global_step < num_warmup_steps, the
        # learning rate will be `global_step/num_warmup_steps * init_lr`.
        t = K.cast(self.iterations, K.floatx()) + 1
        warmup_percent_done = K.cast(t / self.num_warmup_steps, dtype=K.floatx())
        warmup_lr = self.lr * warmup_percent_done
        is_warmup = K.cast(t < self.num_warmup_steps, dtype=K.floatx())
        lr = ((1.0 - is_warmup) * lr) + is_warmup * warmup_lr

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]

        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            if self.bias_corrected:
                m_t /= 1 - K.pow(self.beta_1, t)
                v_t /= 1 - K.pow(self.beta_2, t)

            update = m_t / (K.sqrt(v_t) + self.epsilon)

            # Just adding the square of the weights to the loss function is *not*
            # the correct way of using L2 regularization/weight decay with Adam,
            # since that will interact with the m and v parameters in strange ways.
            #
            # Instead we want to decay the weights in a manner that doesn't interact
            # with the m/v parameters. This is equivalent to adding the square
            # of the weights to the loss with plain (non-momentum) SGD.
            param_name = self._get_variable_name(p.name)
            if self._do_use_weight_decay(param_name):
                update += self.weight_decay_rate * p

            p_t = p - lr * update

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
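A standalone sketch of the schedule above: a linear ramp up to init_lr over num_warmup_steps, then linear decay to 0 (polynomial_decay with power=1.0); the step values are assumed for illustration:

init_lr, num_train_steps, num_warmup_steps = 1e-4, 1000, 100
for step in (10, 50, 100, 500, 1000):
    decayed = init_lr * (1. - float(step) / num_train_steps)
    warmup = init_lr * step / num_warmup_steps
    lr = warmup if step < num_warmup_steps else decayed
    print(step, lr)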