Python keras.backend.pow() Examples

The following code examples show how to use keras.backend.pow(). They are drawn from open-source Python projects.

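Before the project examples, here is a minimal sketch of the call itself. K.pow(x, a) raises a tensor element-wise to the power a; the values below are illustrative and assume a TensorFlow backend.

from keras import backend as K

x = K.constant([1., 2., 3.])
y = K.pow(x, 2.)  # element-wise power
print(K.eval(y))  # [1. 4. 9.]
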
Example 1
Project: keras_extension   Author: k1414st   File: activations.py    MIT License
def gelu(x, approximate='tanh'):
    """Gaussian Error Linear Unit.
    (implementation of https://arxiv.org/abs/1606.08415)

    # Arguments
        x: Input tensor.
        approximate: Approximation method of GELU.
            "tanh" or "sigmoid" (defaults to "tanh".)

    # Returns
        The Gaussian error linear unit activation: x * Phi(x)
            where Phi(x) = P(X <= x), X ~ Normal(0, 1)
            (approximated by "tanh" or "sigmoid")
    """
    if approximate not in ('tanh', 'sigmoid'):
        raise ValueError('approximate must be in ("tanh", "sigmoid")')
    if approximate == 'tanh':
        return 0.5*x * (1 + K.tanh(np.sqrt(2/np.pi) * \
                                   (x + 0.044715*K.pow(x, 3))))
    else:
        return x * K.sigmoid(1.702*x) 
Example 2
Project: Trident-Segmentation-CNN   Author: YalongLiu   File: metrics.py    MIT License
def self_adaptive_balance_loss(y_true, y_pred):
    # Hyper parameters
    gamma = 1  # For hard segmentation

    # Original focal loss for segmentation
    pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
    pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))

    loss_0 = -K.pow(pt_0, gamma) * K.log(1. - pt_0 + K.epsilon())
    loss_1 = -K.pow(1. - pt_1, gamma) * K.log(K.epsilon() + pt_1)
    sum_0 = tf.reduce_sum(loss_0)
    sum_1 = tf.reduce_sum(loss_1)

    alpha = (sum_0 / (sum_0 + sum_1)) * 0.4 + 0.5
    b_loss_0 = (1 - alpha) * loss_0  # background loss after balance
    b_loss_1 = alpha * loss_1  # foreground loss after balance
    sum_2 = tf.reduce_sum(b_loss_0)
    sum_3 = tf.reduce_sum(b_loss_1)

    b_loss_1 = tf.Print(b_loss_1, [sum_0, sum_1, alpha, sum_2, sum_3],
                        message='loss_0 loss_1 alpha sum2 sum3:')

    loss = b_loss_0 + b_loss_1
    return loss 
Example 3
Project: SPECT-CT-Seg-ResUNet-Keras   Author: junyuchen245   File: custom_losses.py    MIT License
def exp_dice_loss(exp=1.0):
    """
    :param exp: exponent. 1.0 for no exponential effect, i.e. log Dice.
    """

    def inner(y_true, y_pred):
        """Computes the average exponential log Dice coefficients as the loss function.
        :param y_true: one-hot tensor multiplied by label weights, (batch size, number of pixels, number of labels).
        :param y_pred: softmax probabilities, same shape as y_true. Each probability serves as a partial volume.
        :return: average exponential log Dice coefficient.
        """

        dice = dice_coef(y_true, y_pred)
        #dice = generalized_dice(y_true, y_pred, exp)
        dice = K.clip(dice, K.epsilon(), 1 - K.epsilon())  # As log is used
        dice = K.pow(-K.log(dice), exp)
        if K.ndim(dice) == 2:
            dice = K.mean(dice, axis=-1)
        return dice

    return inner 
Example 4
Project: NTM-Keras   Author: SigmaQuan   File: memory.py    MIT License
def sharpen(_weight_t, scalar_gama_t):
    '''
    The convolution operation in convolutional shift can cause leakage or
    dispersion of weights over time if the shift weighting is not sharp.
    For example, if shifts of -1, 0 and 1 are given weights of 0.1, 0.8,
    and 0.1, the rotation will transform a weighting focused at a single
    point into one slightly blurred over three points. To combat this,
    each head emits one further scalar \gamma >= 1 whose effect is to
    sharpen the final weighting as follows:
    $$w_{i}^{(t)} = \frac{\left(\hat{w}_{i}^{(t)}\right)^{\gamma}}
    {\sum_{j}\left(\hat{w}_{j}^{(t)}\right)^{\gamma}}$$
    :param _weight_t: the weight vector which denotes a memory address.
    :param scalar_gama_t: the sharpening scalar \gamma.
    :return: the sharpened weight.
    '''
    weight_t = K.pow(_weight_t, scalar_gama_t)
    return weight_t / K.sum(weight_t) 
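A quick numeric check of the sharpening above (illustrative values, TensorFlow backend assumed): raising the blurred weighting [0.1, 0.8, 0.1] to the power 3 and renormalizing concentrates nearly all mass at the peak.

from keras import backend as K

weight = K.constant([0.1, 0.8, 0.1])
print(K.eval(sharpen(weight, K.constant(3.0))))  # approx. [0.002 0.996 0.002]
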
Example 5
Project: AutoNlp   Author: upwindflys   File: model.py    MIT License
def categorical_focal_loss_fixed(y_true, y_pred):
    """
        :param y_true: A tensor of the same shape as `y_pred`
        :param y_pred: A tensor resulting from a softmax
        :return: Output tensor.
        """

    gamma = 2.
    alpha = .25
    # Scale predictions so that the class probas of each sample sum to 1
    y_pred /= K.sum(y_pred, axis=-1, keepdims=True)

    # Clip the prediction value to prevent NaN's and Inf's
    epsilon = K.epsilon()
    y_pred = K.clip(y_pred, epsilon, 1. - epsilon)

    # Calculate Cross Entropy
    cross_entropy = -y_true * K.log(y_pred)

    # Calculate Focal Loss
    loss = alpha * K.pow(1 - y_pred, gamma) * cross_entropy

    # Sum the losses in mini_batch
    return K.sum(loss, axis=1) 
Example 6
Project: keras-lookahead   Author: CyberZHG   File: optimizers.py    MIT License
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = self.learning_rate * (K.sqrt(1. - K.pow(self.beta_2, t)) / (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            p_t = lr_t * m_t / (K.sqrt(v_t) + self.epsilon)
            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            self.updates.append(K.update_sub(p, p_t))
        return self.updates 
Example 7
Project: qkeras   Author: google   File: qlayers.py    Apache License 2.0
def stochastic_round_po2(x):
  """Performs stochastic rounding for the power of two."""
  y = K.abs(x)
  eps = K.epsilon()
  log2 = K.log(2.0)
  x_log2 = K.round(K.log(y + eps) / log2)
  sign = K.sign(x)
  po2 = K.cast(K.pow(2.0, K.cast(x_log2, dtype="float32")), dtype="float32")
  left_val = tf.where(po2 > y, x_log2 - 1, x_log2)
  right_val = tf.where(po2 > y, x_log2, x_log2 + 1)
  # sampling in [2**left_val, 2**right_val].
  minval = 2 ** left_val
  maxval = 2 ** right_val
  val = K.random_uniform(K.shape(y), minval=minval, maxval=maxval)
  # use y as a threshold to split the probability between [2**left_val,
  # 2**right_val] so that the mean value of the sample is y
  x_po2 = tf.where(y < val, left_val, right_val)
  return x_po2 
Example 8
Project: qkeras   Author: google   File: qlayers.py    Apache License 2.0
def __call__(self, x):
    """Computes fixedpoint quantization of x."""

    unsigned_bits = self.bits - self.keep_negative

    # quantized_bits with "1" bit becomes a binary implementation.

    if unsigned_bits > 0:
      m = pow(2, unsigned_bits)
      m_i = pow(2, self.integer)
      p = x * m / m_i
      xq = m_i * K.clip(
          _round_through(p, self.use_stochastic_rounding),
          self.keep_negative * (-m + self.symmetric), m - 1) / m
    else:
      xq = K.sign(x)
      xq += (1.0 - K.abs(xq))
      if not self.keep_negative:
        xq = (xq + 1.0) / 2.0
    return x + K.stop_gradient(-x + xq) 
Example 9
Project: qkeras   Author: google   File: qlayers.py    Apache License 2.0
def __call__(self, x):
    non_sign_bits = self.bits - 1
    min_exp = -2**(non_sign_bits - 1)
    max_exp = 2**(non_sign_bits - 1) - 1
    eps = K.epsilon()
    if min_exp < np.log2(eps):
      warnings.warn(
          "QKeras: min_exp in po2 quantizer is smaller than K.epsilon()")

    if self.max_value != -1:
      max_exp = np.round(np.log2(self.max_value + eps))

    x_sign = K.sign(x)
    x_sign += (1.0 - K.abs(x_sign))
    log2 = np.log(2.0)

    if self.use_stochastic_rounding:
      x_log2 = stochastic_round_po2(x)
    else:
      x_log2 = _round_through(K.log(K.abs(x) + eps) / log2)
    return x + K.stop_gradient(
        -x + x_sign * K.pow(2.0, K.clip(x_log2, min_exp, max_exp))) 
Example 10
Project: IrrMapper   Author: dgketchum   File: losses.py    Apache License 2.0
def binary_focal_loss(gamma=2, alpha=0.25):
    ''' 
    Focal loss:

       FL (pt) = -(1-pt)^gamma * log(pt)
       where
       pt = p if y==1
            1-p otherwise
    '''

    def bfl(y_true, y_pred):
        mask = tf.not_equal(y_true, -1) # true where the mask isn't==-1
        y_pred = tf.nn.sigmoid(y_pred)
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
        epsilon = K.epsilon()
        # clip to prevent NaN's and Inf's
        pt_1 = K.clip(pt_1, epsilon, 1. - epsilon)
        pt_0 = K.clip(pt_0, epsilon, 1. - epsilon)
        pt_1 = tf.boolean_mask(pt_1, mask)
        pt_0 = tf.boolean_mask(pt_0, mask)

        return -K.mean(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1)) \
               -K.mean((1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0))
    return bfl 
Example 11
Project: IrrMapper   Author: dgketchum   File: losses.py    Apache License 2.0
def multiclass_focal_loss(gamma=2, alpha=0.25):


    def multiclass_FL(y_true, y_pred):
        y_true_sum = tf.math.reduce_sum(y_true, axis=-1)
        mask = tf.not_equal(y_true_sum, 0)
        # y_true = tf.boolean_mask(y_true, mask)
        # y_pred = tf.boolean_mask(y_pred, mask)
        # probabilities = tf.nn.softmax(y_pred)
        # xen = -y_true * tf.math.log(probabilities) # all 0s where y_true is all 0s
        # loss = alpha*tf.math.pow(1-probabilities, gamma) * xen 
        # return tf.math.reduce_mean(loss)
        probabilities = tf.nn.softmax(y_pred, axis=-1)
        xen = -y_true * tf.math.log(probabilities) # all 0s where y_true is all 0s
        complement = tf.dtypes.cast(tf.equal(y_true, 0), tf.float32)
        # focal term for the negative classes: -(p)^gamma * log(1 - p)
        negative_probabilities = -tf.math.pow(complement*probabilities,
                gamma)*tf.math.log(1. - complement*probabilities)
        masked_xen = tf.boolean_mask(xen, mask)
        masked_complement = tf.boolean_mask(negative_probabilities, mask)
        return tf.reduce_mean(masked_xen) + tf.reduce_mean(masked_complement)

    return multiclass_FL 
Example 12
Project: nlp_toolkit   Author: stevewyl   File: position_embedding.py    MIT License
def call(self, x):
        if (self.size is None) or (self.mode == 'sum'):
            self.size = int(x.shape[-1])
        batch_size, seq_len = K.shape(x)[0], K.shape(x)[1]
        position_j = 1. / K.pow(10000.,
                                2 * K.arange(self.size / 2, dtype='float32'
                                             ) / self.size)
        position_j = K.expand_dims(position_j, 0)
        # K.arange does not support variable length, so the positions are generated this way
        position_i = K.cumsum(K.ones_like(x[:, :, 0]), 1) - 1
        position_i = K.expand_dims(position_i, 2)
        position_ij = K.dot(position_i, position_j)
        position_ij = K.concatenate(
            [K.cos(position_ij), K.sin(position_ij)], 2)
        if self.mode == 'sum':
            return position_ij + x
        elif self.mode == 'concat':
            return K.concatenate([position_ij, x], 2) 
Example 13
Project: Deep-Spectral-Clustering-using-Dual-Autoencoder-Network   Author: xdxuyang   File: costs.py    MIT License
def full_affinity(X, scale):
    '''
    Calculates the symmetrized full Gaussian affinity matrix, scaled
    by a provided scale

    X:              input dataset of size n
    scale:          provided scale

    returns:        n x n affinity matrix
    '''
    sigma = K.variable(scale)
    Dx = squared_distance(X)
    sigma_squared = K.pow(sigma, 2)
    sigma_squared = K.expand_dims(sigma_squared, -1)
    Dx_scaled = Dx / (2 * sigma_squared)
    W = K.exp(-Dx_scaled)
    return W 
Example 14
Project: faceswap   Author: deepfakes   File: losses.py    GNU General Public License v3.0
def generalized_loss(y_true, y_pred, alpha=1.0, beta=1.0/255.0):
    """
    generalized function used to return a large variety of mathematical loss functions
    primary benefit is smooth, differentiable version of L1 loss

    Barron, J. A More General Robust Loss Function
    https://arxiv.org/pdf/1701.03077.pdf
    Parameters:
        alpha: penalty factor. larger number give larger weight to large deviations
        beta: scale factor used to adjust to the input scale (i.e. inputs of mean 1e-4 or 256 )
    Return:
        a loss value from the results of function(y_pred - y_true)
    Example:
        a=1.0, x>>c , c=1.0/255.0 will give a smoothly differentiable version of L1 / MAE loss
        a=1.999999 (lim as a->2), beta=1.0/255.0 will give L2 / RMSE loss
    """
    diff = y_pred - y_true
    second = (K.pow(K.pow(diff/beta, 2.) / K.abs(2.-alpha) + 1., (alpha/2.)) - 1.)
    loss = (K.abs(2.-alpha)/alpha) * second
    loss = K.mean(loss, axis=-1) * beta
    return loss 
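A hedged usage sketch of the two limits named in the docstring (the input tensors are illustrative): alpha=1.0 yields the smooth L1 / MAE behaviour, while alpha close to 2 approaches L2.

from keras import backend as K

y_true = K.constant([[0.0, 0.0]])
y_pred = K.constant([[0.1, 0.2]])
print(K.eval(generalized_loss(y_true, y_pred, alpha=1.0)))       # smooth-L1-like
print(K.eval(generalized_loss(y_true, y_pred, alpha=1.999999)))  # L2-like
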
Example 15
Project: focal-loss-keras   Author: mkocabas   File: focal_loss.py    MIT License
def focal_loss(gamma=2., alpha=.25):
	def focal_loss_fixed(y_true, y_pred):
		pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
		pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
		return -K.mean(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1)) - K.mean((1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0))
	return focal_loss_fixed 
Example 16
Project: keras_radam   Author: forin-xyz   File: test_radam.py    MIT License
def gelu(x):
    return 0.5 * x * (1 + K.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * K.pow(x, 3)))) 
Example 17
Project: keras-lamb   Author: CyberZHG   File: optimizer.py    MIT License
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        t = K.cast(self.iterations, K.floatx()) + 1
        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * t))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))

            mhat_t = m_t / (1. - K.pow(self.beta_1, t))
            vhat_t = v_t / (1. - K.pow(self.beta_2, t))

            u_t = mhat_t / K.sqrt(vhat_t + self.epsilon) + self.weight_decay * p
            trust_ratio = K.sqrt(K.sum(K.square(p)) / K.sum(K.square(u_t)))
            trust_ratio = K.minimum(K.maximum(trust_ratio, self.lower_bound), self.upper_bound)

            lr_p = trust_ratio * lr
            new_p = p - lr_p * u_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates 
Example 18
Project: trVAE   Author: theislab   File: _utils.py    MIT License
def compute_kernel(x, y, kernel='rbf', **kwargs):
    """
        Computes RBF kernel between x and y.
        # Parameters
            x: Tensor
                Tensor with shape [batch_size, z_dim]
            y: Tensor
                Tensor with shape [batch_size, z_dim]
        # Returns
            returns the computed RBF kernel between x and y
    """
    scales = kwargs.get("scales", [])
    if kernel == "rbf":
        x_size = K.shape(x)[0]
        y_size = K.shape(y)[0]
        dim = K.shape(x)[1]
        tiled_x = K.tile(K.reshape(x, K.stack([x_size, 1, dim])), K.stack([1, y_size, 1]))
        tiled_y = K.tile(K.reshape(y, K.stack([1, y_size, dim])), K.stack([x_size, 1, 1]))
        return K.exp(-K.mean(K.square(tiled_x - tiled_y), axis=2) / K.cast(dim, tf.float32))
    elif kernel == 'raphy':
        scales = K.variable(value=np.asarray(scales))
        squared_dist = K.expand_dims(squared_distance(x, y), 0)
        scales = K.expand_dims(K.expand_dims(scales, -1), -1)
        weights = K.eval(K.shape(scales)[0])
        weights = K.variable(value=np.asarray(weights))
        weights = K.expand_dims(K.expand_dims(weights, -1), -1)
        return K.sum(weights * K.exp(-squared_dist / (K.pow(scales, 2))), 0)
    elif kernel == "multi-scale-rbf":
        sigmas = [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 5, 10, 15, 20, 25, 30, 35, 100, 1e3, 1e4, 1e5, 1e6]

        beta = 1. / (2. * (K.expand_dims(sigmas, 1)))
        distances = squared_distance(x, y)
        s = K.dot(beta, K.reshape(distances, (1, -1)))

        return K.reshape(tf.reduce_sum(tf.exp(-s), 0), K.shape(distances)) / len(sigmas) 
Example 19
Project: FaceLandmarks   Author: JACKYLUO1991   File: loss.py    Apache License 2.0
def smoothL1(y_true, y_pred):
    """
    More robust to noise
    """
    THRESHOLD = K.variable(1.0)
    mae = K.abs(y_true - y_pred)
    flag = K.greater(mae, THRESHOLD)
    loss = K.mean(K.switch(flag, (mae - 0.5), K.pow(mae, 2)), axis=-1)

    return loss 
Example 20
Project: BERT   Author: yyht   File: funcs.py    Apache License 2.0
def gelu(x):
    return 0.5 * x * (1 + K.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * K.pow(x, 3))))


# https://stackoverflow.com/a/42194662/2796084 
Example 21
Project: group-ksparse-temporal-cnns   Author: srph25   File: ops.py    MIT License
def n_p(Z, p, axis, epsilon=None):
    if epsilon is None:
        epsilon = K.epsilon()
    return K.pow(K.sum(K.pow(K.abs(Z), p), axis=axis) + epsilon, 1. / p) 
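An illustrative check of the L_p norm helper above (values assumed, TensorFlow backend): with p=2 it reduces to the Euclidean norm, up to the small epsilon term.

from keras import backend as K

Z = K.constant([[3., 4.]])
print(K.eval(n_p(Z, 2., axis=-1)))  # approx. [5.]
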
Example 22
Project: iMIMIC-RCVs   Author: medgift   File: regularizers.py    MIT License
def build_loss(self):
        r"""Implements the N-dim version of function
        $$TV^{\beta}(x) = \sum_{whc} \left ( \left ( x(h, w+1, c) - x(h, w, c) \right )^{2} +
        \left ( x(h+1, w, c) - x(h, w, c) \right )^{2} \right )^{\frac{\beta}{2}}$$
        to return total variation for all images in the batch.
        """
        image_dims = K.ndim(self.img) - 2

        # Constructing slice [1:] + [:-1] * (image_dims - 1) and [:-1] * (image_dims)
        start_slice = [slice(1, None, None)] + [slice(None, -1, None) for _ in range(image_dims - 1)]
        end_slice = [slice(None, -1, None) for _ in range(image_dims)]
        samples_channels_slice = [slice(None, None, None), slice(None, None, None)]

        # Compute pixel diffs by rolling slices to the right per image dim.
        tv = None
        for i in range(image_dims):
            ss = tuple(samples_channels_slice + start_slice)
            es = tuple(samples_channels_slice + end_slice)
            diff_square = K.square(self.img[utils.slicer[ss]] - self.img[utils.slicer[es]])
            tv = diff_square if tv is None else tv + diff_square

            # Roll over to next image dim
            start_slice = np.roll(start_slice, 1).tolist()
            end_slice = np.roll(end_slice, 1).tolist()

        tv = K.sum(K.pow(tv, self.beta / 2.))
        return normalize(self.img, tv) 
Example 23
Project: iMIMIC-RCVs   Author: medgift   File: regularizers.py    MIT License
def build_loss(self):
        # Infinity norm
        if np.isinf(self.p):
            value = K.max(self.img)
        else:
            value = K.pow(K.sum(K.pow(K.abs(self.img), self.p)), 1. / self.p)

        return normalize(self.img, value) 
Example 24
Project: Trident-Segmentation-CNN   Author: YalongLiu   File: metrics.py    MIT License
def focal_loss(y_true, y_pred):
    # Hyper parameters
    gamma = 1  # For hard segmentation
    alpha = 0.9  # For unbalanced samples

    # Original focal loss for segmentation
    pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
    pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
    loss_0 = -K.pow(pt_0, gamma) * K.log(1. - pt_0 + K.epsilon())
    loss_1 = -K.pow(1. - pt_1, gamma) * K.log(K.epsilon() + pt_1)

    sum_0 = tf.reduce_sum(loss_0)
    sum_1 = tf.reduce_sum(loss_1)

    loss_0 = tf.Print(loss_0, [sum_0, sum_1], message='loss_0 loss_1:')
    loss = alpha * loss_1 + (1 - alpha) * loss_0

    # Original focal loss for classification
    # loss = -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1)) - K.sum(
    #     (1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0))

    # Self-defined focal loss
    # loss = -alpha * y_true * K.pow(1.0 - y_pred, gamma) * K.log(K.epsilon() + y_pred) - (1.0 - y_true) * (
    # 1 - alpha) * K.pow(y_pred, gamma) * K.log(1. - y_pred + K.epsilon())

    return K.mean(loss) 
Example 25
Project: wtte-rnn   Author: ragulpr   File: wtte.py    MIT License
def loglik_discrete(y, u, a, b, epsilon=K.epsilon()):
    hazard0 = K.pow((y + epsilon) / a, b)
    hazard1 = K.pow((y + 1.0) / a, b)

    loglikelihoods = u * \
        K.log(K.exp(hazard1 - hazard0) - (1.0 - epsilon)) - hazard1
    return loglikelihoods 
Example 26
Project: wtte-rnn   Author: ragulpr   File: wtte.py    MIT License
def loglik_continuous(y, u, a, b, epsilon=K.epsilon()):
    ya = (y + epsilon) / a
    loglikelihoods = u * (K.log(b) + b * K.log(ya)) - K.pow(ya, b)
    return loglikelihoods 
Example 27
Project: wtte-rnn   Author: ragulpr   File: wtte.py    MIT License
def loglik_continuous_conditional_correction(y, u, a, b, epsilon=K.epsilon()):
    """Integrated conditional excess loss.
        Explanation TODO
    """
    ya = (y + epsilon) / a
    loglikelihoods = y * \
        (u * (K.log(b) + b * K.log(ya)) - (b / (b + 1.)) * K.pow(ya, b))
    return loglikelihoods 
Example 28
Project: ChimeraNet   Author: leichtrhino   File: models.py    MIT License
def loss_mask(T, F, C, D):
    def _loss_mask(y_true, y_pred):
        mixture = K.expand_dims(y_true[:, :, :, C])
        mask_true = y_true[:, :, :, :C]
        return K.sum(
            K.pow((mask_true - y_pred)*mixture, 2), axis=(1, 2, 3)
        )
    return _loss_mask 
Example 29
Project: RecurrentGaze   Author: crisie   File: adamaccum.py    MIT License
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [(self.iterations, self.iterations + 1)]

        t = self.iterations + 1
        lr_t = self.lr * K.sqrt(1. - K.pow(self.beta_2, t)) / (1. - K.pow(self.beta_1, t))

        ms = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
        vs = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
        gs = [K.variable(np.zeros(K.get_value(p).shape)) for p in params]
        self.weights = ms + vs

        for p, g, m, v, gg in zip(params, grads, ms, vs, gs):

            flag = K.equal(self.iterations % self.accum_iters, 0)
            flag = K.cast(flag, dtype='float32')

            gg_t = (1 - flag) * (gg + g)
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * (gg + flag * g) / self.accum_iters
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square((gg + flag * g) / self.accum_iters)
            p_t = p - flag * lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append((m, flag * m_t + (1 - flag) * m))
            self.updates.append((v, flag * v_t + (1 - flag) * v))
            self.updates.append((gg, gg_t))

            new_p = p_t
            # apply constraints
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)
            self.updates.append((p, new_p))
        return self.updates 
Example 30
Project: SPECT-CT-Seg-ResUNet-Keras   Author: junyuchen245   File: custom_losses.py    MIT License
def generalized_dice(y_true, y_pred, exp):
    GD =[]
    print(K.shape(y_pred))
    smooth = 1.0
    for i in range(y_pred.shape[2]):
        y_true_per_label = K.cast(K.not_equal(y_true[:, :, i], 0), K.floatx())  # Change to binary
        y_pred_per_label = y_pred[:, :, i]  # K.cast(K.not_equal(y_pred[:, :, i], 0), K.floatx())  # Change to binary
        weight = K.pow(1/K.sum(y_true_per_label, axis=1), exp)
        intersection = K.sum(y_true_per_label * y_pred_per_label, axis=1)  # (batch size, number of labels)
        union = K.sum(y_true_per_label + y_pred_per_label, axis=1)  # (batch size, number of labels)
        GD.append(weight * (2. * intersection + smooth) / (union + smooth))
    GD_out = K.stack(GD)
    return GD_out 
Example 31
Project: Beta-Mish   Author: digantamisra98   File: beta_mish.py    MIT License
def call(self, inputs):
        return inputs*K.tanh(K.log(K.pow((1+K.exp(inputs)),self.beta))) 
Example 32
Project: keras-optimizers   Author: hi-im-ryanli   File: frankenstein.py    MIT License
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        t = K.cast(self.iterations, K.floatx()) + 1

        # Due to the recommendations in [2], i.e. warming momentum schedule
        momentum_cache_t = self.beta_1 * (1. - 0.5 * (K.pow(K.cast_to_floatx(0.96), t * self.schedule_decay)))
        momentum_cache_t_1 = self.beta_1 * (1. - 0.5 * (K.pow(K.cast_to_floatx(0.96), (t + 1) * self.schedule_decay)))
        m_schedule_new = self.m_schedule * momentum_cache_t
        m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1
        self.updates.append((self.m_schedule, m_schedule_new))

        shapes = [K.int_shape(p) for p in params]
        ms = [K.zeros(shape) for shape in shapes]
        vs = [K.zeros(shape) for shape in shapes]

        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            # the following equations given in [1]
            g_prime = g / (1. - m_schedule_new)
            m_t = self.beta_1 * m + (1. - self.beta_1) * g
            m_t_prime = m_t / (1. - m_schedule_next)
            v_t = K.maximum(self.beta_2 * v, K.abs(g))
            v_t_prime = v_t / (1. - K.pow(self.beta_2, t))
            m_t_bar = (1. - momentum_cache_t) * g_prime + momentum_cache_t_1 * m_t_prime

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))

            p_t = p - self.lr * m_t_bar / (K.sqrt(v_t_prime) + self.epsilon)
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates 
Example 33
Project: bert4keras   Author: bojone   File: backend.py    Apache License 2.0
def gelu_tanh(x):
    """基于Tanh近似计算的gelu函数
    """
    cdf = 0.5 * (1.0 + K.tanh(
        (np.sqrt(2 / np.pi) * (x + 0.044715 * K.pow(x, 3)))))
    return x * cdf 
Example 34
Project: qkeras   Author: google   File: example_qoctave.py    Apache License 2.0
def customLoss(y_true,y_pred):
  log1 = 1.5 * y_true * K.log(y_pred + 1e-9) * K.pow(1-y_pred, 2)
  log0 = 0.5 * (1 - y_true) * K.log((1 - y_pred) + 1e-9) * K.pow(y_pred, 2)
  return (- K.sum(K.mean(log0 + log1, axis = 0))) 
Example 35
Project: qkeras   Author: google   File: qlayers.py    Apache License 2.0
def __call__(self, x):
    m = pow(2, self.bits)
    m_i = pow(2, self.integer)

    if self.use_sigmoid:
      p = _sigmoid(x / m_i) * m
      xq = m_i * K.clip(
          2.0 * (_round_through(p, self.use_stochastic_rounding) / m) - 1.0,
          0.0, 1.0 - 1.0 / m)
    else:
      p = x * m / m_i
      xq = m_i * K.clip(
          _round_through(p, self.use_stochastic_rounding) / m,
          0.0, 1.0 - 1.0 / m)
    return xq 
Example 36
Project: qkeras   Author: google   File: qlayers.py    Apache License 2.0
def __call__(self, x):
    non_sign_bits = self.bits - 1
    m = pow(2, non_sign_bits)
    m_i = pow(2, self.integer)
    p = _sigmoid(x / m_i) * m
    rp = 2.0 * (_round_through(p) / m) - 1.0
    u_law_p = K.sign(rp) * K.log(1 + self.u * K.abs(rp)) / K.log(1 + self.u)
    xq = m_i * K.clip(u_law_p, -1.0 + (1.0 * self.symmetric) / m, 1.0 - 1.0 / m)
    return xq 
Example 37
Project: qkeras   Author: google   File: qlayers.py    Apache License 2.0
def __call__(self, x):

    min_exp = -2**(self.bits - 1)
    max_exp = 2**(self.bits - 1) - 1

    eps = K.epsilon()

    if min_exp < np.log2(eps):
      warnings.warn(
          "QKeras: min_exp in relu_po2 quantizer is smaller than K.epsilon()")

    log2 = np.log(2.0)

    if self.max_value != -1:
      max_exp = np.round(np.log2(self.max_value + eps))

    x = K.maximum(x, 0)
    if self.use_stochastic_rounding:
      x_log2 = stochastic_round_po2(x)
    else:
      x_log2 = _round_through(K.log(K.abs(x) + eps) / log2)
    x_clipped = K.clip(x_log2, min_exp, max_exp)
    return x + K.stop_gradient(-x + K.pow(2.0, x_clipped))

#
# Because it may be hard to get serialization from activation functions,
# we may be replacing their instantiation by QActivation in the future.
# 
Example 38
Project: Semantic_Segmentation_Keras   Author: liuph0119   File: loss_utils.py    Apache License 2.0
def focal_loss(y_true, y_pred):
    gamma = 2
    alpha = 0.25
    pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
    pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))

    pt_1 = K.clip(pt_1, 1e-3, .999)
    pt_0 = K.clip(pt_0, 1e-3, .999)

    return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1)) - K.sum(
        (1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0)) 
Example 39
Project: BERT-keras   Author: Separius   File: funcs.py    GNU General Public License v3.0
def gelu(x):
    return 0.5 * x * (1 + K.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * K.pow(x, 3))))


# https://stackoverflow.com/a/42194662/2796084 
Example 40
Project: AWX   Author: lucamasera   File: layers.py    GNU General Public License v3.0
def n_norm(self, x, epsilon=1e-6):
        return K.pow(K.clip(K.sum(K.pow(x, self.n), -1), epsilon, 1-epsilon), 1./self.n) 
Example 41
Project: iust_deep_fuzz   Author: m-zakeri   File: learn_and_fuzz_2.py    MIT License
def perplexity(y_true, y_pred):
    """
    Compute perplexity metric

    :param y_true:
    :param y_pred:
    :return:
    """
    ce = K.categorical_crossentropy(y_true, y_pred)
    # pp = K.pow(np.e, ce)  # Or 2?
    # pp = K.pow(2., ce)  # Or np.e
    pp = K.exp(ce)
    # print('Perplexity value in perplexity function: ', K.eval(pp))
    return pp 
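The commented-out K.pow(np.e, ce) variant above coincides with K.exp(ce); a small sanity check with illustrative values (TensorFlow backend assumed):

import numpy as np
from keras import backend as K

ce = K.constant([0.5, 1.0])
print(K.eval(K.exp(ce)))                    # [1.6487 2.7183]
print(K.eval(K.pow(K.constant(np.e), ce)))  # same values: exp(ce) == e**ce
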
Example 42
Project: iust_deep_fuzz   Author: m-zakeri   File: neural_fuzz_pdf_obj.py    MIT License
def perplexity(y_true, y_pred):
    """
    Compute perplexity metric

    :param y_true:
    :param y_pred:
    :return:
    """
    ce = K.categorical_crossentropy(y_true, y_pred)
    # pp = K.pow(np.e, ce)  # Or 2?
    # pp = K.pow(2., ce)  # Or np.e
    pp = K.exp(ce)
    # print('Perplexity value in perplexity function: ', K.eval(pp))
    return pp 
Example 43
Project: iust_deep_fuzz   Author: m-zakeri   File: metadata_neural_fuzz_pdf_obj.py    MIT License
def perplexity(y_true, y_pred):
    """
    Compute perplexity metric

    :param y_true:
    :param y_pred:
    :return:
    """
    ce = K.categorical_crossentropy(y_true, y_pred)
    # pp = K.pow(np.e, ce)  # Or 2?
    # pp = K.pow(2., ce)  # Or np.e
    pp = K.exp(ce)
    # print('Perplexity value in perplexity function: ', K.eval(pp))
    return pp 
Example 44
Project: iust_deep_fuzz   Author: m-zakeri   File: data_neural_fuzz_pdf_obj.py    MIT License
def perplexity(y_true, y_pred):
    """
    Compute perplexity metric

    :param y_true:
    :param y_pred:
    :return:
    """
    ce = K.categorical_crossentropy(y_true, y_pred)
    # pp = K.pow(np.e, ce)  # Or 2?
    # pp = K.pow(2., ce)  # Or np.e
    pp = K.exp(ce)
    # print('Perplexity value in perplexity function: ', K.eval(pp))
    return pp 
Example 45
Project: iust_deep_fuzz   Author: m-zakeri   File: learn_and_fuzz_3_sample_fuzz.py    MIT License
def perplexity(y_true, y_pred):
    """
    Compute perplexity metric

    :param y_true:
    :param y_pred:
    :return:
    """
    ce = K.categorical_crossentropy(y_true, y_pred)
    # pp = K.pow(np.e, ce)  # Or 2?
    # pp = K.pow(2., ce)  # Or np.e
    pp = K.exp(ce)
    # print('Perplexity value in perplexity function: ', K.eval(pp))
    return pp 
Example 46
Project: MMdnn   Author: microsoft   File: keras2_emitter.py    MIT License
def _layer_LRN(self):
        self.add_body(0, '''
from keras.layers.core import Layer
class LRN(Layer):

    def __init__(self, size=5, alpha=0.0005, beta=0.75, k=2, **kwargs):
        self.n = size
        self.alpha = alpha
        self.beta = beta
        self.k = k
        super(LRN, self).__init__(**kwargs)

    def build(self, input_shape):
        self.shape = input_shape
        super(LRN, self).build(input_shape)

    def call(self, x, mask=None):
        half_n = self.n - 1
        squared = K.square(x)
        scale = self.k
        norm_alpha = self.alpha / (2 * half_n + 1)
        if K.image_dim_ordering() == "th":
            b, f, r, c = self.shape
            squared = K.expand_dims(squared, 0)
            squared = K.spatial_3d_padding(squared, padding=((half_n, half_n), (0, 0), (0,0)))
            squared = K.squeeze(squared, 0)
            for i in range(half_n*2+1):
                scale += norm_alpha * squared[:, i:i+f, :, :]
        else:
            b, r, c, f = self.shape
            squared = K.expand_dims(squared, -1)
            squared = K.spatial_3d_padding(squared, padding=((0, 0), (0,0), (half_n, half_n)))
            squared = K.squeeze(squared, -1)
            for i in range(half_n*2+1):
                scale += norm_alpha * squared[:, :, :, i:i+f]

        scale = K.pow(scale, self.beta)
        return x / scale

    def compute_output_shape(self, input_shape):
        return input_shape''') 
Example 47
Project: Keras-TextClassification   Author: yongzhuo   File: keras_radam.py    MIT License
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        beta_1_t = K.pow(self.beta_1, t)
        beta_2_t = K.pow(self.beta_2, t)
        rho = 2 / (1 - self.beta_2) - 1
        rho_t = rho - 2 * t * beta_2_t / (1 - beta_2_t)
        r_t = K.sqrt(
            K.relu(rho_t - 4) * K.relu(rho_t - 2) * rho / ((rho - 4) * (rho - 2) * rho_t)
        )
        flag = K.cast(rho_t > 4, K.floatx())

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            mhat_t = m_t / (1 - beta_1_t)
            vhat_t = K.sqrt(v_t / (1 - beta_2_t))
            p_t = p - lr * mhat_t * (flag * r_t / (vhat_t + self.epsilon) + (1 - flag))

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates 
Example 48
Project: Keras-TextClassification   Author: yongzhuo   File: triangle_position_embedding.py    MIT License
def call(self, inputs, mask=None):
        input_shape = K.shape(inputs)
        if self.mode == self.MODE_ADD:
            batch_size, seq_len, output_dim = input_shape[0], input_shape[1], input_shape[2]
            pos_input = K.tile(K.expand_dims(K.arange(seq_len), axis=0), [batch_size, 1])
        elif self.mode == self.MODE_CONCAT:
            batch_size, seq_len, output_dim = input_shape[0], input_shape[1], self.output_dim
            pos_input = K.tile(K.expand_dims(K.arange(seq_len), axis=0), [batch_size, 1])
        else:
            output_dim = self.output_dim
            pos_input = inputs
        if K.dtype(pos_input) != K.floatx():
            pos_input = K.cast(pos_input, K.floatx())
        evens = K.arange(output_dim // 2) * 2
        odds = K.arange(output_dim // 2) * 2 + 1
        even_embd = K.sin(
            K.dot(
                K.expand_dims(pos_input, -1),
                K.expand_dims(1.0 / K.pow(
                    10000.0,
                    K.cast(evens, K.floatx()) / K.cast(output_dim, K.floatx())
                ), 0)
            )
        )
        odd_embd = K.cos(
            K.dot(
                K.expand_dims(pos_input, -1),
                K.expand_dims(1.0 / K.pow(
                    10000.0, K.cast((odds - 1), K.floatx()) / K.cast(output_dim, K.floatx())
                ), 0)
            )
        )
        embd = K.stack([even_embd, odd_embd], axis=-1)
        output = K.reshape(embd, [-1, K.shape(inputs)[1], output_dim])
        if self.mode == self.MODE_CONCAT:
            output = K.concatenate([inputs, output], axis=-1)
        if self.mode == self.MODE_ADD:
            output += inputs
        return output 
Example 49
Project: keras-transformer   Author: kpot   File: transformer.py    MIT License
def gelu(x):
    """
    GELU activation, described in paper "Gaussian Error Linear Units (GELUs)"
    https://arxiv.org/pdf/1606.08415.pdf
    """
    c = math.sqrt(2 / math.pi)
    return 0.5 * x * (1 + K.tanh(c * (x + 0.044715 * K.pow(x, 3)))) 
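A brief illustrative evaluation of the gelu above (input values assumed, TensorFlow backend):

from keras import backend as K

print(K.eval(gelu(K.constant([-1., 0., 1.]))))  # approx. [-0.159  0.     0.841]
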
Example 50
Project: DiscriminativeActiveLearning   Author: dsgissin   File: query_methods.py    MIT License
def get_distance_matrix(self, X, Y):

        x_input = K.placeholder((X.shape))
        y_input = K.placeholder(Y.shape)
        dot = K.dot(x_input, K.transpose(y_input))
        x_norm = K.reshape(K.sum(K.pow(x_input, 2), axis=1), (-1, 1))
        y_norm = K.reshape(K.sum(K.pow(y_input, 2), axis=1), (1, -1))
        dist_mat = x_norm + y_norm - 2.0*dot
        sqrt_dist_mat = K.sqrt(K.clip(dist_mat, min_value=0, max_value=10000))
        dist_func = K.function([x_input, y_input], [sqrt_dist_mat])

        return dist_func([X, Y])[0] 
Example 51
Project: image-segmentation   Author: nearthlab   File: image_classifier.py    MIT License
def weighted_focal_loss(weights, gamma):
    weights = K.variable([weights])
    gamma = K.variable([gamma])

    def loss(gt, pr):
        # scale preds so that the class probas of each sample sum to 1
        pr /= tf.reduce_sum(pr, axis=-1, keep_dims=True)
        # manual computation of crossentropy
        pr = tf.clip_by_value(pr, K.epsilon(), 1. - K.epsilon())
        return K.mean(-tf.reduce_sum(gt * K.pow(1. - pr, gamma) * tf.log(pr) * weights, axis=-1))

    return loss 
Example 52
Project: keras-contrib   Author: keras-team   File: yogi.py    MIT License
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            g2 = K.square(g)
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = v - (1. - self.beta_2) * K.sign(v - g2) * g2
            p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates 
Example 53
Project: keras-contrib   Author: keras-team   File: ftml.py    MIT License
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.inital_decay > 0:
            lr *= (1. / (1. + self.decay * self.iterations))

        t = self.iterations + 1

        lr_t = lr / (1. - K.pow(self.beta_1, t))

        shapes = [K.int_shape(p) for p in params]
        zs = [K.zeros(shape) for shape in shapes]
        vs = [K.zeros(shape) for shape in shapes]
        ds = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + zs + vs + ds

        for p, g, z, v, d in zip(params, grads, zs, vs, ds):
            v_t = self.beta_2 * v + (1. - self.beta_2) * K.square(g)
            d_t = (K.sqrt(v_t / (1. - K.pow(self.beta_2, t)))
                   + self.epsilon) / lr_t
            sigma_t = d_t - self.beta_1 * d
            z_t = self.beta_1 * z + (1. - self.beta_1) * g - sigma_t * p

            p_t = - z_t / d_t

            self.updates.append(K.update(z, z_t))
            self.updates.append(K.update(v, v_t))
            self.updates.append(K.update(d, d_t))

            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates 
Example 54
Project: medical_image_segmentation   Author: CVxTz   File: baseline_aug.py    MIT License
def focal_loss(gamma=2., alpha=.25):
    def focal_loss_fixed(y_true, y_pred):
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
        return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1))-K.sum((1-alpha) * K.pow( pt_0, gamma) * K.log(1. - pt_0))
    return focal_loss_fixed 
Example 55
Project: medical_image_segmentation   Author: CVxTz   File: baseline.py    MIT License
def focal_loss(gamma=2., alpha=.25):
    def focal_loss_fixed(y_true, y_pred):
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
        return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1))-K.sum((1-alpha) * K.pow( pt_0, gamma) * K.log(1. - pt_0))
    return focal_loss_fixed 
Example 56
Project: medical_image_segmentation   Author: CVxTz   File: baseline_aug_vgg.py    MIT License
def focal_loss(gamma=2., alpha=.25):
    def focal_loss_fixed(y_true, y_pred):
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
        return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1))-K.sum((1-alpha) * K.pow( pt_0, gamma) * K.log(1. - pt_0))
    return focal_loss_fixed 
Example 57
Project: medical_image_segmentation   Author: CVxTz   File: focal_aug.py    MIT License
def focal_loss(gamma=2., alpha=.25):
    def focal_loss_fixed(y_true, y_pred):
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
        return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1))-K.sum((1-alpha) * K.pow( pt_0, gamma) * K.log(1. - pt_0))
    return focal_loss_fixed 
Example 58
Project: keras-vis   Author: raghakot   File: regularizers.py    MIT License
def build_loss(self):
        r"""Implements the N-dim version of function
        $$TV^{\beta}(x) = \sum_{whc} \left ( \left ( x(h, w+1, c) - x(h, w, c) \right )^{2} +
        \left ( x(h+1, w, c) - x(h, w, c) \right )^{2} \right )^{\frac{\beta}{2}}$$
        to return total variation for all images in the batch.
        """
        image_dims = K.ndim(self.img) - 2

        # Constructing slice [1:] + [:-1] * (image_dims - 1) and [:-1] * (image_dims)
        start_slice = [slice(1, None, None)] + [slice(None, -1, None) for _ in range(image_dims - 1)]
        end_slice = [slice(None, -1, None) for _ in range(image_dims)]
        samples_channels_slice = [slice(None, None, None), slice(None, None, None)]

        # Compute pixel diffs by rolling slices to the right per image dim.
        tv = None
        for i in range(image_dims):
            ss = tuple(samples_channels_slice + start_slice)
            es = tuple(samples_channels_slice + end_slice)
            diff_square = K.square(self.img[utils.slicer[ss]] - self.img[utils.slicer[es]])
            tv = diff_square if tv is None else tv + diff_square

            # Roll over to next image dim
            start_slice = np.roll(start_slice, 1).tolist()
            end_slice = np.roll(end_slice, 1).tolist()

        tv = K.sum(K.pow(tv, self.beta / 2.))
        return normalize(self.img, tv) 
Example 59
Project: keras-vis   Author: raghakot   File: regularizers.py    MIT License
def build_loss(self):
        # Infinity norm
        if np.isinf(self.p):
            value = K.max(self.img)
        else:
            value = K.pow(K.sum(K.pow(K.abs(self.img), self.p)), 1. / self.p)

        return normalize(self.img, value) 
Example 60
Project: DLWP   Author: jweyn   File: custom.py    MIT License
def on_epoch_end(self, epoch, logs=None, beta_1=0.9, beta_2=0.999,):
        optimizer = self.model.optimizer
        it = K.cast(optimizer.iterations, K.floatx())
        lr = K.cast(optimizer.lr, K.floatx())
        decay = K.cast(optimizer.decay, K.floatx())
        t = K.eval(it + 1.)
        new_lr = K.eval(lr * (1. / (1. + decay * it)))
        lr_t = K.eval(new_lr * (K.sqrt(1. - K.pow(beta_2, t)) / (1. - K.pow(beta_1, t))))
        print(' - LR: {:.6f}'.format(lr_t)) 
Example 61
Project: DLWP   Author: jweyn   File: custom.py    MIT License
def latitude_weighted_loss(loss_function=mean_squared_error, lats=None, output_shape=(), axis=-2, weighting='cosine'):
    """
    Create a loss function that weights inputs by a function of latitude before calculating the loss.

    :param loss_function: method: Keras loss function to apply after the weighting
    :param lats: ndarray: 1-dimensional array of latitude coordinates
    :param output_shape: tuple: shape of expected model output
    :param axis: int: latitude axis in model output shape
    :param weighting: str: type of weighting to apply. Options are:
            cosine: weight by the cosine of the latitude (default)
            midlatitude: weight by the cosine of the latitude but also apply a 25% reduction to the equator and boost
                to the mid-latitudes
    :return: callable loss function
    """
    if weighting not in ['cosine', 'midlatitude']:
        raise ValueError("'weighting' must be one of 'cosine' or 'midlatitude'")
    if lats is not None:
        lat_tensor = K.zeros(lats.shape)
        lat_tensor.assign(K.cast_to_floatx(lats[:]))

        weights = K.cos(lat_tensor * np.pi / 180.)
        if weighting == 'midlatitude':
            weights = weights + 0.5 * K.pow(K.sin(lat_tensor * 2 * np.pi / 180.), 2.)

        weight_shape = output_shape[axis:]
        for d in weight_shape[1:]:
            weights = K.expand_dims(weights, axis=-1)
            weights = K.repeat_elements(weights, d, axis=-1)

    else:
        weights = K.ones(output_shape)

    def lat_loss(y_true, y_pred):
        return loss_function(y_true * weights, y_pred * weights)

    return lat_loss 
Example 62
Project: nnet-survival   Author: MGensheimer   File: nnet_survival.py    MIT License
def call(self, x):
        #The conditional probability of surviving each time interval (given that has survived to beginning of interval)
        #is affected by the input data according to eq. 18.13 in Harrell F.,
        #Regression Modeling Strategies 2nd ed. (available free online)
        return K.pow(K.sigmoid(self.kernel), K.exp(x)) 
Example 63
Project: faceswap   Author: deepfakes   File: s3fd.py    GNU General Public License v3.0
def call(self, x, *args):
        return K.pow(*x) 
Example 64
Project: bert_lamb_pretrain   Author: goldenbili   File: keras_lamb.py    Apache License 2.0
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

            m_t_hat = m_t / (1. - K.pow(self.beta_1, t))
            v_t_hat = v_t / (1. - K.pow(self.beta_2, t))

            p_dash = m_t_hat / (K.sqrt(v_t_hat + self.epsilon))

            if self.weight_decay > 0.:
                wd = self.weight_decay * p
                p_dash = p_dash + wd

            r1 = K.sqrt(K.sum(K.square(p)))
            r2 = K.sqrt(K.sum(K.square(p_dash)))

            r = tf.where(tf.greater(r1, 0.),
                         tf.where(tf.greater(r2, 0.),
                                  r1 / r2,
                                  1.0),
                         1.0)
            # r = r1 / r2
            eta = r * lr

            p_t = p - eta * p_dash

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates 
Example 65
Project: keras_radam   Author: forin-xyz   File: radam.py    MIT License
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay:
            lr = lr * (1. / (1. + self.decay * K.cast(
                self.iterations, K.dtype(self.decay)
            )))

        t = K.cast(self.iterations, K.floatx()) + 1.
        beta_1 = self.beta_1
        beta_2 = self.beta_2
        beta_1_t = K.pow(beta_1, t)
        beta_2_t = K.pow(beta_2, t)
        rho_inf = 2. / (1. - beta_2) - 1.
        rho_t = rho_inf - 2. * t * beta_2_t / (1. - beta_2_t)
        r_t = K.sqrt(
            K.relu(rho_t - 4.) * (rho_t - 2.) * rho_inf / (
                (rho_inf - 4.) * (rho_inf - 2.) * rho_t )
        )
        flag = K.cast(rho_t > 4., K.floatx())

        ms = [K.zeros(K.int_shape(p)) for p in params]
        vs = [K.zeros(K.int_shape(p)) for p in params]

        self.weights = [self.iterations] + ms + vs
        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = beta_1 * m + (1. - beta_1) * g
            v_t = beta_2 * v + (1. - beta_2) * K.square(g)

            m_hat_t = m_t / (1. - beta_1_t)
            v_hat_t = K.sqrt(v_t / (1. - beta_2_t))
            new_p = p - lr * (r_t / (v_hat_t + self.epsilon) + flag - 1.)* m_hat_t

            if getattr(p, "constraint", None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
        return self.updates 
Example 66
Project: keras-adamw   Author: OverLordGoldDragon   File: optimizers_225.py    MIT License
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]
        self.updates.append(K.update_add(self.t_cur, 1))

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]

        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        total_iterations = self.total_iterations
        # Cosine annealing
        if self.use_cosine_annealing and total_iterations != 0:
            self.eta_t = _compute_eta_t(self)
        self.lr_t = lr_t * self.eta_t  # for external tracking
        lr_t_premult = lr_t

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            # Learning rate multipliers
            if self.lr_multipliers is not None:
                lr_t = _apply_lr_multiplier(self, lr_t_premult, p)

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                p_t = p - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))

            # Weight decays
            if p.name in self.weight_decays.keys() and total_iterations != 0:
                p_t = _apply_weight_decays(self, p, p_t)
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))

        self._init_notified = True
        return self.updates 
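The helper _compute_eta_t is not shown in this excerpt; assuming the `# Cosine annealing` comment above refers to the SGDR schedule of Loshchilov & Hutter, a minimal standalone sketch of the multiplier it would produce:

# Standalone sketch (an assumption about _compute_eta_t, not the project's
# actual helper): cosine decay of the lr multiplier from 1 to 0.
import numpy as np

def eta_t(t_cur, total_iterations, eta_min=0., eta_max=1.):
    frac = t_cur / float(total_iterations)
    return eta_min + 0.5 * (eta_max - eta_min) * (1. + np.cos(np.pi * frac))

print(eta_t(0, 100), eta_t(50, 100), eta_t(100, 100))  # 1.0, 0.5, ~0.0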
Example 68
Project: keras-adamw   Author: OverLordGoldDragon   File: optimizers_225.py    MIT License 4 votes vote down vote up
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]
        self.updates.append(K.update_add(self.t_cur, 1))

        t = K.cast(self.iterations, K.floatx()) + 1

        # Due to the recommendations in [2], i.e. warming momentum schedule
        momentum_cache_t = self.beta_1 * (1. - 0.5 * (
            K.pow(K.cast_to_floatx(0.96), t * self.schedule_decay)))
        momentum_cache_t_1 = self.beta_1 * (1. - 0.5 * (
            K.pow(K.cast_to_floatx(0.96), (t + 1) * self.schedule_decay)))
        m_schedule_new = self.m_schedule * momentum_cache_t
        m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1
        self.updates.append((self.m_schedule, m_schedule_new))

        shapes = [K.int_shape(p) for p in params]
        ms = [K.zeros(shape) for shape in shapes]
        vs = [K.zeros(shape) for shape in shapes]

        self.weights = [self.iterations] + ms + vs

        total_iterations = self.total_iterations
        # Cosine annealing
        if self.use_cosine_annealing and total_iterations != 0:
            self.eta_t = _compute_eta_t(self)
        self.lr_t = self.lr * self.eta_t  # for external tracking

        for p, g, m, v in zip(params, grads, ms, vs):
            # Learning rate multipliers
            lr_t = self.lr
            if self.lr_multipliers is not None:
                lr_t = _apply_lr_multiplier(self, lr_t, p)

            # the following equations given in [1]
            g_prime = g / (1. - m_schedule_new)
            m_t = self.beta_1 * m + (1. - self.beta_1) * g
            m_t_prime = m_t / (1. - m_schedule_next)
            v_t = self.beta_2 * v + (1. - self.beta_2) * K.square(g)
            v_t_prime = v_t / (1. - K.pow(self.beta_2, t))
            m_t_bar = (1. - momentum_cache_t) * g_prime + (
                momentum_cache_t_1 * m_t_prime)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            p_t = p - self.eta_t * lr_t * m_t_bar / (
                    K.sqrt(v_t_prime) + self.epsilon)

            # Weight decays
            if p.name in self.weight_decays.keys() and total_iterations != 0:
                p_t = _apply_weight_decays(self, p, p_t)
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))

        self._init_notified = True
        return self.updates 
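A standalone sketch of the warming momentum schedule above (Nadam's mu_t); schedule_decay = 0.004 is the usual default, assumed here:

# Pure-Python sketch of momentum_cache_t and the running m_schedule product
# that the bias corrections above divide by.
beta_1, schedule_decay = 0.9, 0.004
m_schedule = 1.
for t in range(1, 6):
    momentum_cache_t = beta_1 * (1. - 0.5 * 0.96 ** (t * schedule_decay))
    m_schedule *= momentum_cache_t  # running product used for bias correction
    print(t, round(momentum_cache_t, 6), round(m_schedule, 6))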
Example 69
Project: keras_extension   Author: k1414st   File: optimizers.py    MIT License 4 votes vote down vote up
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

            eta_l_t = self.terminal_bound - \
                (self.terminal_bound - self.lower_bound) / \
                ((1. - self.beta_2) * t + 1)
            eta_u_t = self.terminal_bound + \
                (self.upper_bound - self.terminal_bound) / \
                ((1. - self.beta_2) * t)

            clipped_lr_t = K.minimum(
                K.maximum(lr_t / (K.sqrt(v_t) + self.epsilon), eta_l_t), eta_u_t)
            p_t = p - clipped_lr_t * m_t

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates 
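A standalone sketch of the bound schedules above: eta_l_t rises from lower_bound and eta_u_t falls toward terminal_bound as t grows, pinching the clipped step size; the three bound values here are assumptions for illustration:

beta_2 = 0.999
lower_bound, upper_bound, terminal_bound = 0., 0.5, 0.1
for t in (1., 100., 10000., 1000000.):
    eta_l = terminal_bound - (terminal_bound - lower_bound) / ((1. - beta_2) * t + 1)
    eta_u = terminal_bound + (upper_bound - terminal_bound) / ((1. - beta_2) * t)
    print(t, eta_l, eta_u)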
Example 71
Project: Coloring-greyscale-images   Author: emilwallner   File: AdamAccumulate.py    MIT License 4 votes vote down vote up
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr

        completed_updates = K.cast(K.tf.floor(self.iterations / self.accum_iters), K.floatx())

        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * completed_updates))

        t = completed_updates + 1

        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) / (1. - K.pow(self.beta_1, t)))

        # self.iterations incremented after processing a batch
        # batch:              1 2 3 4 5 6 7 8 9
        # self.iterations:    0 1 2 3 4 5 6 7 8
        # update_switch = 1:        x       x    (if accum_iters=4)  
        update_switch = K.equal((self.iterations + 1) % self.accum_iters, 0)
        update_switch = K.cast(update_switch, K.floatx())

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        gs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]

        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]

        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat, tg in zip(params, grads, ms, vs, vhats, gs):

            sum_grad = tg + g
            avg_grad = sum_grad / self.accum_iters_float

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * avg_grad
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(avg_grad)

            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                p_t = p - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, (1 - update_switch) * vhat + update_switch * vhat_t))
            else:
                p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, (1 - update_switch) * m + update_switch * m_t))
            self.updates.append(K.update(v, (1 - update_switch) * v + update_switch * v_t))
            self.updates.append(K.update(tg, (1 - update_switch) * sum_grad))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, (1 - update_switch) * p + update_switch * new_p))
        return self.updates 
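A quick standalone check of the update_switch logic above: with accum_iters = 4, the moments and weights change only on every 4th batch, while gradients accumulate in between:

accum_iters = 4
for iterations in range(8):                 # value before K.update_add runs
    update_switch = int((iterations + 1) % accum_iters == 0)
    print('batch', iterations + 1, 'switch', update_switch)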
Example 72
Project: keras-optimizers   Author: hi-im-ryanli   File: frankenstein.py    MIT License 4 votes vote down vote up
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        t = K.cast(self.iterations, K.floatx()) + 1

        # Due to the recommendations in [2], i.e. warming momentum schedule
        momentum_cache_t = self.beta_1 * (1. - 0.5 * (K.pow(K.cast_to_floatx(0.96), t * self.schedule_decay)))
        momentum_cache_t_1 = self.beta_1 * (1. - 0.5 * (K.pow(K.cast_to_floatx(0.96), (t + 1) * self.schedule_decay)))
        m_schedule_new = self.m_schedule * momentum_cache_t
        m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1
        self.updates.append((self.m_schedule, m_schedule_new))

        shapes = [K.int_shape(p) for p in params]
        ms = [K.zeros(shape) for shape in shapes]
        vs = [K.zeros(shape) for shape in shapes]

        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            # the following equations given in [1]
            g_prime = g / (1. - m_schedule_new)
            m_t = self.beta_1 * m + (1. - self.beta_1) * g
            m_t_prime = m_t / (1. - m_schedule_next)
            if np.random.choice([1, -1]) == 1:
                v_t = self.beta_2 * v + (1. - self.beta_2) * K.square(g)
            else:
                v_t = K.maximum(self.beta_2 * v, K.abs(g))
            v_t_prime = v_t / (1. - K.pow(self.beta_2, t))
            m_t_bar = (1. - momentum_cache_t) * g_prime + momentum_cache_t_1 * m_t_prime

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))

            p_t = p - self.lr * m_t_bar / (K.sqrt(v_t_prime) + self.epsilon)
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates 
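Note that np.random.choice above executes once, at graph-construction time, so a given compile uses one second-moment rule for every training step. A standalone sketch of the two candidate rules (values assumed):

beta_2, v, g = 0.999, 0.04, 0.3
v_adam   = beta_2 * v + (1. - beta_2) * g ** 2   # Adam: EMA of squared gradients
v_adamax = max(beta_2 * v, abs(g))               # Adamax-style infinity norm
print(v_adam, v_adamax)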
Example 73
Project: keras-optimizers   Author: hi-im-ryanli   File: frankenstein.py    MIT License 4 votes vote down vote up
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        shapes = [K.int_shape(p) for p in params]
        accumulators = [K.zeros(shape) for shape in shapes]
        delta_accumulators = [K.zeros(shape) for shape in shapes]

        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * K.cast(self.iterations,
                                                  K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]

        self.weights = [self.iterations] + ms + vs

        for p, g, m, v, a, d_a in zip(params, grads, ms, vs, accumulators, delta_accumulators):
            # update accumulator
            new_a = self.rho * a + (1. - self.rho) * K.square(g)
            self.updates.append(K.update(a, new_a))

            # use the new accumulator and the *old* delta_accumulator
            update = g * K.sqrt(d_a + self.epsilon) / K.sqrt(new_a + self.epsilon)

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * update
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(update)
            p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates 
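A standalone sketch of the Adadelta-style preconditioning above (rho and epsilon values assumed). Since d_a is never written back in this excerpt, its RMS term stays at sqrt(epsilon) and the step reduces to an RMS-normalized gradient fed into Adam:

import numpy as np

rho, epsilon = 0.95, 1e-7
g, a, d_a = 0.3, 0.0, 0.0
new_a = rho * a + (1. - rho) * g ** 2
update = g * np.sqrt(d_a + epsilon) / np.sqrt(new_a + epsilon)
print(update)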
Example 74
Project: FormicID   Author: naturalis   File: optimizer.py    MIT License 4 votes vote down vote up
def get_updates(self, params, constraints, loss):
        grads = self.get_gradients(loss, params)

        self.updates = [K.update_add(self.iterations, 1)]
        t = self.iterations + 1

        shapes = [K.get_variable_shape(p) for p in params]
        ms = [K.zeros(shape) for shape in shapes]
        vs = [K.zeros(shape) for shape in shapes]

        loss_prev = K.variable(0)
        self.updates.append(K.update(loss_prev, loss))

        # Calculate the numerator of the Eve coefficient
        d_num_t = K.abs(loss_prev - loss)
        self.updates.append(K.update(self.d_num, d_num_t))

        # Calculate the denominator of the Eve coefficient
        d_den_t = K.abs(K.minimum(loss_prev, loss) - self.loss_min)
        self.updates.append(K.update(self.d_den, d_den_t))

        # Calculate the Eve coefficient. At the first iteration, it is 1.
        d_tilde_t = K.clip(
            (d_num_t + self.fmin_pos) / (d_den_t + self.fmin_pos),
            1. / self.c,
            self.c,
        )
        d_t = (self.beta_3 * self.d) + (1. - self.beta_3) * d_tilde_t
        d_t = K.switch(K.greater(t, 1), d_t, K.constant(1))
        self.updates.append(K.update(self.d, d_t))

        # Calculate the effective learning rate: lr / (d_t * (1 + iterations * decay))
        lr_eff_t = self.lr / (d_t * (1. + (self.iterations * self.decay)))
        self.updates.append(K.update(self.lr_eff, lr_eff_t))

        # Apply bias correction to the learning rate
        lr_hat_t = (
            lr_eff_t
            * K.sqrt(1. - K.pow(self.beta_2, t))
            / (1. - K.pow(self.beta_1, t))
        )

        # Update per parameter
        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            self.updates.append(K.update(m, m_t))

            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            self.updates.append(K.update(v, v_t))

            p_t = p - lr_hat_t * m_t / (K.sqrt(v_t) + self.epsilon)
            new_p = p_t
            # Apply constraints
            if p in constraints:
                c = constraints[p]
                new_p = c(new_p)
            self.updates.append(K.update(p, new_p))
        return self.updates 
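A standalone sketch of the Eve coefficient above: the relative loss change is clipped to [1/c, c] and smoothed with beta_3 (all values here are assumed for illustration):

import numpy as np

c, beta_3, fmin_pos = 10., 0.999, 1e-8
loss_prev, loss, loss_min, d = 0.70, 0.60, 0.55, 1.0
d_num = abs(loss_prev - loss)                 # |f_{t-1} - f_t|
d_den = abs(min(loss_prev, loss) - loss_min)  # distance to the best loss seen
d_tilde = np.clip((d_num + fmin_pos) / (d_den + fmin_pos), 1. / c, c)
d_t = beta_3 * d + (1. - beta_3) * d_tilde
print(d_tilde, d_t)                           # 2.0, 1.001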
Example 75
Project: keras-adabound   Author: titu1994   File: adabound.py    MIT License 4 votes vote down vote up
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1

        # Applies bounds on actual learning rate
        step_size = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                          (1. - K.pow(self.beta_1, t)))

        final_lr = self.final_lr * lr / self.base_lr
        lower_bound = final_lr * (1. - 1. / (self.gamma * t + 1.))
        upper_bound = final_lr * (1. + 1. / (self.gamma * t))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsbound:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            # apply weight decay
            if self.weight_decay != 0.:
                g += self.weight_decay * K.stop_gradient(p)

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

            if self.amsbound:
                vhat_t = K.maximum(vhat, v_t)
                denom = (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                denom = (K.sqrt(v_t) + self.epsilon)

            # Compute the bounds
            step_size_p = step_size * K.ones_like(denom)
            step_size_p_bound = step_size_p / denom
            bounded_lr_t = m_t * K.minimum(K.maximum(step_size_p_bound,
                                                     lower_bound), upper_bound)

            p_t = p - bounded_lr_t

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates 
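A standalone check of the bounds above: lower_bound and upper_bound pinch the per-element step size toward final_lr as t grows (final_lr = 0.1 and gamma = 1e-3 assumed, matching the AdaBound paper's defaults):

final_lr, gamma = 0.1, 1e-3
for t in (1., 1e3, 1e6):
    lower = final_lr * (1. - 1. / (gamma * t + 1.))
    upper = final_lr * (1. + 1. / (gamma * t))
    print(t, lower, upper)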
Example 76
Project: keras-contrib   Author: keras-team   File: padam.py    MIT License 4 votes vote down vote up
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                denom = (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                denom = (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))

            # Partial momentum adaption.
            new_p = p - (lr_t * (m_t / (denom ** (self.partial * 2))))

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates 
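A standalone sketch of the partial adaption above (the next example applies the same rule): with partial = p in (0, 0.5], the denominator is raised to 2p, interpolating between SGD-like behavior (p -> 0) and Adam (p = 0.5):

import numpy as np

partial, epsilon = 0.125, 1e-8   # 1/8 is the Padam paper's suggested value
v_t = 0.04
denom = np.sqrt(v_t) + epsilon
print(denom ** (partial * 2))    # 0.2 ** 0.25 ~= 0.669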
Example 77
Project: keras-padam   Author: titu1994   File: padam.py    MIT License 4 votes vote down vote up
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * K.cast(self.iterations,
                                                  K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                denom = (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                denom = (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))

            # Partial momentum adaption
            new_p = p - (lr_t * (m_t / (denom ** (self.partial * 2))))

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))

        return self.updates 
Example 78
Project: faceswap   Author: deepfakes   File: optimizers.py    GNU General Public License v3.0 4 votes vote down vote up
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        # Pass off to CPU if requested
        if self.cpu_mode:
            with K.tf.device("/cpu:0"):
                ms, vs, vhats = self.update_1(params)
        else:
            ms, vs, vhats = self.update_1(params)

        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                p_t = p - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates 
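A minimal sketch of what the cpu_mode path above does: variable creation is wrapped in an explicit device scope so the moment tensors live in host memory (TF 1.x style, matching the K.tf usage in this example; the shape is arbitrary):

import tensorflow as tf

with tf.device('/cpu:0'):
    m = tf.Variable(tf.zeros((1024, 1024)))  # slot variable pinned to the CPU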
Example 79
Project: StyleGAN-Keras   Author: manicman1999   File: adamlr.py    MIT License 4 votes vote down vote up
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * K.cast(self.iterations,
                                                  K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):

            # Learning rate multipliers
            if self.multipliers:
                multiplier = [mult for mult in self.multipliers if mult in p.name]
            else:
                multiplier = None
            if multiplier:
                new_lr_t = lr_t * self.multipliers[multiplier[0]]
                if self.debug_verbose:
                    print('Setting {} to learning rate {}'.format(multiplier[0], new_lr_t))
                    print(K.get_value(new_lr_t))
            else:
                new_lr_t = lr_t
                if self.debug_verbose:
                    print('No change in learning rate {}'.format(p.name))
                    print(K.get_value(new_lr_t))
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                p_t = p - new_lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                p_t = p - new_lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates 
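A standalone sketch of the multiplier lookup above: dictionary keys are matched as substrings of each weight's name, so one entry can cover a whole layer (the names below are made up for illustration):

multipliers = {'dense_1': 0.1, 'conv2d': 0.5}
for name in ('dense_1/kernel:0', 'conv2d_3/bias:0', 'dense_2/kernel:0'):
    match = [k for k in multipliers if k in name]
    scale = multipliers[match[0]] if match else 1.0
    print(name, scale)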
Example 80
Project: BERT_with_keras   Author: miroozyx   File: optimization.py    MIT License 4 votes vote down vote up
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = tf.train.polynomial_decay(
            self.lr,
            self.iterations,
            self.num_train_steps,
            end_learning_rate=0.0,
            power=1.0,
            cycle=False
        )

        # Implements linear warmup. I.e., if global_step < num_warmup_steps, the
        # learning rate will be `global_step/num_warmup_steps * init_lr`.
        t = K.cast(self.iterations, K.floatx()) + 1
        warmup_percent_done = K.cast(t / self.num_warmup_steps, dtype=K.floatx())
        warmup_lr = self.lr * warmup_percent_done
        is_warmup = K.cast(t < self.num_warmup_steps, dtype=K.floatx())
        lr = ((1.0 - is_warmup) * lr) + is_warmup * warmup_lr

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]

        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            if self.bias_corrected:
                m_t /= 1 - K.pow(self.beta_1, t)
                v_t /= 1 - K.pow(self.beta_2, t)

            update = m_t / (K.sqrt(v_t) + self.epsilon)

            # Just adding the square of the weights to the loss function is *not*
            # the correct way of using L2 regularization/weight decay with Adam,
            # since that will interact with the m and v parameters in strange ways.
            #
            # Instead we want to decay the weights in a manner that doesn't interact
            # with the m/v parameters. This is equivalent to adding the square
            # of the weights to the loss with plain (non-momentum) SGD.
            param_name = self._get_variable_name(p.name)
            if self._do_use_weight_decay(param_name):
                update += self.weight_decay_rate * p

            p_t = p - lr * update

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
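A standalone sketch of the schedule above: a linear ramp up to init_lr over num_warmup_steps, then linear decay to 0 (polynomial_decay with power=1.0); the step values are assumed for illustration:

init_lr, num_train_steps, num_warmup_steps = 1e-4, 1000, 100
for step in (10, 50, 100, 500, 1000):
    decayed = init_lr * (1. - float(step) / num_train_steps)
    warmup = init_lr * step / num_warmup_steps
    lr = warmup if step < num_warmup_steps else decayed
    print(step, lr)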