Python keras.backend.epsilon() Examples

The following are code examples showing how to use keras.backend.epsilon(). They are taken from open source Python projects.
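
K.epsilon() returns the global fuzz factor Keras uses to avoid division by zero and log(0); by default it is 1e-07 and it can be changed with K.set_epsilon(). A minimal sketch of the basic usage (assuming a standard Keras installation):

from keras import backend as K

print(K.epsilon())     # 1e-07 by default
K.set_epsilon(1e-05)   # the fuzz factor is configurable globally
K.set_epsilon(1e-07)

# The recurring pattern in the examples below: guard a division (or a log)
# against zeros by adding the fuzz factor to the denominator.
def safe_normalize(x):
    return x / (K.sum(x, axis=-1, keepdims=True) + K.epsilon())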

Example 1
Project: 360_aware_saliency   Author: MikhailStartsev   File: models.py    GNU General Public License v3.0
def nss(y_true, y_pred):
    max_y_pred = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.max(K.max(y_pred, axis=2), axis=2)), 
                                                                   shape_r_out, axis=-1)), shape_c_out, axis=-1)
    y_pred /= max_y_pred
    y_pred_flatten = K.batch_flatten(y_pred)

    y_mean = K.mean(y_pred_flatten, axis=-1)
    y_mean = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.expand_dims(y_mean)), 
                                                               shape_r_out, axis=-1)), shape_c_out, axis=-1)

    y_std = K.std(y_pred_flatten, axis=-1)
    y_std = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.expand_dims(y_std)), 
                                                              shape_r_out, axis=-1)), shape_c_out, axis=-1)

    y_pred = (y_pred - y_mean) / (y_std + K.epsilon())

    return -(K.sum(K.sum(y_true * y_pred, axis=2), axis=2) / K.sum(K.sum(y_true, axis=2), axis=2))


# Gaussian priors initialization 
Example 2
Project: cdc   Author: ckbjimmy   File: Attention.py    MIT License
def call(self, x, mask=None):
        eij = dot_product(x, self.W)

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
        # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1) 
Example 3
Project: cbc_networks   Author: saralajew   File: reasoning_layers.py    BSD 3-Clause "New" or "Revised" License
def call(self, inputs, **kwargs):
        # decode the reasoning probabilities
        positive_kernel = self.reasoning_probabilities[0]
        negative_kernel = (1 - positive_kernel) * \
                          self.reasoning_probabilities[1]

        if self.use_component_probabilities:
            # squash component probabilities
            components_probabilities = softmax(self.component_probabilities)

            positive_kernel = positive_kernel * components_probabilities
            negative_kernel = negative_kernel * components_probabilities

        # stabilize the division with a small epsilon
        probs = (K.dot(inputs, (positive_kernel - negative_kernel)) \
                 + K.sum(negative_kernel, 1)) \
                / (K.sum(positive_kernel + negative_kernel, 1) + K.epsilon())

        # squeeze replica dimension if one.
        if self.n_replicas == 1:
            probs = K.squeeze(probs, axis=1)
        else:
            probs = K.permute_dimensions(probs, (0, 2, 1))

        return probs 
Example 4
Project: 360_aware_saliency   Author: MikhailStartsev   File: models.py    GNU General Public License v3.0
def kl_divergence(y_true, y_pred):
    max_y_pred = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.max(K.max(y_pred, axis=2), axis=2)), 
                                                                   shape_r_out, axis=-1)), shape_c_out, axis=-1)
    y_pred /= max_y_pred

    sum_y_true = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.sum(K.sum(y_true, axis=2), axis=2)), 
                                                                   shape_r_out, axis=-1)), shape_c_out, axis=-1)
    sum_y_pred = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.sum(K.sum(y_pred, axis=2), axis=2)), 
                                                                   shape_r_out, axis=-1)), shape_c_out, axis=-1)
    y_true /= (sum_y_true + K.epsilon())
    y_pred /= (sum_y_pred + K.epsilon())

    return 10 * K.sum(K.sum(y_true * K.log((y_true / (y_pred + K.epsilon())) + K.epsilon()), axis=-1), axis=-1)


# Correlation Coefficient Loss 
Example 5
Project: keras-adamw   Author: OverLordGoldDragon   File: optimizers.py    MIT License
def get_config(self):
        config = {
            'learning_rate': float(K_eval(self.learning_rate)),
            'beta_1': float(K_eval(self.beta_1)),
            'beta_2': float(K_eval(self.beta_2)),
            'decay': float(K_eval(self.decay)),
            'batch_size': int(K_eval(self.batch_size)),
            'total_iterations': int(self.total_iterations),
            'weight_decays': self.weight_decays,
            'lr_multipliers': self.lr_multipliers,
            'use_cosine_annealing': self.use_cosine_annealing,
            't_cur': int(K_eval(self.t_cur)),
            'eta_t': int(K_eval(self.eta_t)),
            'eta_min': int(K_eval(self.eta_min)),
            'eta_max': int(K_eval(self.eta_max)),
            'init_verbose': self.init_verbose,
            'epsilon': self.epsilon,
            'amsgrad': self.amsgrad
        }
        base_config = super(AdamW, self).get_config()
        return dict(list(base_config.items()) + list(config.items())) 
Example 6
Project: keras-adamw   Author: OverLordGoldDragon   File: optimizers.py    MIT License
def get_config(self):
        config = {
            'learning_rate': float(K_eval(self.learning_rate)),
            'beta_1': float(K_eval(self.beta_1)),
            'beta_2': float(K_eval(self.beta_2)),
            'epsilon': self.epsilon,
            'schedule_decay': self.schedule_decay,
            'batch_size': int(K_eval(self.batch_size)),
            'total_iterations': int(self.total_iterations),
            'weight_decays': self.weight_decays,
            'lr_multipliers': self.lr_multipliers,
            'use_cosine_annealing': self.use_cosine_annealing,
            't_cur': int(K_eval(self.t_cur)),
            'eta_t': int(K_eval(self.eta_t)),
            'eta_min': int(K_eval(self.eta_min)),
            'eta_max': int(K_eval(self.eta_max)),
            'init_verbose': self.init_verbose
        }
        base_config = super(NadamW, self).get_config()
        return dict(list(base_config.items()) + list(config.items())) 
Example 7
Project: keras-adamw   Author: OverLordGoldDragon   File: optimizers_225.py    MIT License
def get_config(self):
        config = {
            'lr': float(K.get_value(self.lr)),
            'beta_1': float(K.get_value(self.beta_1)),
            'beta_2': float(K.get_value(self.beta_2)),
            'decay': float(K.get_value(self.decay)),
            'batch_size': int(K.get_value(self.batch_size)),
            'total_iterations': int(self.total_iterations),
            'weight_decays': self.weight_decays,
            'lr_multipliers': self.lr_multipliers,
            'use_cosine_annealing': self.use_cosine_annealing,
            't_cur': int(K.get_value(self.t_cur)),
            'eta_t': int(K.eval(self.eta_t)),
            'eta_min': int(K.get_value(self.eta_min)),
            'eta_max': int(K.get_value(self.eta_max)),
            'init_verbose': self.init_verbose,
            'epsilon': self.epsilon,
            'amsgrad': self.amsgrad
        }
        base_config = super(AdamW, self).get_config()
        return dict(list(base_config.items()) + list(config.items())) 
Example 8
Project: keras-adamw   Author: OverLordGoldDragon   File: optimizers_225.py    MIT License
def get_config(self):
        config = {
            'lr': float(K.get_value(self.lr)),
            'beta_1': float(K.get_value(self.beta_1)),
            'beta_2': float(K.get_value(self.beta_2)),
            'epsilon': self.epsilon,
            'schedule_decay': self.schedule_decay,
            'batch_size': int(K.get_value(self.batch_size)),
            'total_iterations': int(self.total_iterations),
            'weight_decays': self.weight_decays,
            'lr_multipliers': self.lr_multipliers,
            'use_cosine_annealing': self.use_cosine_annealing,
            't_cur': int(K.get_value(self.t_cur)),
            'eta_t': int(K.eval(self.eta_t)),
            'eta_min': int(K.get_value(self.eta_min)),
            'eta_max': int(K.get_value(self.eta_max)),
            'init_verbose': self.init_verbose
        }
        base_config = super(NadamW, self).get_config()
        return dict(list(base_config.items()) + list(config.items())) 
Example 9
Project: spektral   Author: danielegrattarola   File: ops.py    MIT License
def normalize_A(A):
    """
    Computes symmetric normalization of A, dealing with sparse A and batch mode
    automatically.
    :param A: Tensor or SparseTensor with rank k = {2, 3}.
    :return: SparseTensor of rank k.
    """
    D = degrees(A)
    D = tf.sqrt(D)[:, None] + K.epsilon()
    if K.ndim(A) == 3:
        # Batch mode
        output = (A / D) / transpose(D, perm=(0, 2, 1))
    else:
        # Single mode
        output = (A / D) / transpose(D)

    return output 
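
For reference, a hedged NumPy illustration of the same idea on a dense adjacency matrix (not Spektral's API; just the D^(-1/2) A D^(-1/2) normalization the function above computes, with the small eps playing the role of K.epsilon() for isolated nodes):

import numpy as np

def normalize_adjacency(A, eps=1e-7):
    d = A.sum(axis=-1)                      # node degrees
    d_inv_sqrt = 1.0 / (np.sqrt(d) + eps)   # eps guards isolated nodes (degree 0)
    return A * d_inv_sqrt[:, None] * d_inv_sqrt[None, :]

A = np.array([[0., 1., 1.],
              [1., 0., 0.],
              [1., 0., 0.]])
print(normalize_adjacency(A))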
Example 10
Project: keras_extension   Author: k1414st   File: optimizers.py    MIT License
def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999,
                 epsilon=None, decay=0.,
                 terminal_bound=0.1, lower_bound=0., upper_bound=None, **kwargs):
        super(AdaBound, self).__init__(**kwargs)
        with K.name_scope(self.__class__.__name__):
            self.iterations = K.variable(0, dtype='int64', name='iterations')
            self.lr = K.variable(lr, name='lr')
            self.beta_1 = K.variable(beta_1, name='beta_1')
            self.beta_2 = K.variable(beta_2, name='beta_2')
            self.decay = K.variable(decay, name='decay')
            if upper_bound is None:
                upper_bound = terminal_bound * 2.
            self.terminal_bound = K.variable(terminal_bound, name='terminal_bound')
            self.lower_bound = K.variable(lower_bound, name='lower_bound')
            self.upper_bound = K.variable(upper_bound, name='upper_bound')
        if epsilon is None:
            epsilon = K.epsilon()
        self.epsilon = epsilon
        self.initial_decay = decay 
Example 11
Project: keras_extension   Author: k1414st   File: optimizers.py    MIT License
def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999,
                 epsilon=None, decay=0.,
                 terminal_bound=0.1, lower_bound=0., upper_bound=None, **kwargs):
        super(AdaBound, self).__init__(**kwargs)
        with K.name_scope(self.__class__.__name__):
            self.iterations = K.variable(0, dtype='int64', name='iterations')
            self.lr = K.variable(lr, name='lr')
            self.beta_1 = K.variable(beta_1, name='beta_1')
            self.beta_2 = K.variable(beta_2, name='beta_2')
            self.decay = K.variable(decay, name='decay')
            if upper_bound is None:
                upper_bound = terminal_bound * 2.
            self.terminal_bound = K.variable(terminal_bound, name='terminal_bound')
            self.lower_bound = K.variable(lower_bound, name='lower_bound')
            self.upper_bound = K.variable(upper_bound, name='upper_bound')
        if epsilon is None:
            epsilon = K.epsilon()
        self.epsilon = epsilon
        self.initial_decay = decay 
Example 12
Project: deepGroupv2   Author: albu5   File: networks.py    MIT License
def min_max_group(y_true, y_pred):
    diag = tf.eye(num_rows=tf.shape(y_true)[2], batch_shape=kb.expand_dims(tf.shape(y_true)[0], axis=0))

    in_frame_row = kb.max(y_true, axis=1, keepdims=True)

    in_frame_col = kb.max(y_true, axis=2, keepdims=True)

    mask = kb.batch_dot(in_frame_col, in_frame_row, axes=(2, 1))

    intra_max = kb.max(y_pred + y_true + mask - diag - 2, axis=2)

    intra_min = kb.min(y_pred - y_true - mask + diag + 2, axis=2)

    inter_max = kb.max(y_pred - y_true + mask - 1, axis=2)

    return (kb.sum(inter_max - intra_max, axis=-1) + kb.epsilon()) / (kb.sum(in_frame_row, axis=-1) + kb.epsilon()) 
Example 13
Project: DeepResearch   Author: Hsankesara   File: attention_with_context.py    MIT License
def call(self, x, mask=None):
        uit = dot_product(x, self.W)

        if self.bias:
            uit += self.b

        uit = K.tanh(uit)
        ait = dot_product(uit, self.u)

        a = K.exp(ait)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
        # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1) 
Example 14
Project: MCF-3D-CNN   Author: xyj77   File: conv_featuremaps_visualization.py    MIT License
def deprocess_image(x):
    # normalize tensor: center on 0., ensure std is 0.1
    x -= x.mean()
    x /= (x.std() + K.epsilon())
    x *= 0.1

    # clip to [0, 1]
    x += 0.5
    x = np.clip(x, 0, 1)

    # convert to RGB array
    x *= 255
    if K.image_data_format() == 'channels_first':
        x = x.transpose((1, 2, 0))
    x = np.clip(x, 0, 255).astype('uint8')
    return x 
Example 15
Project: iMIMIC-RCVs   Author: medgift   File: optimizer.py    MIT License
def _rmsprop(self, grads, cache=None, decay_rate=0.95):
        """Uses RMSProp to compute step from gradients.

        Args:
            grads: numpy array of gradients.
            cache: numpy array of same shape as `grads` as RMSProp cache
            decay_rate: How fast to decay cache

        Returns:
            A tuple of
                step: numpy array of the same shape as `grads` giving the step.
                    Note that this does not yet take the learning rate into account.
                cache: Updated RMSProp cache.
        """
        if cache is None:
            cache = np.zeros_like(grads)
        cache = decay_rate * cache + (1 - decay_rate) * grads ** 2
        step = -grads / np.sqrt(cache + K.epsilon())
        return step, cache 
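
A standalone driver loop (a hypothetical usage sketch, not part of the project): the cache is threaded through successive calls, and the epsilon keeps the square root away from zero on the first steps.

import numpy as np

def rmsprop_step(grads, cache=None, decay_rate=0.95, eps=1e-7):
    if cache is None:
        cache = np.zeros_like(grads)
    cache = decay_rate * cache + (1 - decay_rate) * grads ** 2
    return -grads / np.sqrt(cache + eps), cache

x = np.array([3.0, -2.0])
cache = None
for _ in range(100):
    grads = 2 * x                    # gradient of sum(x ** 2)
    step, cache = rmsprop_step(grads, cache)
    x += 0.1 * step                  # learning rate applied by the caller
print(x)                             # ends up close to the minimum at [0, 0]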
Example 16
Project: iMIMIC-RCVs   Author: medgift   File: utils.py    MIT License
def random_array(shape, mean=128., std=20.):
    """Creates a uniformly distributed random array with the given `mean` and `std`.

    Args:
        shape: The desired shape
        mean: The desired mean (Default value = 128)
        std: The desired std (Default value = 20)

    Returns: Random numpy array of given `shape` uniformly distributed with desired `mean` and `std`.
    """
    x = np.random.random(shape)
    # normalize around mean=0, std=1
    x = (x - np.mean(x)) / (np.std(x) + K.epsilon())
    # and then around the desired mean/std
    x = (x * std) + mean
    return x 
Example 17
Project: iMIMIC-RCVs   Author: medgift   File: utils.py    MIT License
def deprocess_input(input_array, input_range=(0, 255)):
    """Utility function to scale the `input_array` to `input_range` throwing away high frequency artifacts.

    Args:
        input_array: An N-dim numpy array.
        input_range: Specifies the input range as a `(min, max)` tuple to rescale the `input_array`.

    Returns:
        The rescaled `input_array`.
    """
    # normalize tensor: center on 0., ensure std is 0.1
    input_array = input_array.copy()
    input_array -= input_array.mean()
    input_array /= (input_array.std() + K.epsilon())
    input_array *= 0.1

    # clip to [0, 1]
    input_array += 0.5
    input_array = np.clip(input_array, 0, 1)

    # Convert to `input_range`
    return (input_range[1] - input_range[0]) * input_array + input_range[0] 
Example 18
Project: VisualNN   Author: angelhunt   File: cifar10_cnn_capsule.py    GNU General Public License v3.0
def squash(x, axis=-1):
    s_squared_norm = K.sum(K.square(x), axis, keepdims=True) + K.epsilon()
    scale = K.sqrt(s_squared_norm) / (0.5 + s_squared_norm)
    return scale * x


# define our own softmax function instead of K.softmax
# because K.softmax can not specify axis. 
Example 19
Project: Kaggle-Statoil-Challenge   Author: adodd202   File: capsulelayers.py    MIT License
def squash(vectors, axis=-1):
    """
    The non-linear activation used in Capsule. It drives the length of a large vector to near 1 and small vector to 0
    :param vectors: some vectors to be squashed, N-dim tensor
    :param axis: the axis to squash
    :return: a Tensor with same shape as input vectors
    """
    s_squared_norm = K.sum(K.square(vectors), axis, keepdims=True)
    scale = s_squared_norm / (1 + s_squared_norm) / K.sqrt(s_squared_norm + K.epsilon())
    return scale * vectors 
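
A quick NumPy sanity check of the squash behaviour (an illustrative sketch, not part of the project): long vectors are driven toward unit length, short vectors toward zero, and the epsilon keeps the zero vector from causing a division by zero.

import numpy as np

def squash_np(v, eps=1e-7):
    s = np.sum(np.square(v), axis=-1, keepdims=True)
    return s / (1 + s) / np.sqrt(s + eps) * v

for v in [np.zeros(3), np.array([0.1, 0.0, 0.0]), np.array([10.0, 0.0, 0.0])]:
    print(np.linalg.norm(squash_np(v)))   # ~0.0, ~0.0099, ~0.99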
Example 20
Project: CapsAttnNet   Author: rstager   File: canlayer.py    MIT License
def squash_scale(vectors, axis=-1):
    """
    The non-linear activation used in Capsule. It drives the length of a large vector to near 1 and small vector to 0
    :param vectors: some vectors to be squashed, N-dim tensor
    :param axis: the axis to squash
    :return: a Tensor with same shape as input vectors
    """
    s_squared_norm = K.sum(K.square(vectors), axis, keepdims=True)
    scale = s_squared_norm / (1 + s_squared_norm) / K.sqrt(s_squared_norm + K.epsilon())
    return scale 
Example 21
Project: phoneticSimilarity   Author: ronggong   File: attentionWithContext.py    GNU Affero General Public License v3.0
def call(self, x, mask=None):
        uit = dot_product(x, self.W)

        if self.bias:
            uit += self.b

        uit = K.tanh(uit)
        ait = dot_product(uit, self.u)

        a = K.exp(ait)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
        # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        result = K.sum(weighted_input, axis=1)

        if self.return_attention:
            return [result, a]
        return result 
Example 22
Project: phoneticSimilarity   Author: ronggong   File: attention.py    GNU Affero General Public License v3.0
def call(self, x, mask=None):
        eij = dot_product(x, self.W)  # (samples, steps)

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
        # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a_expand = K.expand_dims(a)

        # element wise
        weighted_input = x * a_expand

        result = K.sum(weighted_input, axis=1)

        if self.return_attention:
            return [result, a]
        return result 
Example 23
Project: cdc   Author: ckbjimmy   File: EmbCRNN.py    MIT License
def precision(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision 
Example 24
Project: cdc   Author: ckbjimmy   File: EmbCRNN.py    MIT License
def recall(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall 
Example 25
Project: cdc   Author: ckbjimmy   File: EmbCRNN.py    MIT License
def fbeta_score(y_true, y_pred, beta=1):
    if beta < 0:
        raise ValueError('The lowest choosable beta is zero (only precision).')
    if K.sum(K.round(K.clip(y_true, 0, 1))) == 0:
        return 0
    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    bb = beta ** 2
    fbeta_score = (1 + bb) * (p * r) / (bb * p + r + K.epsilon())
    return fbeta_score 
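
These three functions follow the Keras custom-metric signature, so they would typically be passed straight to model.compile. A hedged usage sketch (the tiny model below is hypothetical and not from the project):

from keras.models import Sequential
from keras.layers import Dense

model = Sequential([Dense(1, activation='sigmoid', input_shape=(16,))])
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=[precision, recall, fbeta_score])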
Example 26
Project: cdc   Author: ckbjimmy   File: Attention.py    MIT License
def call(self, x, mask=None):
        uit = K.dot(x, self.W)
        
        if self.bias:
            uit += self.b
        
        uit = K.tanh(uit)
        
        mul_a = uit * self.u  # element-wise multiply with the context vector u
        ait = K.sum(mul_a, axis=2)  # then sum over the feature axis
        
        a = K.exp(ait)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
        # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1) 
Example 27
Project: cdc   Author: ckbjimmy   File: EmbGRUattention.py    MIT License
def recall(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall 
Example 28
Project: cdc   Author: ckbjimmy   File: EmbGRUattention.py    MIT License
def fbeta_score(y_true, y_pred, beta=1):
    if beta < 0:
        raise ValueError('The lowest choosable beta is zero (only precision).')
    if K.sum(K.round(K.clip(y_true, 0, 1))) == 0:
        return 0
    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    bb = beta ** 2
    fbeta_score = (1 + bb) * (p * r) / (bb * p + r + K.epsilon())
    return fbeta_score 
Example 29
Project: AI_Competition   Author: Decalogue   File: attention.py    MIT License
def call(self, x, mask=None):
        input_shape = K.int_shape(x)

        features_dim = self.features_dim
        # step_dim = self.step_dim
        step_dim = input_shape[1]

        eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)), K.reshape(self.W, (features_dim, 1))), (-1, step_dim))

        if self.bias:
            eij += self.b[:input_shape[1]]

        eij = K.tanh(eij)

        a = K.exp(eij)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        # print(weighted_input.shape)
        return K.sum(weighted_input, axis=1) 
Example 30
Project: AI_Competition   Author: Decalogue   File: attention.py    MIT License
def call(self, inputs, mask=None):
        en = inputs[0]
        de = inputs[1]
        de_shape = K.int_shape(de)
        step_dim = de_shape[1]

        hid_en = K.dot(en, self.W_en1)
        hid_de = K.dot(de, self.W_en2)
        if self.bias:
            hid_en += self.b_en1
            hid_de += self.b_en2
        hid = K.tanh(K.expand_dims(hid_en, axis=1) + hid_de)
        eij = K.reshape(K.dot(hid, K.reshape(self.W_de, (self.hid_size, 1))), (-1, step_dim))
        if self.bias:
            eij += self.b_de[:step_dim]

        a = K.exp(eij - K.max(eij, axis=-1, keepdims=True))

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask[1], K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = de * a
        return K.sum(weighted_input, axis=1) 
Example 31
Project: kaggle-carvana-2017   Author: killthekitten   File: losses.py    MIT License
def bootstrapped_crossentropy(y_true, y_pred, bootstrap_type='hard', alpha=0.95):
    target_tensor = y_true
    prediction_tensor = y_pred
    _epsilon = _to_tensor(K.epsilon(), prediction_tensor.dtype.base_dtype)
    prediction_tensor = K.tf.clip_by_value(prediction_tensor, _epsilon, 1 - _epsilon)
    prediction_tensor = K.tf.log(prediction_tensor / (1 - prediction_tensor))

    if bootstrap_type == 'soft':
        bootstrap_target_tensor = alpha * target_tensor + (1.0 - alpha) * K.tf.sigmoid(prediction_tensor)
    else:
        bootstrap_target_tensor = alpha * target_tensor + (1.0 - alpha) * K.tf.cast(
            K.tf.sigmoid(prediction_tensor) > 0.5, K.tf.float32)
    return K.mean(K.tf.nn.sigmoid_cross_entropy_with_logits(
        labels=bootstrap_target_tensor, logits=prediction_tensor)) 
Example 32
Project: CapsNet   Author: l11x0m7   File: capsule.py    MIT License
def squash(s, axis=-1):
    """
    Squash function. This could be viewed as one kind of activations.
    """
    squared_s = K.sum(K.square(s), axis=axis, keepdims=True)
    scale = squared_s / (1 + squared_s) / K.sqrt(squared_s + K.epsilon())
    return scale * s 
Example 33
Project: CapsNet   Author: l11x0m7   File: capsule.py    MIT License
def call(self, inputs, **kwargs):
        # inputs -> (X, y), then output the mask of y
        # inputs -> X, then output the mask of prediction
        if isinstance(inputs, (list, tuple)):
            inputs, mask = inputs
        else:
            pred = K.sqrt(K.sum(K.square(inputs), axis=-1) + K.epsilon())
            mask = K.one_hot(indices=K.argmax(pred, 1), num_classes=pred.get_shape().as_list()[1])
        return K.batch_flatten(inputs * K.expand_dims(mask, axis=-1)) 
Example 34
Project: 3DGCN   Author: blackmints   File: loss.py    MIT License
def std_r2(std=1):
    def r2(y_true, y_pred):
        ss_res = K.sum(K.square((y_true - y_pred) * std))
        ss_tot = K.sum(K.square((y_true - K.mean(y_true)) * std))
        return 1 - ss_res / (ss_tot + K.epsilon())

    return r2 
Example 35
Project: CalibrationNN   Author: Andres-Hernandez   File: neural_network.py    GNU General Public License v3.0
def logarithmic_mean_squared_error(y_true, y_pred):
    return -K.mean(K.log(1.-K.clip(K.square(y_pred-y_true),0., 1.-K.epsilon())))

#_paper 
Example 36
Project: memorable-quotes   Author: TheAnig   File: main.py    MIT License
def fmeasure(y_true, y_pred):

    def precision(y_true, y_pred):
        true_positives = np.sum(np.round(np.clip(y_true * y_pred, 0, 1)))
        predicted_positives = np.sum(np.round(np.clip(y_pred, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        return precision

    def recall(y_true, y_pred):
        true_positives = np.sum(np.round(np.clip(y_true * y_pred, 0, 1)))
        possible_positives = np.sum(np.round(np.clip(y_true, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())
        return recall

    def fbeta_score(y_true, y_pred, beta=1):
        if beta < 0:
            raise ValueError('The lowest beta value is 0')
        if np.sum(np.round(np.clip(y_true, 0, 1))) == 0:
            return 0

        p = precision(y_true, y_pred)
        r = recall(y_true, y_pred)
        bb = beta ** 2
        fbeta_score = (1 + bb) * (p * r) / (bb * p + r + K.epsilon())
        return fbeta_score

    return fbeta_score(y_true, y_pred, beta=1) 
Example 37
Project: BlurbGenreCollection-HMC   Author: uhh-lt   File: capsulelayers.py    Apache License 2.0
def squash(vectors, axis=-1):
    """
    The non-linear activation used in Capsule. It drives the length of a large vector to near 1 and small vector to 0
    :param vectors: some vectors to be squashed, N-dim tensor
    :param axis: the axis to squash
    :return: a Tensor with same shape as input vectors
    """
    s_squared_norm = K.sum(K.square(vectors), axis, keepdims=True)
    scale = s_squared_norm / (1 + s_squared_norm) / K.sqrt(s_squared_norm + K.epsilon())
    return scale * vectors 
Example 38
Project: keras-lamb   Author: CyberZHG   File: optimizer.py    MIT License
def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999,
                 epsilon=1e-6, decay=0., weight_decay=0.01,
                 lower_bound=1e-3, upper_bound=10.0, **kwargs):
        super(Lamb, self).__init__(**kwargs)
        with K.name_scope(self.__class__.__name__):
            self.iterations = K.variable(0, dtype='int64', name='iterations')
            self.lr = K.variable(lr, name='lr')
            self.beta_1 = K.variable(beta_1, name='beta_1')
            self.beta_2 = K.variable(beta_2, name='beta_2')
            self.decay = K.variable(decay, name='decay')
            self.weight_decay = K.variable(weight_decay, name='weight_decay')
        self.epsilon = epsilon
        self.initial_decay = decay
        self.lower_bound = lower_bound
        self.upper_bound = upper_bound 
Example 39
Project: keras-lamb   Author: CyberZHG   File: optimizer.py    MIT License
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        t = K.cast(self.iterations, K.floatx()) + 1
        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * t))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))

            mhat_t = m_t / (1. - K.pow(self.beta_1, t))
            vhat_t = v_t / (1. - K.pow(self.beta_2, t))

            u_t = mhat_t / K.sqrt(vhat_t + self.epsilon) + self.weight_decay * p
            trust_ratio = K.sqrt(K.sum(K.square(p)) / K.sum(K.square(u_t)))
            trust_ratio = K.minimum(K.maximum(trust_ratio, self.lower_bound), self.upper_bound)

            lr_p = trust_ratio * lr
            new_p = p - lr_p * u_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates 
Example 40
Project: keras-lamb   Author: CyberZHG   File: optimizer.py    MIT License
def get_config(self):
        config = {'lr': float(K.get_value(self.lr)),
                  'beta_1': float(K.get_value(self.beta_1)),
                  'beta_2': float(K.get_value(self.beta_2)),
                  'decay': float(K.get_value(self.decay)),
                  'weight_decay': float(K.get_value(self.weight_decay)),
                  'epsilon': self.epsilon,
                  'upper_bound': self.upper_bound,
                  'lower_bound': self.lower_bound}
        base_config = super(Lamb, self).get_config()
        return dict(list(base_config.items()) + list(config.items())) 
Example 41
Project: ODENet   Author: uqyge   File: utils.py    MIT License
def coeff_r2(y_true, y_pred):
    from keras import backend as K
    SS_res = K.sum(K.square(y_true-y_pred))
    SS_tot = K.sum(K.square(y_true - K.mean(y_true)))
    return (1 - SS_res/(SS_tot + K.epsilon())) 
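
An illustrative NumPy check of the same formula (not from the project): it matches the usual coefficient of determination, and K.epsilon() only matters in the degenerate case of constant targets (SS_tot = 0).

import numpy as np

y_true = np.array([1.0, 2.0, 3.0, 4.0])
y_pred = np.array([1.1, 1.9, 3.2, 3.8])
ss_res = np.sum((y_true - y_pred) ** 2)
ss_tot = np.sum((y_true - y_true.mean()) ** 2)
print(1 - ss_res / (ss_tot + 1e-7))   # 0.98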
Example 42
Project: cbc_networks   Author: saralajew   File: constraints.py    BSD 3-Clause "New" or "Revised" License
def __call__(self, w):
        w /= K.sqrt(K.maximum(K.sum(K.square(w),
                                    self.axis,
                                    keepdims=True),
                              K.epsilon()))
        return w 
Example 43
Project: cbc_networks   Author: saralajew   File: detection_probability_functions.py    BSD 3-Clause "New" or "Revised" License
def call(self, inputs, **kwargs):
        def sqrt(x):
            return K.sqrt(K.maximum(x, K.epsilon()))

        # Both components and input given
        if isinstance(inputs, list) and len(inputs) > 1:
            signals, kernel = inputs
        else:
            signals = inputs
            kernel = self.components.astype(K.floatx())

        # move component_number to channel dimension
        kernel = K.permute_dimensions(kernel, (1, 2, 3, 0))
        # normalize kernel
        normed_kernel = kernel / sqrt(K.sum(K.square(kernel),
                                            (0, 1, 2),
                                            keepdims=True))

        # get norm of signals
        signals_norm = sqrt(K.conv2d(K.square(signals),
                                     np.ones(K.int_shape(kernel)[:3] + (1,),
                                             dtype=K.floatx()),
                                     strides=self.strides,
                                     padding=self.padding,
                                     data_format='channels_last',
                                     dilation_rate=self.dilation_rate))

        diss = K.conv2d(signals,
                        normed_kernel,
                        strides=self.strides,
                        padding=self.padding,
                        data_format='channels_last',
                        dilation_rate=self.dilation_rate) / signals_norm

        if self.n_replicas != 1:
            shape = K.int_shape(diss)
            diss = K.reshape(diss, (-1, shape[1], shape[2],
                                    shape[3] // self.n_replicas,
                                    self.n_replicas))

        return self.activation(diss) 
Example 44
Project: 360_aware_saliency   Author: MikhailStartsev   File: gaussian_prior.py    GNU General Public License v3.0
def call(self, x, mask=None):
        mu_x = self.W[:self.nb_gaussian]
        mu_y = self.W[self.nb_gaussian:self.nb_gaussian*2]
        sigma_x = self.W[self.nb_gaussian*2:self.nb_gaussian*3]
        sigma_y = self.W[self.nb_gaussian*3:]

        self.b_s = x.shape[0]
        self.height = x.shape[2]
        self.width = x.shape[3]

        e = self.height / self.width
        e1 = (1 - e) / 2
        e2 = e1 + e

        mu_x = K.clip(mu_x, 0.25, 0.75)
        mu_y = K.clip(mu_y, 0.35, 0.65)

        sigma_x = K.clip(sigma_x, 0.1, 0.9)
        sigma_y = K.clip(sigma_y, 0.2, 0.8)

        x_t = T.dot(T.ones((self.height, 1)), self._linspace(0, 1.0, self.width).dimshuffle('x', 0))
        y_t = T.dot(self._linspace(e1, e2, self.height).dimshuffle(0, 'x'), T.ones((1, self.width)))

        x_t = K.repeat_elements(K.expand_dims(x_t, dim=-1), self.nb_gaussian, axis=-1)
        y_t = K.repeat_elements(K.expand_dims(y_t, dim=-1), self.nb_gaussian, axis=-1)

        gaussian = 1 / (2 * np.pi * sigma_x * sigma_y + K.epsilon()) * \
                   T.exp(-((x_t - mu_x) ** 2 / (2 * sigma_x ** 2 + K.epsilon()) +
                           (y_t - mu_y) ** 2 / (2 * sigma_y ** 2 + K.epsilon())))

        gaussian = K.permute_dimensions(gaussian, (2, 0, 1))
        max_gauss = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.max(K.max(gaussian, axis=1), axis=1)), self.height, axis=-1)), self.width, axis=-1)
        gaussian = gaussian / max_gauss

        output = K.repeat_elements(K.expand_dims(gaussian, dim=0), self.b_s, axis=0)

        return output 
Example 45
Project: 360_aware_saliency   Author: MikhailStartsev   File: models.py    GNU General Public License v3.0
def correlation_coefficient(y_true, y_pred):
    max_y_pred = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.max(K.max(y_pred, axis=2), axis=2)), 
                                                                   shape_r_out, axis=-1)), shape_c_out, axis=-1)
    y_pred /= max_y_pred

    sum_y_true = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.sum(K.sum(y_true, axis=2), axis=2)), 
                                                                   shape_r_out, axis=-1)), shape_c_out, axis=-1)
    sum_y_pred = K.repeat_elements(K.expand_dims(K.repeat_elements(K.expand_dims(K.sum(K.sum(y_pred, axis=2), axis=2)), 
                                                                   shape_r_out, axis=-1)), shape_c_out, axis=-1)

    y_true /= (sum_y_true + K.epsilon())
    y_pred /= (sum_y_pred + K.epsilon())

    N = shape_r_out * shape_c_out
    sum_prod = K.sum(K.sum(y_true * y_pred, axis=2), axis=2)
    sum_x = K.sum(K.sum(y_true, axis=2), axis=2)
    sum_y = K.sum(K.sum(y_pred, axis=2), axis=2)
    sum_x_square = K.sum(K.sum(K.square(y_true), axis=2), axis=2)
    sum_y_square = K.sum(K.sum(K.square(y_pred), axis=2), axis=2)

    num = sum_prod - ((sum_x * sum_y) / N)
    den = K.sqrt((sum_x_square - K.square(sum_x) / N) * (sum_y_square - K.square(sum_y) / N))

    return -2 * num / den


# Normalized Scanpath Saliency Loss 
Example 46
Project: ICASSP2019_TCN   Author: DSIP-UPatras   File: custom_layers.py    MIT License
def call(self, x, mask=None):
        uit = dot_product(x, self.W)

        if self.bias:
            uit += self.b

        uit = K.tanh(uit)
        ait = dot_product(uit, self.u)

        a = K.exp(ait)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        result = K.sum(weighted_input, axis=1)

        if self.return_attention:
            return [result, a]
        return result 
Example 47
Project: lmtc-eurlex57k   Author: iliaschalkidis   File: attention.py    Apache License 2.0
def call(self, x, mask=None):
        eij = dot_product(x, self.W)

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
        # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        result = K.sum(weighted_input, axis=1)

        if self.return_attention:
            return [result, a]
        return result 
Example 48
Project: lmtc-eurlex57k   Author: iliaschalkidis   File: attention.py    Apache License 2.0
def call(self, x, mask=None):
        uit = dot_product(x, self.W)

        if self.bias:
            uit += self.b

        uit = K.tanh(uit)

        # Dot product with context vector U
        ait = dot_product(uit, self.u)

        a = K.exp(ait)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
        # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        result = K.sum(weighted_input, axis=1)

        if self.return_attention:
            return [result, a]
        return result 
Example 49
Project: kutils   Author: subpic   File: tensor_ops.py    MIT License
def plcc_tf(x, y):
    """PLCC metric"""
    xc = x - K.mean(x)
    yc = y - K.mean(y)
    return K.mean(xc*yc) / (K.std(x)*K.std(y) + K.epsilon()) 
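
On NumPy inputs the same expression reproduces the Pearson correlation coefficient; an illustrative check (not from the project):

import numpy as np

x = np.random.rand(100)
y = 0.5 * x + 0.1 * np.random.rand(100)
xc, yc = x - x.mean(), y - y.mean()
plcc = (xc * yc).mean() / (x.std() * y.std() + 1e-7)
print(plcc, np.corrcoef(x, y)[0, 1])   # nearly identical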
Example 50
Project: ycml   Author: skylander86   File: neural_networks.py    Apache License 2.0
def keras_f1_score(y_true, y_pred):
    '''Calculates the F score, the weighted harmonic mean of precision and recall.
    This is useful for multi-label classification, where input samples can be
    classified as sets of labels. By only using accuracy (precision) a model
    would achieve a perfect score by simply assigning every class to every
    input. In order to avoid this, a metric should penalize incorrect class
    assignments as well (recall). The F-beta score (ranged from 0.0 to 1.0)
    computes this, as a weighted mean of the proportion of correct class
    assignments vs. the proportion of incorrect class assignments.
    With beta = 1, this is equivalent to an F-measure. With beta < 1, assigning
    correct classes becomes more important, and with beta > 1 the metric is
    instead weighted towards penalizing incorrect class assignments.
    '''
    # If there are no true positives, fix the F score at 0 like sklearn.

    if K.sum(K.round(K.clip(y_true, 0, 1))) == 0:
        return 0

    beta = 1.0

    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    p = true_positives / (predicted_positives + K.epsilon())

    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    r = true_positives / (possible_positives + K.epsilon())

    bb = beta ** 2
    fbeta_score = (1 + bb) * (p * r) / (bb * p + r + K.epsilon())

    return fbeta_score
#end def 
Example 51
Project: CapsNet-Fashion-MNIST   Author: subarnop   File: capsulelayers.py    GNU General Public License v3.0
def call(self, inputs, **kwargs):
        # use true label to select target capsule, shape=[batch_size, num_capsule]
        if type(inputs) is list:  # true label is provided with shape = [batch_size, n_classes], i.e. one-hot code.
            assert len(inputs) == 2
            inputs, mask = inputs
        else:  # if no true label, mask by the max length of vectors of capsules
            x = inputs
            # Enlarge the range of values in x to make max(new_x)=1 and others < 0
            x = (x - K.max(x, 1, True)) / K.epsilon() + 1
            mask = K.clip(x, 0, 1)  # the max value in x clipped to 1 and other to 0

        # masked inputs, shape = [batch_size, dim_vector]
        inputs_masked = K.batch_dot(inputs, mask, [1, 1])
        return inputs_masked 
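
A NumPy illustration of the masking trick above (an assumed sketch, not from the project): subtracting the row maximum makes the largest entry 0 and the rest negative, dividing by the tiny K.epsilon() pushes those negatives far below zero, and after adding 1 and clipping only the argmax position survives.

import numpy as np

x = np.array([[0.2, 0.9, 0.4]])
new_x = (x - x.max(axis=1, keepdims=True)) / 1e-7 + 1
print(np.clip(new_x, 0, 1))   # [[0. 1. 0.]]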
Example 52
Project: CapsNet-Fashion-MNIST   Author: subarnop   File: capsulelayers.py    GNU General Public License v3.0
def squash(vectors, axis=-1):
    """
    The non-linear activation used in Capsule. It drives the length of a large vector to near 1 and small vector to 0
    :param vectors: some vectors to be squashed, N-dim tensor
    :param axis: the axis to squash
    :return: a Tensor with same shape as input vectors
    """
    s_squared_norm = K.sum(K.square(vectors), axis, keepdims=True)
    scale = s_squared_norm / (1 + s_squared_norm) / K.sqrt(s_squared_norm + K.epsilon())
    return scale * vectors 
Example 53
Project: musical_genres_classification   Author: shaoeric   File: Attention.py    MIT License
def call(self, x, mask=None):
        features_dim = self.features_dim
        # step_dim is a parameter we specify; it equals input_shape[1], i.e. the RNN timesteps
        step_dim = self.step_dim

        # After reshaping the input and the weights and taking their dot product, the tensor
        # shape becomes (batch_size*timesteps, 1); each batch is then normalized separately,
        # hence eij = K.reshape(..., (-1, timesteps))
        eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)),
                        K.reshape(self.W, (features_dim, 1))), (-1, step_dim))

        if self.bias:
            eij += self.b
        # RNNs usually default to tanh; for attention the choice of activation matters little,
        # since a softmax follows
        eij = K.tanh(eij)

        a = K.exp(eij)

        if mask is not None:
            # If a previous layer provides a mask, the masked timesteps must not contribute
            # to the output, so their attention weights are set to 0
            a *= K.cast(mask, K.floatx())
        # cast converts the dtype; Keras checks types during computation (likely because of GPU execution)
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        # K.expand_dims(a) defaults to axis=-1, adding a trailing dimension,
        # e.g. shape (3,) becomes (3, 1)
        a = K.expand_dims(a)
        # now a.shape = (batch_size, timesteps, 1) and x.shape = (batch_size, timesteps, units)
        weighted_input = x * a

        # weighted_input has shape (batch_size, timesteps, units); each timestep's output vector
        # has been multiplied by its attention weight. Summing over axis=1 gives shape (batch_size, units)
        return K.sum(weighted_input, axis=1) 
Example 54
Project: musical_genres_classification   Author: shaoeric   File: Attention.py    MIT License
def call(self, x, mask=None):
        features_dim = self.features_dim
        # step_dim is a parameter we specify; it equals input_shape[1], i.e. the RNN timesteps
        step_dim = self.step_dim

        # After reshaping the input and the weights and taking their dot product, the tensor
        # shape becomes (batch_size*timesteps, 1); each batch is then normalized separately,
        # hence eij = K.reshape(..., (-1, timesteps))
        eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)),
                        K.reshape(self.W, (features_dim, 1))), (-1, step_dim))

        if self.bias:
            eij += self.b
        # RNNs usually default to tanh; for attention the choice of activation matters little,
        # since a softmax follows
        eij = K.tanh(eij)

        a = K.exp(eij)

        if mask is not None:
            # If a previous layer provides a mask, the masked timesteps must not contribute
            # to the output, so their attention weights are set to 0
            a *= K.cast(mask, K.floatx())
        # cast converts the dtype; Keras checks types during computation (likely because of GPU execution)
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        # K.expand_dims(a) defaults to axis=-1, adding a trailing dimension,
        # e.g. shape (3,) becomes (3, 1)
        a = K.expand_dims(a)
        # now a.shape = (batch_size, timesteps, 1) and x.shape = (batch_size, timesteps, units)
        weighted_input = x * a

        # weighted_input has shape (batch_size, timesteps, units); each timestep's output vector
        # has been multiplied by its attention weight. Summing over axis=1 gives shape (batch_size, units)
        return K.sum(weighted_input, axis=1) 
Example 55
Project: keras-adamw   Author: OverLordGoldDragon   File: optimizers.py    MIT License
def __init__(self, learning_rate=0.001, beta_1=0.9, beta_2=0.999,
                 amsgrad=False, batch_size=32, total_iterations=0,
                 total_iterations_wd=None, use_cosine_annealing=False,
                 weight_decays=None, lr_multipliers=None, init_verbose=True,
                 eta_min=0, eta_max=1, t_cur=0, **kwargs):
        self.initial_decay = kwargs.pop('decay', 0.0)
        self.epsilon = kwargs.pop('epsilon', K.epsilon())
        learning_rate = kwargs.pop('lr', learning_rate)
        eta_t = kwargs.pop('eta_t', 1.)
        super(AdamW, self).__init__(**kwargs)

        with K.name_scope(self.__class__.__name__):
            self.iterations = K.variable(0, dtype='int64', name='iterations')
            self.learning_rate = K.variable(learning_rate, name='learning_rate')
            self.beta_1 = K.variable(beta_1, name='beta_1')
            self.beta_2 = K.variable(beta_2, name='beta_2')
            self.decay = K.variable(self.initial_decay, name='decay')
            self.batch_size = K.variable(batch_size, dtype='int64',
                                         name='batch_size')
            self.eta_min = K.constant(eta_min, name='eta_min')
            self.eta_max = K.constant(eta_max, name='eta_max')
            self.eta_t = K.variable(eta_t, dtype='float32', name='eta_t')
            self.t_cur = K.variable(t_cur, dtype='int64', name='t_cur')

        self.total_iterations = total_iterations
        self.total_iterations_wd = total_iterations_wd or total_iterations
        self.amsgrad = amsgrad
        self.lr_multipliers = lr_multipliers
        self.weight_decays = weight_decays or {}
        self.init_verbose = init_verbose
        self.use_cosine_annealing = use_cosine_annealing

        self._init_notified = False
        _check_args(total_iterations, use_cosine_annealing, self.weight_decays) 
Example 56
Project: keras-adamw   Author: OverLordGoldDragon   File: optimizers.py    MIT License
def __init__(self, learning_rate=0.002, beta_1=0.9, beta_2=0.999,
                 batch_size=32, total_iterations=0,
                 total_iterations_wd=None, use_cosine_annealing=False,
                 weight_decays=None, lr_multipliers=None, init_verbose=True,
                 eta_min=0, eta_max=1, t_cur=0, **kwargs):
        self.schedule_decay = kwargs.pop('schedule_decay', 0.004)
        self.epsilon = kwargs.pop('epsilon', K.epsilon())
        learning_rate = kwargs.pop('lr', learning_rate)
        eta_t = kwargs.pop('eta_t', 1.)
        super(NadamW, self).__init__(**kwargs)

        with K.name_scope(self.__class__.__name__):
            self.iterations = K.variable(0, dtype='int64', name='iterations')
            self.m_schedule = K.variable(1., name='m_schedule')
            self.learning_rate = K.variable(learning_rate, name='learning_rate')
            self.beta_1 = K.variable(beta_1, name='beta_1')
            self.beta_2 = K.variable(beta_2, name='beta_2')
            self.batch_size = K.variable(batch_size, dtype='int64',
                                         name='batch_size')
            self.eta_min = K.constant(eta_min, name='eta_min')
            self.eta_max = K.constant(eta_max, name='eta_max')
            self.eta_t = K.variable(eta_t, dtype='float32', name='eta_t')
            self.t_cur = K.variable(t_cur, dtype='int64', name='t_cur')

        self.total_iterations = total_iterations
        self.total_iterations_wd = total_iterations_wd or total_iterations
        self.lr_multipliers = lr_multipliers
        self.weight_decays = weight_decays or {}
        self.use_cosine_annealing = use_cosine_annealing
        self.init_verbose = init_verbose

        self._init_notified = False
        _check_args(total_iterations, use_cosine_annealing, self.weight_decays) 
Example 57
Project: keras-adamw   Author: OverLordGoldDragon   File: optimizers_225.py    MIT License
def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999,
                 amsgrad=False, epsilon=None, decay=0.0,
                 batch_size=32, total_iterations=0,
                 total_iterations_wd=None, use_cosine_annealing=False,
                 weight_decays=None, lr_multipliers=None, init_verbose=True,
                 eta_min=0, eta_max=1, t_cur=0, **kwargs):
        eta_t = kwargs.pop('eta_t', 1.)
        super(AdamW, self).__init__(**kwargs)

        with K.name_scope(self.__class__.__name__):
            self.iterations = K.variable(0, dtype='int64', name='iterations')
            self.lr = K.variable(lr, name='lr')
            self.beta_1 = K.variable(beta_1, name='beta_1')
            self.beta_2 = K.variable(beta_2, name='beta_2')
            self.decay = K.variable(decay, name='decay')
            self.batch_size = K.variable(batch_size, dtype='int64',
                                         name='batch_size')
            self.eta_min = K.constant(eta_min, name='eta_min')
            self.eta_max = K.constant(eta_max, name='eta_max')
            self.eta_t = K.variable(eta_t, dtype='float32', name='eta_t')
            self.t_cur = K.variable(t_cur, dtype='int64', name='t_cur')

        self.initial_decay = decay
        self.epsilon = epsilon or K.epsilon()
        self.total_iterations = total_iterations
        self.total_iterations_wd = total_iterations_wd or total_iterations
        self.amsgrad = amsgrad
        self.lr_multipliers = lr_multipliers
        self.weight_decays = weight_decays or {}
        self.init_verbose = init_verbose
        self.use_cosine_annealing = use_cosine_annealing

        self._init_notified = False
        _check_args(total_iterations, use_cosine_annealing, self.weight_decays) 
Example 58
Project: keras-adamw   Author: OverLordGoldDragon   File: optimizers_225.py    MIT License
def __init__(self, lr=0.002, beta_1=0.9, beta_2=0.999,
                 schedule_decay=0.004, epsilon=None,
                 batch_size=32, total_iterations=0,
                 total_iterations_wd=None, use_cosine_annealing=False,
                 weight_decays=None, lr_multipliers=None, init_verbose=True,
                 eta_min=0, eta_max=1, t_cur=0, **kwargs):
        eta_t = kwargs.pop('eta_t', 1.)
        super(NadamW, self).__init__(**kwargs)

        with K.name_scope(self.__class__.__name__):
            self.iterations = K.variable(0, dtype='int64', name='iterations')
            self.m_schedule = K.variable(1., name='m_schedule')
            self.lr = K.variable(lr, name='lr')
            self.beta_1 = K.variable(beta_1, name='beta_1')
            self.beta_2 = K.variable(beta_2, name='beta_2')
            self.batch_size = K.variable(batch_size, dtype='int64',
                                         name='batch_size')
            self.eta_min = K.constant(eta_min, name='eta_min')
            self.eta_max = K.constant(eta_max, name='eta_max')
            self.eta_t = K.variable(eta_t, dtype='float32', name='eta_t')
            self.t_cur = K.variable(t_cur, dtype='int64', name='t_cur')

        self.epsilon = epsilon or K.epsilon()
        self.schedule_decay = schedule_decay
        self.total_iterations = total_iterations
        self.total_iterations_wd = total_iterations_wd or total_iterations
        self.lr_multipliers = lr_multipliers
        self.weight_decays = weight_decays or {}
        self.use_cosine_annealing = use_cosine_annealing
        self.init_verbose = init_verbose

        self._init_notified = False
        _check_args(total_iterations, use_cosine_annealing, self.weight_decays) 
Example 59
Project: DeepLearn   Author: GauravBh1010tt   File: model_abcnn.py    MIT License
def compute_euclidean_match_score(l_r):
    l, r = l_r
    denominator = 1. + K.sqrt(
        -2 * K.batch_dot(l, r, axes=[2, 2]) +
        K.expand_dims(K.sum(K.square(l), axis=2), 2) +
        K.expand_dims(K.sum(K.square(r), axis=2), 1)
    )
    denominator = K.maximum(denominator, K.epsilon())
    return 1. / denominator 
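A hedged sketch of wiring this score into a model via a Lambda layer; `left_repr` and `right_repr` are hypothetical 3-D tensors of shape (batch, timesteps, features).

from keras.layers import Lambda
# both inputs are expected to be (batch, timesteps, features) tensors
match_scores = Lambda(compute_euclidean_match_score)([left_repr, right_repr])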
Example 60
Project: gccaps   Author: tqbl   File: capsnet.py    MIT License 5 votes vote down vote up
def _merge(inputs):
    """Merge the given pair of inputs across the temporal dimension.

    Args:
        inputs (list): Pair of inputs to merge. Each input should be a
            T x L Keras tensor (excluding batch dimension), where T is
            the temporal dimension and L is the number of classes.

    Returns:
        A Keras tensor (vector) of length L.
    """
    caps, att = inputs
    att = K.clip(att, K.epsilon(), 1.)
    return K.sum(caps * att, axis=1) / K.sum(att, axis=1) 
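A sketch of using the merge function inside a model, assuming `caps_out` and `att_out` are the T x L capsule and attention tensors described in the docstring.

from keras.layers import Lambda
# attention-weighted average over the temporal dimension T
class_probs = Lambda(_merge)([caps_out, att_out])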
Example 61
Project: gccaps   Author: tqbl   File: capsules.py    MIT License 5 votes vote down vote up
def squash(x, axis=-1):
    """Apply a squashing nonlinearity as described in [1]_.

    Args:
        x (tensor): Input tensor to transform.
        axis (int): Axis along which squashing is applied.

    Returns:
        A Keras tensor of the resulting output.
    """
    s_squared_norm = K.sum(K.square(x), axis, keepdims=True)
    scale = s_squared_norm / (1 + s_squared_norm) \
        / K.sqrt(s_squared_norm + K.epsilon())
    return scale * x 
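A quick numerical check of the squashing behaviour (a sketch, assuming a TensorFlow backend so K.eval can fetch the value):

import numpy as np
from keras import backend as K
v = K.constant(np.array([[3.0, 4.0]]))   # input norm is 5
print(K.eval(squash(v)))                 # output norm is 25/26 ≈ 0.96, direction unchanged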
Example 62
Project: backdoor   Author: bolunwang   File: visualizer.py    MIT License 5 votes vote down vote up
def reset_state(self, pattern_init, mask_init):

        print('resetting state')

        # setting cost
        if self.reset_cost_to_zero:
            self.cost = 0
        else:
            self.cost = self.init_cost
        K.set_value(self.cost_tensor, self.cost)

        # setting mask and pattern
        mask = np.array(mask_init)
        pattern = np.array(pattern_init)
        mask = np.clip(mask, self.mask_min, self.mask_max)
        pattern = np.clip(pattern, self.color_min, self.color_max)
        mask = np.expand_dims(mask, axis=2)

        # convert to tanh space
        mask_tanh = np.arctanh((mask - 0.5) * (2 - self.epsilon))
        pattern_tanh = np.arctanh((pattern / 255.0 - 0.5) * (2 - self.epsilon))
        print('mask_tanh', np.min(mask_tanh), np.max(mask_tanh))
        print('pattern_tanh', np.min(pattern_tanh), np.max(pattern_tanh))

        K.set_value(self.mask_tanh_tensor, mask_tanh)
        K.set_value(self.pattern_tanh_tensor, pattern_tanh)

        # resetting optimizer states
        self.reset_opt()

        pass 
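The (2 - self.epsilon) factor keeps the arctanh argument strictly inside (-1, 1), so the tanh-space parameters stay finite even when a mask value is exactly 0 or 1; a small numpy illustration with an assumed epsilon value:

import numpy as np
eps = 1e-7                                          # assumed value of self.epsilon
mask = np.array([0.0, 0.5, 1.0])
mask_tanh = np.arctanh((mask - 0.5) * (2 - eps))    # finite at both boundaries
recovered = np.tanh(mask_tanh) / (2 - eps) + 0.5    # maps back to the original [0, 1] values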
Example 63
Project: deepkeyphraseextraction   Author: basaldella   File: AnswerRNN2.py    Apache License 2.0 5 votes vote down vote up
def cos_distance(y_true, y_pred):
    import keras.backend as K
    def l2_normalize(x, axis):
        norm = K.sqrt(K.sum(K.square(x), axis=axis, keepdims=True))
        return K.sign(x) * K.maximum(K.abs(x), K.epsilon()) / K.maximum(norm, K.epsilon())

    # use the epsilon-stabilized helper defined above (otherwise it would be unused)
    y_true = l2_normalize(y_true, axis=-1)
    y_pred = l2_normalize(y_pred, axis=-1)
    return K.mean(1 - K.sum((y_true * y_pred), axis=-1))

# End loss 
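A quick sanity check of the loss (a sketch, assuming a TensorFlow backend; the vectors are illustrative):

import numpy as np
from keras import backend as K
a = K.constant(np.array([[1.0, 0.0]]))
b = K.constant(np.array([[0.0, 1.0]]))
print(K.eval(cos_distance(a, b)))   # orthogonal vectors give a cosine distance of 1.0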
Example 64
Project: keras_extension   Author: k1414st   File: optimizers.py    MIT License 5 votes vote down vote up
def get_config(self):
        config = {'lr': float(K.get_value(self.lr)),
                  'beta_1': float(K.get_value(self.beta_1)),
                  'beta_2': float(K.get_value(self.beta_2)),
                  'decay': float(K.get_value(self.decay)),
                  'terminal_bound': float(K.get_value(self.terminal_bound)),
                  'upper_bound': float(K.get_value(self.upper_bound)),
                  'lower_bound': float(K.get_value(self.lower_bound)),
                  'epsilon': self.epsilon}
        base_config = super(AdaBound, self).get_config()
        return dict(list(base_config.items()) + list(config.items())) 
Example 65
Project: cdt-ccm-aae   Author: danielegrattarola   File: layers.py    MIT License 5 votes vote down vote up
def __call__(self, w):
            return K.maximum(w, K.epsilon()) 
Example 66
Project: cdt-ccm-aae   Author: danielegrattarola   File: layers.py    MIT License 5 votes vote down vote up
def __call__(self, w):
            return K.minimum(w, -K.epsilon()) 
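Examples 65 and 66 are __call__ methods of weight constraints that keep parameters strictly positive (or strictly negative) by an epsilon margin. A hedged sketch of how such a constraint is usually attached to a layer; the class name Positive is hypothetical:

from keras import backend as K
from keras.constraints import Constraint
from keras.layers import Dense

class Positive(Constraint):
    def __call__(self, w):
        return K.maximum(w, K.epsilon())   # weights are clipped to stay strictly positive

layer = Dense(16, kernel_constraint=Positive())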
Example 67
Project: deepGroupv2   Author: albu5   File: networks.py    MIT License 5 votes vote down vote up
def my_categorical_crossentropy(target, output, from_logits=False):
    """Categorical crossentropy between an output tensor and a target tensor.

    # Arguments
        target: A tensor of the same shape as `output`.
        output: A tensor resulting from a softmax
            (unless `from_logits` is True, in which
            case `output` is expected to be the logits).
        from_logits: Boolean, whether `output` is the
            result of a softmax, or is a tensor of logits.

    # Returns
        Output tensor.
    """
    # Note: tf.nn.softmax_cross_entropy_with_logits
    # expects logits, Keras expects probabilities.
    if not from_logits:
        # scale preds so that the class probas of each sample sum to 1
        output /= tf.reduce_sum(output,
                                len(output.get_shape()) - 1,
                                True)
        # manual computation of crossentropy
        _epsilon = tf.convert_to_tensor(kb.epsilon(), output.dtype.base_dtype)
        output = tf.clip_by_value(output, _epsilon, 1. - _epsilon)
        return - tf.reduce_sum(target * tf.log(output), axis=None)
    else:
        return tf.nn.softmax_cross_entropy_with_logits(labels=target,
                                                       logits=output) 
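A small numerical check, assuming `tf` is TensorFlow 1.x and `kb` is keras.backend as in the original module; note that the result is summed over all samples rather than averaged:

import numpy as np
import tensorflow as tf
target = tf.constant(np.array([[0., 1.], [1., 0.]], dtype=np.float32))
output = tf.constant(np.array([[0.2, 0.8], [0.9, 0.1]], dtype=np.float32))
loss = my_categorical_crossentropy(target, output)
# evaluating with a TF1 session gives -(log 0.8 + log 0.9) ≈ 0.328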
Example 68
Project: deepGroupv2   Author: albu5   File: networks.py    MIT License 5 votes vote down vote up
def my_categorical_crossentropy_mask(target, output, from_logits=False):
    """Categorical crossentropy between an output tensor and a target tensor.

    # Arguments
        target: A tensor of the same shape as `output`.
        output: A tensor resulting from a softmax
            (unless `from_logits` is True, in which
            case `output` is expected to be the logits).
        from_logits: Boolean, whether `output` is the
            result of a softmax, or is a tensor of logits.

    # Returns
        Output tensor.
    """
    # Note: tf.nn.softmax_cross_entropy_with_logits
    # expects logits, Keras expects probabilities.
    if not from_logits:
        # scale preds so that the class probas of each sample sum to 1
        output /= tf.reduce_sum(output,
                                len(output.get_shape()) - 1,
                                True)
        # manual computation of crossentropy
        _epsilon = tf.convert_to_tensor(kb.epsilon(), output.dtype.base_dtype)
        output = tf.clip_by_value(output, _epsilon, 1. - _epsilon)
        valid_logs = tf.reduce_sum(target * tf.log(output), axis=3)
        valid_logs *= tf.cast(tf.argmax(target, axis=3) > 0, dtype=kb.floatx())
        return -tf.reduce_sum(valid_logs)
    else:
        return tf.nn.softmax_cross_entropy_with_logits(labels=target,
                                                       logits=output) 
Example 69
Project: CDAE4InfoExtraction   Author: grassknoted   File: capsulelayers.py    MIT License 5 votes vote down vote up
def squash(vectors, axis=-1):
    """
    The non-linear activation used in Capsule. It drives the length of a large vector toward 1 and that of a small vector toward 0.
    :param vectors: some vectors to be squashed, N-dim tensor
    :param axis: the axis to squash
    :return: a Tensor with same shape as input vectors
    """
    s_squared_norm = K.sum(K.square(vectors), axis, keepdims=True)
    scale = s_squared_norm / (1 + s_squared_norm) / K.sqrt(s_squared_norm + K.epsilon())
    return scale * vectors 
Example 70
Project: BERT   Author: yyht   File: train.py    Apache License 2.0 5 votes vote down vote up
def _mask_loss(y_true, y_pred, y_mask, element_wise_loss):
    l = K.switch(y_mask, element_wise_loss(y_true, y_pred), K.zeros_like(y_mask, dtype=K.floatx()))
    return K.sum(l) / (K.cast(K.sum(y_mask), dtype='float32') + K.epsilon()) 
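A hedged sketch of calling the helper with a squared-error element-wise loss and a float mask; y_true and y_pred are hypothetical tensors of the same shape:

from keras import backend as K
element_wise_loss = lambda t, p: K.square(t - p)
mask = K.cast(K.not_equal(y_true, 0.), K.floatx())   # 1 where a target is present
loss = _mask_loss(y_true, y_pred, mask, element_wise_loss)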
Example 71
Project: DogEmbeddings   Author: ericzhao28   File: siamese.py    MIT License 5 votes vote down vote up
def euclidean_distance(vects):
    x, y = vects
    return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True), K.epsilon())) 
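In a Siamese network this distance is typically applied to the two tower outputs with a Lambda layer; a sketch where processed_a and processed_b are hypothetical embedding tensors of shape (batch, features):

from keras.layers import Lambda
distance = Lambda(euclidean_distance)([processed_a, processed_b])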
Example 72
Project: group-ksparse-temporal-cnns   Author: srph25   File: ops.py    MIT License 5 votes vote down vote up
def n_p(Z, p, axis, epsilon=None):
    if epsilon is None:
        epsilon = K.epsilon()
    return K.pow(K.sum(K.pow(K.abs(Z), p), axis=axis) + epsilon, 1. / p) 
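A quick check of the epsilon-stabilized p-norm (a sketch, assuming a TensorFlow backend):

import numpy as np
from keras import backend as K
Z = K.constant(np.array([[3.0, 4.0]]))
print(K.eval(n_p(Z, p=2, axis=-1)))   # ≈ 5.0; the added epsilon keeps the result and its gradient finite for all-zero inputs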
Example 73
Project: group-ksparse-temporal-cnns   Author: srph25   File: ops.py    MIT License 5 votes vote down vote up
def group_norms(inputs, groups, axis, norm=2, epsilon=None):
    if axis == -1:
        axis = K.ndim(inputs) - 1
    if epsilon is None:
        epsilon = K.epsilon()
    inputs_group = tf.split(inputs, num_or_size_splits=groups, axis=axis)
    inputs_group = K.concatenate([K.expand_dims(t, axis=axis) for t in inputs_group], axis=axis)
    return n_p(inputs_group, p=norm, axis=(axis + 1), epsilon=epsilon) 
Example 74
Project: group-ksparse-temporal-cnns   Author: srph25   File: ops.py    MIT License 5 votes vote down vote up
def group_ksparse(x, groups, k, axis_group, axis_sparse, norm=2, alpha=1, epsilon=None):
    if isinstance(axis_group, int):
        axis_group = (axis_group,)
    elif isinstance(axis_group, list):
        axis_group = tuple(axis_group)
    if isinstance(axis_sparse, int):
        axis_sparse = (axis_sparse,)
    elif isinstance(axis_sparse, list):
        axis_sparse = tuple(axis_sparse)
    assert(1 - bool(set(axis_group) & set(axis_sparse)))
    if epsilon is None:
        epsilon = K.epsilon()
    axis_complement = tuple(set(range(K.ndim(x))) - set(axis_group) - set(axis_sparse))
    shape_reduce_group = K.prod([K.shape(x)[j] for j in axis_group])
    shape_reduce_sparse = K.prod([K.shape(x)[j] for j in axis_sparse])
    _k = K.minimum(K.in_train_phase(k, alpha * k), shape_reduce_sparse)
    inputs_permute_dimensions = K.permute_dimensions(x, axis_complement + axis_sparse + axis_group)
    inputs_permute_dimensions_reshape = K.reshape(inputs_permute_dimensions, (-1, shape_reduce_sparse, shape_reduce_group))
    norm_group_permute_dimensions_reshape = group_norms(inputs=inputs_permute_dimensions_reshape, groups=groups, axis=-1, norm=norm, epsilon=epsilon)
    norm_group_permute_dimensions_reshape = K.permute_dimensions(norm_group_permute_dimensions_reshape, (0, 2, 1))
    norm_group_permute_dimensions_reshape = K.reshape(norm_group_permute_dimensions_reshape, (-1, shape_reduce_sparse))
    _, indices = tf.nn.top_k(norm_group_permute_dimensions_reshape, _k)
    scatter_indices = K.concatenate([(K.arange(K.shape(norm_group_permute_dimensions_reshape)[0])[:, None] * K.ones((1, _k), dtype='int32'))[:, :, None], indices[:, :, None]])
    scatter_updates = K.ones((K.shape(norm_group_permute_dimensions_reshape)[0], _k))
    mask_group_permute_dimensions_reshape = K.cast(tf.scatter_nd(scatter_indices, scatter_updates, K.shape(norm_group_permute_dimensions_reshape)), K.floatx())
    mask_group_permute_dimensions_reshape = K.reshape(mask_group_permute_dimensions_reshape, (-1, groups, shape_reduce_sparse))
    mask_group_permute_dimensions_reshape = K.permute_dimensions(mask_group_permute_dimensions_reshape, (0, 2, 1))
    mask_permute_dimensions_reshape = (mask_group_permute_dimensions_reshape[:, :, :, None] * K.ones((1, 1, 1, floor_div(shape_reduce_group, groups))))
    mask_permute_dimensions = K.reshape(mask_permute_dimensions_reshape, K.shape(inputs_permute_dimensions))
    mask = K.permute_dimensions(mask_permute_dimensions, tuple(np.argsort(axis_complement + axis_sparse + axis_group)))
    return mask * x 
Example 75
Project: iMIMIC-RCVs   Author: medgift   File: utils.py    MIT License 5 votes vote down vote up
def normalize(array, min_value=0., max_value=1.):
    """Normalizes the numpy array to (min_value, max_value)

    Args:
        array: The numpy array
        min_value: The min value in normalized array (Default value = 0)
        max_value: The max value in normalized array (Default value = 1)

    Returns:
        The array normalized to range between (min_value, max_value)
    """
    arr_min = np.min(array)
    arr_max = np.max(array)
    normalized = (array - arr_min) / (arr_max - arr_min + K.epsilon())
    return (max_value - min_value) * normalized + min_value 
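Two small numpy examples; the K.epsilon() term in the denominator keeps a constant array from producing a 0/0 division:

import numpy as np
print(normalize(np.array([0., 5., 10.])))   # -> [0.  0.5 1. ]
print(normalize(np.array([2., 2., 2.])))    # constant input -> all zeros instead of NaN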
Example 76
Project: keras_radam   Author: forin-xyz   File: radam.py    MIT License 4 votes vote down vote up
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay:
            lr = lr * (1. / (1. + self.decay * K.cast(
                self.iterations, K.dtype(self.decay)
            )))

        t = K.cast(self.iterations, K.floatx()) + 1.
        beta_1 = self.beta_1
        beta_2 = self.beta_2
        beta_1_t = K.pow(beta_1, t)
        beta_2_t = K.pow(beta_2, t)
        rho_inf = 2. / (1. - beta_2) - 1.
        rho_t = rho_inf - 2. * t * beta_2_t / (1. - beta_2_t)
        r_t = K.sqrt(
            K.relu(rho_t - 4.) * (rho_t - 2.) * rho_inf / (
                (rho_inf - 4.) * (rho_inf - 2.) * rho_t )
        )
        flag = K.cast(rho_t > 4., K.floatx())

        ms = [K.zeros(K.int_shape(p)) for p in params]
        vs = [K.zeros(K.int_shape(p)) for p in params]

        self.weights = [self.iterations] + ms + vs
        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = beta_1 * m + (1. - beta_1) * g
            v_t = beta_2 * v + (1. - beta_2) * K.square(g)

            m_hat_t = m_t / (1. - beta_1_t)
            v_hat_t = K.sqrt(v_t / (1. - beta_2_t))
            # when the variance estimate is not tractable (flag == 0, so r_t == 0),
            # fall back to the un-adapted momentum step instead of moving up the gradient
            new_p = p - lr * (flag * r_t / (v_hat_t + self.epsilon) + (1. - flag)) * m_hat_t

            if getattr(p, "constraint", None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
        return self.updates 
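The variance-rectification schedule can be inspected outside the graph; a numpy sketch using the same formulas as the update rule above:

import numpy as np
beta_2 = 0.999
rho_inf = 2. / (1. - beta_2) - 1.
for t in [1., 5., 10., 100., 10000.]:
    beta_2_t = beta_2 ** t
    rho_t = rho_inf - 2. * t * beta_2_t / (1. - beta_2_t)
    r_t = 0. if rho_t <= 4. else np.sqrt(
        (rho_t - 4.) * (rho_t - 2.) * rho_inf /
        ((rho_inf - 4.) * (rho_inf - 2.) * rho_t))
    print(t, rho_t, r_t)   # r_t is 0 for the first few steps and approaches 1 as rho_t -> rho_inf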
Example 77
Project: cbc_networks   Author: saralajew   File: reasoning_layers.py    BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def call(self, inputs, **kwargs):
        # decode the reasoning probabilities
        positive_kernel = self.reasoning_probabilities[0]
        negative_kernel = (1 - positive_kernel) * \
                          self.reasoning_probabilities[1]

        if self.use_component_probabilities:
            # squash component probabilities
            components_probabilities = softmax(self.component_probabilities,
                                               axis=2)

            positive_kernel = positive_kernel * components_probabilities
            negative_kernel = negative_kernel * components_probabilities

        # get normalization tensor
        # stabilize the division with a small epsilon
        normalization = K.sum(positive_kernel + negative_kernel,
                              axis=2,
                              keepdims=True) + K.epsilon()

        # get sliding kernel and bias
        if self.use_pixel_probabilities:
            pixel_probabilities = softmax(self.pixel_probabilities,
                                          axis=(0, 1))
            # scale kernel with priors
            kernel = (positive_kernel - negative_kernel) / normalization \
                     * pixel_probabilities
            bias = K.sum(negative_kernel / normalization
                         * pixel_probabilities,
                         axis=(0, 1, 2),
                         keepdims=True)
        else:
            kernel = (positive_kernel - negative_kernel) / normalization
            bias = K.sum(negative_kernel / normalization,
                         axis=(0, 1, 2),
                         keepdims=True)

        # compute probabilities by a sliding operation
        probs = K.conv2d(inputs, kernel,
                         strides=self.strides,
                         padding=self.padding,
                         data_format='channels_last',
                         dilation_rate=self.dilation_rate) + bias

        if not self.use_pixel_probabilities:
            # divide by number of kernel_size
            probs = probs / np.prod(self.kernel_size)

        # reshape to m x n x #classes x #replicas
        probs = K.reshape(probs,
                          (-1,) + K.int_shape(probs)[1:3]
                          + (self.n_classes, self.n_replicas))

        # squeeze replica dimension if one.
        if self.n_replicas == 1:
            probs = K.squeeze(probs, axis=-1)

        return probs 
Example 78
Project: keras-adamw   Author: OverLordGoldDragon   File: optimizers_225.py    MIT License 4 votes vote down vote up
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]
        self.updates.append(K.update_add(self.t_cur, 1))

        t = K.cast(self.iterations, K.floatx()) + 1

        # Due to the recommendations in [2], i.e. warming momentum schedule
        momentum_cache_t = self.beta_1 * (1. - 0.5 * (
            K.pow(K.cast_to_floatx(0.96), t * self.schedule_decay)))
        momentum_cache_t_1 = self.beta_1 * (1. - 0.5 * (
            K.pow(K.cast_to_floatx(0.96), (t + 1) * self.schedule_decay)))
        m_schedule_new = self.m_schedule * momentum_cache_t
        m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1
        self.updates.append((self.m_schedule, m_schedule_new))

        shapes = [K.int_shape(p) for p in params]
        ms = [K.zeros(shape) for shape in shapes]
        vs = [K.zeros(shape) for shape in shapes]

        self.weights = [self.iterations] + ms + vs

        total_iterations = self.total_iterations
        # Cosine annealing
        if self.use_cosine_annealing and total_iterations != 0:
            self.eta_t = _compute_eta_t(self)
        self.lr_t = self.lr * self.eta_t  # for external tracking

        for p, g, m, v in zip(params, grads, ms, vs):
            # Learning rate multipliers
            lr_t = self.lr
            if self.lr_multipliers is not None:
                lr_t = _apply_lr_multiplier(self, lr_t, p)

            # the following equations given in [1]
            g_prime = g / (1. - m_schedule_new)
            m_t = self.beta_1 * m + (1. - self.beta_1) * g
            m_t_prime = m_t / (1. - m_schedule_next)
            v_t = self.beta_2 * v + (1. - self.beta_2) * K.square(g)
            v_t_prime = v_t / (1. - K.pow(self.beta_2, t))
            m_t_bar = (1. - momentum_cache_t) * g_prime + (
                momentum_cache_t_1 * m_t_prime)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            p_t = p - self.eta_t * lr_t * m_t_bar / (
                    K.sqrt(v_t_prime) + self.epsilon)

            # Weight decays
            if p.name in self.weight_decays.keys() and total_iterations != 0:
                p_t = _apply_weight_decays(self, p, p_t)
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))

        self._init_notified = True
        return self.updates 
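The warming momentum schedule from [2] can be traced with a short numpy-free sketch (beta_1 and schedule_decay set to the constructor defaults shown earlier):

beta_1, schedule_decay = 0.9, 0.004
for t in [1., 100., 1000., 10000.]:
    momentum_cache_t = beta_1 * (1. - 0.5 * 0.96 ** (t * schedule_decay))
    print(t, momentum_cache_t)   # rises slowly from about 0.5 * beta_1 toward beta_1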
Example 79
Project: keras_extension   Author: k1414st   File: optimizers.py    MIT License 4 votes vote down vote up
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

            eta_l_t = self.terminal_bound - \
                (self.terminal_bound - self.lower_bound) / \
                ((1. - self.beta_2) * t + 1)
            eta_u_t = self.terminal_bound + \
                (self.upper_bound - self.terminal_bound) / \
                ((1. - self.beta_2) * t)

            clipped_lr_t = K.minimum(
                K.maximum(lr_t / (K.sqrt(v_t) + self.epsilon), eta_l_t), eta_u_t)
            p_t = p - clipped_lr_t * m_t

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates 
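A sketch of how the AdaBound learning-rate bounds evolve, using the same expressions as above; the hyperparameter values are assumed for illustration:

beta_2, terminal_bound, lower_bound, upper_bound = 0.999, 0.1, 0.0, 10.0   # assumed values
for t in [1., 100., 10000.]:
    eta_l_t = terminal_bound - (terminal_bound - lower_bound) / ((1. - beta_2) * t + 1)
    eta_u_t = terminal_bound + (upper_bound - terminal_bound) / ((1. - beta_2) * t)
    print(t, eta_l_t, eta_u_t)   # both bounds converge toward terminal_bound as t grows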
Example 80
Project: keras_extension   Author: k1414st   File: optimizers.py    MIT License 4 votes vote down vote up
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

            eta_l_t = self.terminal_bound - \
                (self.terminal_bound - self.lower_bound) / \
                ((1. - self.beta_2) * t + 1)
            eta_u_t = self.terminal_bound + \
                (self.upper_bound - self.terminal_bound) / \
                ((1. - self.beta_2) * t)

            clipped_lr_t = K.minimum(
                K.maximum(lr_t / (K.sqrt(v_t) + self.epsilon), eta_l_t), eta_u_t)
            p_t = p - clipped_lr_t * m_t

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates