Python keras.activations.sigmoid() Examples

The following code examples show how to use keras.activations.sigmoid(). They are drawn from open source Python projects.
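
For orientation, here is a minimal self-contained sketch of calling keras.activations.sigmoid() directly on a backend tensor (assuming Keras 2 with the TensorFlow backend; the printed values are approximate):

import numpy as np
from keras import activations
from keras import backend as K

x = K.constant(np.array([-2.0, 0.0, 2.0]))
y = activations.sigmoid(x)  # element-wise 1 / (1 + exp(-x))
print(K.eval(y))            # ~[0.119, 0.5, 0.881]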

Example 1
Project: workspace_2017   Author: nwiizo   File: test_activations.py    MIT License
def test_sigmoid():
    '''
    Test using a numerically stable reference sigmoid implementation
    '''
    def ref_sigmoid(x):
        if x >= 0:
            return 1 / (1 + np.exp(-x))
        else:
            z = np.exp(x)
            return z / (1 + z)
    sigmoid = np.vectorize(ref_sigmoid)

    x = K.placeholder(ndim=2)
    f = K.function([x], [activations.sigmoid(x)])
    test_values = get_standard_values()

    result = f([test_values])[0]
    expected = sigmoid(test_values)
    assert_allclose(result, expected, rtol=1e-05) 
Example 2
Project: workspace_2017   Author: nwiizo   File: test_activations.py    MIT License
def test_hard_sigmoid():
    '''
    Test using a reference hard sigmoid implementation
    '''
    def ref_hard_sigmoid(x):
        '''
        Reference hard sigmoid with slope and shift values from theano, see
        https://github.com/Theano/Theano/blob/master/theano/tensor/nnet/sigm.py
        '''
        x = (x * 0.2) + 0.5
        z = 0.0 if x <= 0 else (1.0 if x >= 1 else x)
        return z
    hard_sigmoid = np.vectorize(ref_hard_sigmoid)

    x = K.placeholder(ndim=2)
    f = K.function([x], [activations.hard_sigmoid(x)])
    test_values = get_standard_values()

    result = f([test_values])[0]
    expected = hard_sigmoid(test_values)
    assert_allclose(result, expected, rtol=1e-05) 
Example 3
Project: applications   Author: geomstats   File: activations_test.py    MIT License
def test_sigmoid():
    """Test using a numerically stable reference sigmoid implementation.
    """
    def ref_sigmoid(x):
        if x >= 0:
            return 1 / (1 + np.exp(-x))
        else:
            z = np.exp(x)
            return z / (1 + z)
    sigmoid = np.vectorize(ref_sigmoid)

    x = K.placeholder(ndim=2)
    f = K.function([x], [activations.sigmoid(x)])
    test_values = get_standard_values()

    result = f([test_values])[0]
    expected = sigmoid(test_values)
    assert_allclose(result, expected, rtol=1e-05) 
Example 4
Project: applications   Author: geomstats   File: activations_test.py    MIT License
def test_hard_sigmoid():
    """Test using a reference hard sigmoid implementation.
    """
    def ref_hard_sigmoid(x):
        x = (x * 0.2) + 0.5
        z = 0.0 if x <= 0 else (1.0 if x >= 1 else x)
        return z
    hard_sigmoid = np.vectorize(ref_hard_sigmoid)

    x = K.placeholder(ndim=2)
    f = K.function([x], [activations.hard_sigmoid(x)])
    test_values = get_standard_values()

    result = f([test_values])[0]
    expected = hard_sigmoid(test_values)
    assert_allclose(result, expected, rtol=1e-05) 
Example 5
Project: CBAM-keras   Author: kobiso   File: attention_module.py    MIT License
def channel_attention(input_feature, ratio=8):
	"""Channel attention submodule of CBAM (https://arxiv.org/abs/1807.06521)."""
	channel_axis = 1 if K.image_data_format() == "channels_first" else -1
	channel = input_feature._keras_shape[channel_axis]
	
	shared_layer_one = Dense(channel//ratio,
							 activation='relu',
							 kernel_initializer='he_normal',
							 use_bias=True,
							 bias_initializer='zeros')
	shared_layer_two = Dense(channel,
							 kernel_initializer='he_normal',
							 use_bias=True,
							 bias_initializer='zeros')
	
	avg_pool = GlobalAveragePooling2D()(input_feature)    
	avg_pool = Reshape((1,1,channel))(avg_pool)
	assert avg_pool._keras_shape[1:] == (1,1,channel)
	avg_pool = shared_layer_one(avg_pool)
	assert avg_pool._keras_shape[1:] == (1,1,channel//ratio)
	avg_pool = shared_layer_two(avg_pool)
	assert avg_pool._keras_shape[1:] == (1,1,channel)
	
	max_pool = GlobalMaxPooling2D()(input_feature)
	max_pool = Reshape((1,1,channel))(max_pool)
	assert max_pool._keras_shape[1:] == (1,1,channel)
	max_pool = shared_layer_one(max_pool)
	assert max_pool._keras_shape[1:] == (1,1,channel//ratio)
	max_pool = shared_layer_two(max_pool)
	assert max_pool._keras_shape[1:] == (1,1,channel)
	
	cbam_feature = Add()([avg_pool,max_pool])
	cbam_feature = Activation('sigmoid')(cbam_feature)
	
	if K.image_data_format() == "channels_first":
		cbam_feature = Permute((3, 1, 2))(cbam_feature)
	
	return multiply([input_feature, cbam_feature]) 
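
A minimal usage sketch for the block above, wiring it into a small functional model (the Input/Model imports and the 32x32x64 feature shape are illustrative additions, not part of the source):

from keras.layers import Input
from keras.models import Model

inputs = Input(shape=(32, 32, 64))            # hypothetical feature map
refined = channel_attention(inputs, ratio=8)  # reweight channels by importance
model = Model(inputs, refined)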
Example 6
Project: talos   Author: autonomio   File: params.py    MIT License
def breast_cancer():

    from keras.optimizers import Adam, Nadam, RMSprop
    from keras.losses import logcosh, binary_crossentropy
    from keras.activations import relu, elu, sigmoid

    # then we can go ahead and set the parameter space
    p = {'lr': (0.5, 5, 10),
         'first_neuron': [4, 8, 16, 32, 64],
         'hidden_layers': [0, 1, 2],
         'batch_size': (2, 30, 10),
         'epochs': [50, 100, 150],
         'dropout': (0, 0.5, 5),
         'shapes': ['brick', 'triangle', 'funnel'],
         'optimizer': [Adam, Nadam, RMSprop],
         'losses': [logcosh, binary_crossentropy],
         'activation': [relu, elu],
         'last_activation': [sigmoid]}

    return p 
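
For context, a dictionary like this is normally handed to talos.Scan together with a model-building function; a hedged sketch (x, y, and breast_cancer_model are placeholders, not part of the source):

import talos

p = breast_cancer()
# x, y: feature matrix and binary labels (placeholders)
# breast_cancer_model: a function with the talos signature
# (x_train, y_train, x_val, y_val, params) -> (history, model)
scan = talos.Scan(x=x, y=y, params=p, model=breast_cancer_model)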
Example 7
Project: applications   Author: geomstats   File: activations_test.py    MIT License
def test_serialization():
    all_activations = ['softmax', 'relu', 'elu', 'tanh',
                       'sigmoid', 'hard_sigmoid', 'linear',
                       'softplus', 'softsign', 'selu']
    for name in all_activations:
        fn = activations.get(name)
        ref_fn = getattr(activations, name)
        assert fn == ref_fn
        config = activations.serialize(fn)
        fn = activations.deserialize(config)
        assert fn == ref_fn 
Example 8
Project: CBAM-keras   Author: kobiso   File: attention_module.py    MIT License
def se_block(input_feature, ratio=8):
	"""Contains the implementation of Squeeze-and-Excitation(SE) block.
	As described in https://arxiv.org/abs/1709.01507.
	"""
	
	channel_axis = 1 if K.image_data_format() == "channels_first" else -1
	channel = input_feature._keras_shape[channel_axis]

	se_feature = GlobalAveragePooling2D()(input_feature)
	se_feature = Reshape((1, 1, channel))(se_feature)
	assert se_feature._keras_shape[1:] == (1,1,channel)
	se_feature = Dense(channel // ratio,
					   activation='relu',
					   kernel_initializer='he_normal',
					   use_bias=True,
					   bias_initializer='zeros')(se_feature)
	assert se_feature._keras_shape[1:] == (1,1,channel//ratio)
	se_feature = Dense(channel,
					   activation='sigmoid',
					   kernel_initializer='he_normal',
					   use_bias=True,
					   bias_initializer='zeros')(se_feature)
	assert se_feature._keras_shape[1:] == (1,1,channel)
	if K.image_data_format() == 'channels_first':
		se_feature = Permute((3, 1, 2))(se_feature)

	se_feature = multiply([input_feature, se_feature])
	return se_feature 
Example 9
Project: CBAM-keras   Author: kobiso   File: attention_module.py    MIT License
def spatial_attention(input_feature):
	"""Spatial attention submodule of CBAM (https://arxiv.org/abs/1807.06521)."""
	kernel_size = 7
	
	if K.image_data_format() == "channels_first":
		channel = input_feature._keras_shape[1]
		cbam_feature = Permute((2,3,1))(input_feature)
	else:
		channel = input_feature._keras_shape[-1]
		cbam_feature = input_feature
	
	avg_pool = Lambda(lambda x: K.mean(x, axis=3, keepdims=True))(cbam_feature)
	assert avg_pool._keras_shape[-1] == 1
	max_pool = Lambda(lambda x: K.max(x, axis=3, keepdims=True))(cbam_feature)
	assert max_pool._keras_shape[-1] == 1
	concat = Concatenate(axis=3)([avg_pool, max_pool])
	assert concat._keras_shape[-1] == 2
	cbam_feature = Conv2D(filters=1,
	                      kernel_size=kernel_size,
	                      strides=1,
	                      padding='same',
	                      activation='sigmoid',
	                      kernel_initializer='he_normal',
	                      use_bias=False)(concat)
	assert cbam_feature._keras_shape[-1] == 1
	
	if K.image_data_format() == "channels_first":
		cbam_feature = Permute((3, 1, 2))(cbam_feature)
		
	return multiply([input_feature, cbam_feature]) 
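
In CBAM the two submodules above are applied back to back, channel attention first and spatial attention second; a minimal composition sketch following the ordering in the paper (https://arxiv.org/abs/1807.06521):

def cbam_block(cbam_feature, ratio=8):
	"""Refine a feature map with channel attention followed by spatial attention."""
	cbam_feature = channel_attention(cbam_feature, ratio)
	cbam_feature = spatial_attention(cbam_feature)
	return cbam_feature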
Example 10
Project: CBAM-keras   Author: kobiso   File: attention_module-checkpoint.py    MIT License
def se_block(input_feature, ratio=8):
	"""Contains the implementation of Squeeze-and-Excitation(SE) block.
	As described in https://arxiv.org/abs/1709.01507.
	"""
	
	channel_axis = 1 if K.image_data_format() == "channels_first" else -1
	channel = input_feature._keras_shape[channel_axis]

	se_feature = GlobalAveragePooling2D()(input_feature)
	se_feature = Reshape((1, 1, channel))(se_feature)
	assert se_feature._keras_shape[1:] == (1,1,channel)
	se_feature = Dense(channel // ratio,
					   activation='relu',
					   kernel_initializer='he_normal',
					   use_bias=True,
					   bias_initializer='zeros')(se_feature)
	assert se_feature._keras_shape[1:] == (1,1,channel//ratio)
	se_feature = Dense(channel,
					   activation='sigmoid',
					   kernel_initializer='he_normal',
					   use_bias=True,
					   bias_initializer='zeros')(se_feature)
	assert se_feature._keras_shape[1:] == (1,1,channel)
	if K.image_data_format() == 'channels_first':
		se_feature = Permute((3, 1, 2))(se_feature)

	se_feature = multiply([input_feature, se_feature])
	return se_feature 
Example 11
Project: CBAM-keras   Author: kobiso   File: attention_module-checkpoint.py    MIT License
def channel_attention(input_feature, ratio=8):
	"""Channel attention submodule of CBAM (https://arxiv.org/abs/1807.06521)."""
	channel_axis = 1 if K.image_data_format() == "channels_first" else -1
	channel = input_feature._keras_shape[channel_axis]
	
	shared_layer_one = Dense(channel//ratio,
							 activation='relu',
							 kernel_initializer='he_normal',
							 use_bias=True,
							 bias_initializer='zeros')
	shared_layer_two = Dense(channel,
							 kernel_initializer='he_normal',
							 use_bias=True,
							 bias_initializer='zeros')
	
	avg_pool = GlobalAveragePooling2D()(input_feature)    
	avg_pool = Reshape((1,1,channel))(avg_pool)
	assert avg_pool._keras_shape[1:] == (1,1,channel)
	avg_pool = shared_layer_one(avg_pool)
	assert avg_pool._keras_shape[1:] == (1,1,channel//ratio)
	avg_pool = shared_layer_two(avg_pool)
	assert avg_pool._keras_shape[1:] == (1,1,channel)
	
	max_pool = GlobalMaxPooling2D()(input_feature)
	max_pool = Reshape((1,1,channel))(max_pool)
	assert max_pool._keras_shape[1:] == (1,1,channel)
	max_pool = shared_layer_one(max_pool)
	assert max_pool._keras_shape[1:] == (1,1,channel//ratio)
	max_pool = shared_layer_two(max_pool)
	assert max_pool._keras_shape[1:] == (1,1,channel)
	
	cbam_feature = Add()([avg_pool,max_pool])
	cbam_feature = Activation('sigmoid')(cbam_feature)
	
	if K.image_data_format() == "channels_first":
		cbam_feature = Permute((3, 1, 2))(cbam_feature)
	
	return multiply([input_feature, cbam_feature]) 
Example 12
Project: ntm_keras   Author: flomlo   File: ntm.py    BSD 3-Clause "New" or "Revised" License
def __init__(self, units, 
                        n_slots=50,
                        m_depth=20,
                        shift_range=3,
                        controller_model=None,
                        read_heads=1,
                        write_heads=1,
                        activation='sigmoid',
                        batch_size=777,                 
                        stateful=False,
                        **kwargs):
        self.output_dim = units
        self.units = units
        self.n_slots = n_slots
        self.m_depth = m_depth
        self.shift_range = shift_range
        self.controller = controller_model
        self.activation = get_activations(activation)
        self.read_heads = read_heads
        self.write_heads = write_heads
        self.batch_size = batch_size

#        self.return_sequence = True
        # Record whether the (optional) controller model keeps state between batches.
        self.controller_with_state = bool(controller_model is not None and
                                          getattr(controller_model, 'stateful', False))


        self.controller_read_head_emitting_dim = _controller_read_head_emitting_dim(m_depth, shift_range)
        self.controller_write_head_emitting_dim = _controller_write_head_emitting_dim(m_depth, shift_range)

        super(NeuralTuringMachine, self).__init__(**kwargs) 
Example 13
Project: CIAN   Author: yanghanxy   File: model_library.py    MIT License
def call(self, x):
        y = K.dot(x, self.W_carry)
        if self.bias:
            y += self.b_carry
        transform_weight = activations.sigmoid(y)
        y = K.dot(x, self.W)
        if self.bias:
            y += self.b
        act = self.activation(y)
        act *= transform_weight
        output = act + (1 - transform_weight) * x
        return output 
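
The call above implements a highway connection: with transform gate t = sigmoid(x @ W_carry + b_carry), the output is t * H(x) + (1 - t) * x, where H is the layer's activation. A small numpy sketch of the same arithmetic (all weights here are illustrative):

import numpy as np

def sigmoid(v):
    return 1.0 / (1.0 + np.exp(-v))

x = np.array([0.5, -1.0])
W, W_carry = np.eye(2), np.eye(2)   # stand-in weights
t = sigmoid(x @ W_carry)            # transform gate
h = np.tanh(x @ W)                  # candidate output H(x)
print(t * h + (1.0 - t) * x)        # gated mix: t*H(x) + (1-t)*x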
Example 14
Project: kepler   Author: jaidevd   File: checks.py    MIT License
def check(self, model):
        for l in model.layers[:-1]:
            if l.activation == sigmoid:
                return False
        return True

# Checks that happen during training ########################################## 
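
The check above flags a sigmoid activation in any non-output layer, a common lint for vanishing gradients. A hedged standalone version of the same idea (the surrounding class is not shown in this excerpt):

from keras.activations import sigmoid

def has_hidden_sigmoid(model):
    # True if any layer except the output uses a sigmoid activation
    return any(getattr(l, 'activation', None) == sigmoid
               for l in model.layers[:-1])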
Example 15
Project: ECG_Heartbeat_Classification   Author: CVxTz   File: baseline_ptbdb_transfer_fullupdate.py    MIT License
def get_model():
    nclass = 1
    inp = Input(shape=(187, 1))
    img_1 = Convolution1D(16, kernel_size=5, activation=activations.relu, padding="valid")(inp)
    img_1 = Convolution1D(16, kernel_size=5, activation=activations.relu, padding="valid")(img_1)
    img_1 = MaxPool1D(pool_size=2)(img_1)
    img_1 = Dropout(rate=0.1)(img_1)
    img_1 = Convolution1D(32, kernel_size=3, activation=activations.relu, padding="valid")(img_1)
    img_1 = Convolution1D(32, kernel_size=3, activation=activations.relu, padding="valid")(img_1)
    img_1 = MaxPool1D(pool_size=2)(img_1)
    img_1 = Dropout(rate=0.1)(img_1)
    img_1 = Convolution1D(32, kernel_size=3, activation=activations.relu, padding="valid")(img_1)
    img_1 = Convolution1D(32, kernel_size=3, activation=activations.relu, padding="valid")(img_1)
    img_1 = MaxPool1D(pool_size=2)(img_1)
    img_1 = Dropout(rate=0.1)(img_1)
    img_1 = Convolution1D(256, kernel_size=3, activation=activations.relu, padding="valid")(img_1)
    img_1 = Convolution1D(256, kernel_size=3, activation=activations.relu, padding="valid")(img_1)
    img_1 = GlobalMaxPool1D()(img_1)
    img_1 = Dropout(rate=0.2)(img_1)

    dense_1 = Dense(64, activation=activations.relu, name="dense_1")(img_1)
    dense_1 = Dense(64, activation=activations.relu, name="dense_2")(dense_1)
    dense_1 = Dense(nclass, activation=activations.sigmoid, name="dense_3_ptbdb")(dense_1)

    model = models.Model(inputs=inp, outputs=dense_1)
    opt = optimizers.Adam(0.001)

    model.compile(optimizer=opt, loss=losses.binary_crossentropy, metrics=['acc'])
    model.summary()
    return model 
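
A hedged training sketch for the model above (X and Y stand in for the PTB-DB arrays, which are prepared elsewhere in the project):

model = get_model()
# X: float array of shape (n_samples, 187, 1); Y: binary labels of shape (n_samples,)
model.fit(X, Y, epochs=2, batch_size=64, validation_split=0.1)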
Example 16
Project: ECG_Heartbeat_Classification   Author: CVxTz   File: baseline_ptbdb.py    MIT License
def get_model():
    nclass = 1
    inp = Input(shape=(187, 1))
    img_1 = Convolution1D(16, kernel_size=5, activation=activations.relu, padding="valid")(inp)
    img_1 = Convolution1D(16, kernel_size=5, activation=activations.relu, padding="valid")(img_1)
    img_1 = MaxPool1D(pool_size=2)(img_1)
    img_1 = Dropout(rate=0.1)(img_1)
    img_1 = Convolution1D(32, kernel_size=3, activation=activations.relu, padding="valid")(img_1)
    img_1 = Convolution1D(32, kernel_size=3, activation=activations.relu, padding="valid")(img_1)
    img_1 = MaxPool1D(pool_size=2)(img_1)
    img_1 = Dropout(rate=0.1)(img_1)
    img_1 = Convolution1D(32, kernel_size=3, activation=activations.relu, padding="valid")(img_1)
    img_1 = Convolution1D(32, kernel_size=3, activation=activations.relu, padding="valid")(img_1)
    img_1 = MaxPool1D(pool_size=2)(img_1)
    img_1 = Dropout(rate=0.1)(img_1)
    img_1 = Convolution1D(256, kernel_size=3, activation=activations.relu, padding="valid")(img_1)
    img_1 = Convolution1D(256, kernel_size=3, activation=activations.relu, padding="valid")(img_1)
    img_1 = GlobalMaxPool1D()(img_1)
    img_1 = Dropout(rate=0.2)(img_1)

    dense_1 = Dense(64, activation=activations.relu, name="dense_1")(img_1)
    dense_1 = Dense(64, activation=activations.relu, name="dense_2")(dense_1)
    dense_1 = Dense(nclass, activation=activations.sigmoid, name="dense_3_ptbdb")(dense_1)

    model = models.Model(inputs=inp, outputs=dense_1)
    opt = optimizers.Adam(0.001)

    model.compile(optimizer=opt, loss=losses.binary_crossentropy, metrics=['acc'])
    model.summary()
    return model 
Example 17
Project: ECG_Heartbeat_Classification   Author: CVxTz   File: baseline_ptbdb_transfer_freeze.py    MIT License
def get_model():
    nclass = 1
    inp = Input(shape=(187, 1))
    img_1 = Convolution1D(16, kernel_size=5, activation=activations.relu, padding="valid", trainable=False)(inp)
    img_1 = Convolution1D(16, kernel_size=5, activation=activations.relu, padding="valid", trainable=False)(img_1)
    img_1 = MaxPool1D(pool_size=2)(img_1)
    img_1 = Dropout(rate=0.1)(img_1)
    img_1 = Convolution1D(32, kernel_size=3, activation=activations.relu, padding="valid", trainable=False)(img_1)
    img_1 = Convolution1D(32, kernel_size=3, activation=activations.relu, padding="valid", trainable=False)(img_1)
    img_1 = MaxPool1D(pool_size=2)(img_1)
    img_1 = Dropout(rate=0.1)(img_1)
    img_1 = Convolution1D(32, kernel_size=3, activation=activations.relu, padding="valid", trainable=False)(img_1)
    img_1 = Convolution1D(32, kernel_size=3, activation=activations.relu, padding="valid", trainable=False)(img_1)
    img_1 = MaxPool1D(pool_size=2)(img_1)
    img_1 = Dropout(rate=0.1)(img_1)
    img_1 = Convolution1D(256, kernel_size=3, activation=activations.relu, padding="valid", trainable=False)(img_1)
    img_1 = Convolution1D(256, kernel_size=3, activation=activations.relu, padding="valid", trainable=False)(img_1)
    img_1 = GlobalMaxPool1D()(img_1)
    img_1 = Dropout(rate=0.2)(img_1)

    dense_1 = Dense(64, activation=activations.relu, name="dense_1")(img_1)
    dense_1 = Dense(64, activation=activations.relu, name="dense_2")(dense_1)
    dense_1 = Dense(nclass, activation=activations.sigmoid, name="dense_3_ptbdb")(dense_1)

    model = models.Model(inputs=inp, outputs=dense_1)
    opt = optimizers.Adam(0.001)

    model.compile(optimizer=opt, loss=losses.binary_crossentropy, metrics=['acc'])
    model.summary()
    return model 
Example 18
Project: talos   Author: autonomio   File: params.py    MIT License
def titanic():

    # here use a standard 2d dictionary for inputting the param boundaries
    p = {'lr': (0.5, 5, 10),
         'first_neuron': [4, 8, 16],
         'batch_size': [20, 30, 40],
         'dropout': (0, 0.5, 5),
         'optimizer': ['Adam', 'Nadam'],
         'losses': ['logcosh', 'binary_crossentropy'],
         'activation': ['relu', 'elu'],
         'last_activation': ['sigmoid']}

    return p 
Example 19
Project: C3AE   Author: StevenBanama   File: C3AE.py    BSD 2-Clause "Simplified" License
def SE_BLOCK(input, using_SE=True, r_factor=2):
    if not using_SE:
        return input
    channel_nums = input.get_shape()[-1].value
    ga_pooling = GlobalAveragePooling2D()(input)
    fc1 = Dense(channel_nums//r_factor)(ga_pooling)
    scale = Dense(channel_nums, activation=sigmoid)(ReLU()(fc1))
    return multiply([scale, input]) 
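
Note that input.get_shape()[-1].value relies on the TF1-era Dimension API and breaks under TensorFlow 2, where get_shape() already yields plain ints. A hedged backend-agnostic alternative for the channel count inside SE_BLOCK:

from keras import backend as K

channel_nums = K.int_shape(input)[-1]  # plain Python int on any backend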
Example 20
Project: ntm_keras   Author: flomlo   File: ntm.py    BSD 3-Clause "New" or "Revised" License
def _split_and_apply_activations(self, controller_output):
        """ This takes the controller output, splits it in ntm_output, read and wright adressing data.
            It returns a triple of ntm_output, controller_instructions_read, controller_instructions_write.
            ntm_output is a tensor, controller_instructions_read and controller_instructions_write are lists containing
            the adressing instruction (k, beta, g, shift, gamma) and in case of write also the writing constructions,
            consisting of an erase and an add vector. 

            As it is necesseary for stable results,
            k and add_vector is activated via tanh, erase_vector via sigmoid (this is critical!),
            shift via softmax,
            gamma is sigmoided, inversed and clipped (probably not ideal)
            g is sigmoided,
            beta is linear (probably not ideal!) """
        
        # splitting
        ntm_output, controller_instructions_read, controller_instructions_write = tf.split(
                    controller_output,
                    np.asarray([self.output_dim,
                                self.read_heads * self.controller_read_head_emitting_dim,
                                self.write_heads * self.controller_write_head_emitting_dim]),
                    axis=1)

        controller_instructions_read = tf.split(controller_instructions_read, self.read_heads, axis=1)
        controller_instructions_write = tf.split(controller_instructions_write, self.write_heads, axis=1)

        controller_instructions_read = [
                tf.split(single_head_data, np.asarray([self.m_depth, 1, 1, 3, 1]), axis=1) for 
                single_head_data in controller_instructions_read]
        
        controller_instructions_write = [
                tf.split(single_head_data, np.asarray([self.m_depth, 1, 1, 3, 1, self.m_depth, self.m_depth]), axis=1) for 
                single_head_data in controller_instructions_write]
        
        #activation
        ntm_output = self.activation(ntm_output)
        controller_instructions_read = [(tanh(k), hard_sigmoid(beta)+0.5, sigmoid(g), softmax(shift), 1 + 9*sigmoid(gamma)) for
                (k, beta, g, shift, gamma) in controller_instructions_read]
        controller_instructions_write = [
                (tanh(k), hard_sigmoid(beta)+0.5, sigmoid(g), softmax(shift), 1 + 9*sigmoid(gamma), hard_sigmoid(erase_vector), tanh(add_vector))  for 
                (k, beta, g, shift, gamma, erase_vector, add_vector) in controller_instructions_write]
       
        return (ntm_output, controller_instructions_read, controller_instructions_write) 
Example 21
Project: keras-attention   Author: datalogue   File: custom_recurrents.py    GNU Affero General Public License v3.0
def step(self, x, states):

        ytm, stm = states

        # repeat the hidden state to the length of the sequence
        _stm = K.repeat(stm, self.timesteps)

        # now multiply the weight matrix with the repeated hidden state
        _Wxstm = K.dot(_stm, self.W_a)

        # calculate the attention probabilities
        # these indicate how much the other timesteps contribute to this one.
        et = K.dot(activations.tanh(_Wxstm + self._uxpb),
                   K.expand_dims(self.V_a))
        at = K.exp(et)
        at_sum = K.sum(at, axis=1)
        at_sum_repeated = K.repeat(at_sum, self.timesteps)
        at /= at_sum_repeated  # vector of size (batchsize, timesteps, 1)

        # calculate the context vector
        context = K.squeeze(K.batch_dot(at, self.x_seq, axes=1), axis=1)
        # ~~~> calculate new hidden state
        # first calculate the "r" gate:

        rt = activations.sigmoid(
            K.dot(ytm, self.W_r)
            + K.dot(stm, self.U_r)
            + K.dot(context, self.C_r)
            + self.b_r)

        # now calculate the "z" gate
        zt = activations.sigmoid(
            K.dot(ytm, self.W_z)
            + K.dot(stm, self.U_z)
            + K.dot(context, self.C_z)
            + self.b_z)

        # calculate the proposal hidden state:
        s_tp = activations.tanh(
            K.dot(ytm, self.W_p)
            + K.dot((rt * stm), self.U_p)
            + K.dot(context, self.C_p)
            + self.b_p)

        # new hidden state:
        st = (1-zt)*stm + zt * s_tp

        yt = activations.softmax(
            K.dot(ytm, self.W_o)
            + K.dot(stm, self.U_o)
            + K.dot(context, self.C_o)
            + self.b_o)

        if self.return_probabilities:
            return at, [yt, st]
        else:
            return yt, [yt, st] 
Example 22
Project: TranskribusDU   Author: Transkribus   File: attentiondecoder.py    BSD 3-Clause "New" or "Revised" License
def step(self, x, states):

        ytm, stm = states

        # repeat the hidden state to the length of the sequence
        _stm = K.repeat(stm, self.timesteps)

        # now multiply the weight matrix with the repeated hidden state
        _Wxstm = K.dot(_stm, self.W_a)

        # calculate the attention probabilities
        # these indicate how much the other timesteps contribute to this one.
        et = K.dot(activations.tanh(_Wxstm + self._uxpb),
                   K.expand_dims(self.V_a))
        at = K.exp(et)
        at_sum = K.sum(at, axis=1)
        at_sum_repeated = K.repeat(at_sum, self.timesteps)
        at /= at_sum_repeated  # vector of size (batchsize, timesteps, 1)

        # calculate the context vector
        context = K.squeeze(K.batch_dot(at, self.x_seq, axes=1), axis=1)
        # ~~~> calculate new hidden state
        # first calculate the "r" gate:

        rt = activations.sigmoid(
            K.dot(ytm, self.W_r)
            + K.dot(stm, self.U_r)
            + K.dot(context, self.C_r)
            + self.b_r)

        # now calculate the "z" gate
        zt = activations.sigmoid(
            K.dot(ytm, self.W_z)
            + K.dot(stm, self.U_z)
            + K.dot(context, self.C_z)
            + self.b_z)

        # calculate the proposal hidden state:
        s_tp = activations.tanh(
            K.dot(ytm, self.W_p)
            + K.dot((rt * stm), self.U_p)
            + K.dot(context, self.C_p)
            + self.b_p)

        # new hidden state:
        st = (1-zt)*stm + zt * s_tp

        yt = activations.softmax(
            K.dot(ytm, self.W_o)
            + K.dot(stm, self.U_o)
            + K.dot(context, self.C_o)
            + self.b_o)

        if self.return_probabilities:
            return at, [yt, st]
        else:
            return yt, [yt, st]