Python keras.activations.tanh() Examples

The following are code examples for showing how to use keras.activations.tanh(). They are from open source Python projects. You can vote up the examples you like or vote down the ones you don't like.

Example 1
Project: Jtyoui   Author: jtyoui   File: HandWritingRecognition.py    MIT License 6 votes vote down vote up
def nn_model():
    (x_train, y_train), _ = mnist.load_data()
    # 归一化
    x_train = x_train.reshape(x_train.shape[0], -1) / 255.
    # one-hot
    y_train = np_utils.to_categorical(y=y_train, num_classes=10)
    # constant(value=1.)自定义常数,constant(value=1.)===one()
    # 创建模型:输入784个神经元,输出10个神经元
    model = Sequential([
        Dense(units=200, input_dim=784, bias_initializer=constant(value=1.), activation=tanh),
        Dense(units=100, bias_initializer=one(), activation=tanh),
        Dense(units=10, bias_initializer=one(), activation=softmax),
    ])

    opt = SGD(lr=0.2, clipnorm=1.)  # 优化器
    model.compile(optimizer=opt, loss=categorical_crossentropy, metrics=['acc', 'mae'])  # 编译
    model.fit(x_train, y_train, batch_size=64, epochs=20, callbacks=[RemoteMonitor()])
    model_save(model, './model.h5') 
Example 2
Project: CAPTCHA-breaking   Author: lllcho   File: test_activations.py    MIT License 6 votes vote down vote up
def test_relu():
    '''
    Relu implementation doesn't depend on the value being
    a theano variable. Testing ints, floats and theano tensors.
    '''

    from keras.activations import relu as r

    assert r(5) == 5
    assert r(-5) == 0
    assert r(-0.1) == 0
    assert r(0.1) == 0.1

    x = T.vector()
    exp = r(x)
    f = theano.function([x], exp)

    test_values = get_standard_values()
    result = f(test_values)

    list_assert_equal(result, test_values) # because no negatives in test values 
Example 3
Project: CAPTCHA-breaking   Author: lllcho   File: test_activations.py    MIT License 6 votes vote down vote up
def test_tanh():

    from keras.activations import tanh as t
    test_values = get_standard_values()

    x = T.vector()
    exp = t(x)
    f = theano.function([x], exp)

    result = f(test_values)
    expected = [math.tanh(v) for v in test_values]

    print(result)
    print(expected)

    list_assert_equal(result, expected) 
Example 4
Project: pointer-networks-experiments   Author: zygmuntz   File: PointerLSTM.py    BSD 2-Clause "Simplified" License 6 votes vote down vote up
def step(self, x_input, states):
    	#print "x_input:", x_input, x_input.shape
    	# <TensorType(float32, matrix)>
    	
        input_shape = self.input_spec[0].shape
        en_seq = states[-1]
        _, [h, c] = super(PointerLSTM, self).step(x_input, states[:-1])

        # vt*tanh(W1*e+W2*d)
        dec_seq = K.repeat(h, input_shape[1])
        Eij = time_distributed_dense(en_seq, self.W1, output_dim=1)
        Dij = time_distributed_dense(dec_seq, self.W2, output_dim=1)
        U = self.vt * tanh(Eij + Dij)
        U = K.squeeze(U, 2)

        # make probability tensor
        pointer = softmax(U)
        return pointer, [h, c] 
Example 5
Project: GAN-MRI   Author: simontomaskarlsson   File: UNIT.py    GNU General Public License v3.0 6 votes vote down vote up
def modelGenerator(self, name):
        inputImg = Input(shape=self.latent_dim)
        # Layer 1: 1 res block
        x = self.resblk(inputImg, 256)
        # Layer 2: 2 res block
        x = self.resblk(x, 256)
        # Layer 3: 3 res block
        x = self.resblk(x, 256)
        # Layer 4:
        x = Conv2DTranspose(128, kernel_size=3, strides=2, padding='same')(x)
        x = LeakyReLU(alpha=0.01)(x)
        # Layer 5:
        x = Conv2DTranspose(64, kernel_size=3, strides=2, padding='same')(x)
        x = LeakyReLU(alpha=0.01)(x)
        # Layer 6
        x = Conv2DTranspose(self.channels, kernel_size=1, strides=1, padding='valid')(x)
        z = Activation("tanh")(x)

        return Model(inputs=inputImg, outputs=z, name=name) 
Example 6
Project: workspace_2017   Author: nwiizo   File: test_activations.py    MIT License 5 votes vote down vote up
def test_tanh():
    test_values = get_standard_values()

    x = K.placeholder(ndim=2)
    exp = activations.tanh(x)
    f = K.function([x], [exp])

    result = f([test_values])[0]
    expected = np.tanh(test_values)
    assert_allclose(result, expected, rtol=1e-05) 
Example 7
Project: applications   Author: geomstats   File: activations_test.py    MIT License 5 votes vote down vote up
def test_serialization():
    all_activations = ['softmax', 'relu', 'elu', 'tanh',
                       'sigmoid', 'hard_sigmoid', 'linear',
                       'softplus', 'softsign', 'selu']
    for name in all_activations:
        fn = activations.get(name)
        ref_fn = getattr(activations, name)
        assert fn == ref_fn
        config = activations.serialize(fn)
        fn = activations.deserialize(config)
        assert fn == ref_fn 
Example 8
Project: applications   Author: geomstats   File: activations_test.py    MIT License 5 votes vote down vote up
def test_tanh():
    test_values = get_standard_values()

    x = K.placeholder(ndim=2)
    exp = activations.tanh(x)
    f = K.function([x], [exp])

    result = f([test_values])[0]
    expected = np.tanh(test_values)
    assert_allclose(result, expected, rtol=1e-05) 
Example 9
Project: guide-actor-critic   Author: voot-t   File: GAC_learner.py    MIT License 5 votes vote down vote up
def create_policy_network(self):
        """ Create a policy netwrok """
        input_s = Input(shape=(self.ds,), dtype="float32", name="input_s")    
        
        h_mean = input_s 
        for i in range(0, hidden_layer_sep_pol):
            h_mean = Dense(hidden_units_list_pol[i], activation=hidden_activation, kernel_regularizer=self.reg_policy, kernel_initializer=initializer)(h_mean)

        mean = Dense(self.da, activation="tanh", kernel_regularizer=self.reg_policy, kernel_initializer=final_initializer)(h_mean)
        mean = Lambda(lambda mean, a_scaler: mean*a_scaler, output_shape=(self.da,), arguments={'a_scaler': self.a_scale})(mean)    # This layer scales the tanh output from [-1, 1] to [-a_scale, a_scale]

        deep_mean_model = Model(inputs=[input_s], outputs=[mean])
        deep_mean_model.compile(loss="mse", optimizer=self.optimizer_policy)    # GAC minimizes MSE to learn the policy
        return deep_mean_model 
Example 10
Project: keras-attention   Author: datalogue   File: custom_recurrents.py    GNU Affero General Public License v3.0 5 votes vote down vote up
def get_initial_state(self, inputs):
        print('inputs shape:', inputs.get_shape())

        # apply the matrix on the first time step to get the initial s0.
        s0 = activations.tanh(K.dot(inputs[:, 0], self.W_s))

        # from keras.layers.recurrent to initialize a vector of (batchsize,
        # output_dim)
        y0 = K.zeros_like(inputs)  # (samples, timesteps, input_dims)
        y0 = K.sum(y0, axis=(1, 2))  # (samples, )
        y0 = K.expand_dims(y0)  # (samples, 1)
        y0 = K.tile(y0, [1, self.output_dim])

        return [y0, s0] 
Example 11
Project: TranskribusDU   Author: Transkribus   File: attentiondecoder.py    BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def get_initial_state(self, inputs):
        print('inputs shape:', inputs.get_shape())

        # apply the matrix on the first time step to get the initial s0.
        s0 = activations.tanh(K.dot(inputs[:, 0], self.W_s))

        # from keras.layers.recurrent to initialize a vector of (batchsize,
        # output_dim)
        y0 = K.zeros_like(inputs)  # (samples, timesteps, input_dims)
        y0 = K.sum(y0, axis=(1, 2))  # (samples, )
        y0 = K.expand_dims(y0)  # (samples, 1)
        y0 = K.tile(y0, [1, self.output_dim])

        return [y0, s0] 
Example 12
Project: ntm_keras   Author: flomlo   File: ntm.py    BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def _split_and_apply_activations(self, controller_output):
        """ This takes the controller output, splits it in ntm_output, read and wright adressing data.
            It returns a triple of ntm_output, controller_instructions_read, controller_instructions_write.
            ntm_output is a tensor, controller_instructions_read and controller_instructions_write are lists containing
            the adressing instruction (k, beta, g, shift, gamma) and in case of write also the writing constructions,
            consisting of an erase and an add vector. 

            As it is necesseary for stable results,
            k and add_vector is activated via tanh, erase_vector via sigmoid (this is critical!),
            shift via softmax,
            gamma is sigmoided, inversed and clipped (probably not ideal)
            g is sigmoided,
            beta is linear (probably not ideal!) """
        
        # splitting
        ntm_output, controller_instructions_read, controller_instructions_write = tf.split(
                    controller_output,
                    np.asarray([self.output_dim,
                                self.read_heads * self.controller_read_head_emitting_dim,
                                self.write_heads * self.controller_write_head_emitting_dim]),
                    axis=1)

        controller_instructions_read = tf.split(controller_instructions_read, self.read_heads, axis=1)
        controller_instructions_write = tf.split(controller_instructions_write, self.write_heads, axis=1)

        controller_instructions_read = [
                tf.split(single_head_data, np.asarray([self.m_depth, 1, 1, 3, 1]), axis=1) for 
                single_head_data in controller_instructions_read]
        
        controller_instructions_write = [
                tf.split(single_head_data, np.asarray([self.m_depth, 1, 1, 3, 1, self.m_depth, self.m_depth]), axis=1) for 
                single_head_data in controller_instructions_write]
        
        #activation
        ntm_output = self.activation(ntm_output)
        controller_instructions_read = [(tanh(k), hard_sigmoid(beta)+0.5, sigmoid(g), softmax(shift), 1 + 9*sigmoid(gamma)) for
                (k, beta, g, shift, gamma) in controller_instructions_read]
        controller_instructions_write = [
                (tanh(k), hard_sigmoid(beta)+0.5, sigmoid(g), softmax(shift), 1 + 9*sigmoid(gamma), hard_sigmoid(erase_vector), tanh(add_vector))  for 
                (k, beta, g, shift, gamma, erase_vector, add_vector) in controller_instructions_write]
       
        return (ntm_output, controller_instructions_read, controller_instructions_write) 
Example 13
Project: NPRF   Author: ucasir   File: nprf_drmm.py    Apache License 2.0 4 votes vote down vote up
def build(self):

    dd_q_input = Input((self.config.nb_supervised_doc, self.config.doc_topk_term, 1), name='dd_q_input')
    dd_d_input = Input((self.config.nb_supervised_doc, self.config.doc_topk_term,
                        self.config.hist_size), name='dd_d_input')

    dd_q_w = Dense(1, kernel_initializer=self.initializer_gate, use_bias=False, name='dd_q_gate')(dd_q_input)
    dd_q_w = Lambda(lambda x: softmax(x, axis=2), output_shape=(
                  self.config.nb_supervised_doc, self.config.doc_topk_term,), name='dd_q_softmax')(dd_q_w)

    z = dd_d_input
    for i in range(self.config.nb_layers):
      z = Dense(self.config.hidden_size[i], activation='tanh',
                kernel_initializer=self.initializer_fc, name='hidden')(z)
    z = Dense(self.config.out_size, kernel_initializer=self.initializer_fc, name='dd_d_gate')(z)
    z = Reshape((self.config.nb_supervised_doc, self.config.doc_topk_term,))(z)
    dd_q_w = Reshape((self.config.nb_supervised_doc, self.config.doc_topk_term,))(dd_q_w)
    # out = Dot(axes=[2, 2], name='dd_pseudo_out')([z, dd_q_w])

    out = Lambda(lambda x: K.batch_dot(x[0], x[1], axes=[2, 2]), name='dd_pseudo_out')([z, dd_q_w])
    dd_init_out = Lambda(lambda x: tf.matrix_diag_part(x), output_shape=(self.config.nb_supervised_doc,), name='dd_init_out')(out)
    '''
    dd_init_out = Lambda(lambda x: tf.reduce_sum(x, axis=2), output_shape=(self.config.nb_supervised_doc,))(z)
    '''
    #dd_out = Reshape((self.config.nb_supervised_doc,))(dd_out)

    # dd out gating
    dd_gate = Input((self.config.nb_supervised_doc, 1), name='baseline_doc_score')
    dd_w = Dense(1, kernel_initializer=self.initializer_gate, use_bias=False, name='dd_gate')(dd_gate)
    # dd_w = Lambda(lambda x: softmax(x, axis=1), output_shape=(self.config.nb_supervised_doc,), name='dd_softmax')(dd_w)

    # dd_out = Dot(axes=[1, 1], name='dd_out')([dd_init_out, dd_w])
    dd_w = Reshape((self.config.nb_supervised_doc,))(dd_w)
    dd_init_out = Reshape((self.config.nb_supervised_doc,))(dd_init_out)


    if self.config.method in [1, 3]: # no doc gating, with dense layer
      z = dd_init_out
    elif self.config.method == 2:
      logging.info("Apply doc gating")
      z = Multiply(name='dd_out')([dd_init_out, dd_w])
    else:
      raise ValueError("Method not initialized, please check config file")

    if self.config.method in [1, 2]:
      logging.info("Dense layer on top")
      z = Dense(self.config.merge_hidden, activation='tanh', name='merge_hidden')(z)
      out = Dense(self.config.merge_out, name='score')(z)
    else:
      logging.info("Apply doc gating, No dense layer on top, sum up scores")
      out = Dot(axes=[1, 1], name='score')([z, dd_w])

    model = Model(inputs=[dd_q_input, dd_d_input, dd_gate], outputs=[out])
    print(model.summary())

    return model 
Example 14
Project: keras-attention   Author: datalogue   File: custom_recurrents.py    GNU Affero General Public License v3.0 4 votes vote down vote up
def __init__(self, units, output_dim,
                 activation='tanh',
                 return_probabilities=False,
                 name='AttentionDecoder',
                 kernel_initializer='glorot_uniform',
                 recurrent_initializer='orthogonal',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 **kwargs):
        """
        Implements an AttentionDecoder that takes in a sequence encoded by an
        encoder and outputs the decoded states 
        :param units: dimension of the hidden state and the attention matrices
        :param output_dim: the number of labels in the output space

        references:
            Bahdanau, Dzmitry, Kyunghyun Cho, and Yoshua Bengio. 
            "Neural machine translation by jointly learning to align and translate." 
            arXiv preprint arXiv:1409.0473 (2014).
        """
        self.units = units
        self.output_dim = output_dim
        self.return_probabilities = return_probabilities
        self.activation = activations.get(activation)
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.recurrent_initializer = initializers.get(recurrent_initializer)
        self.bias_initializer = initializers.get(bias_initializer)

        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.recurrent_regularizer = regularizers.get(kernel_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)

        self.kernel_constraint = constraints.get(kernel_constraint)
        self.recurrent_constraint = constraints.get(kernel_constraint)
        self.bias_constraint = constraints.get(bias_constraint)

        super(AttentionDecoder, self).__init__(**kwargs)
        self.name = name
        self.return_sequences = True  # must return sequences 
Example 15
Project: keras-attention   Author: datalogue   File: custom_recurrents.py    GNU Affero General Public License v3.0 4 votes vote down vote up
def step(self, x, states):

        ytm, stm = states

        # repeat the hidden state to the length of the sequence
        _stm = K.repeat(stm, self.timesteps)

        # now multiplty the weight matrix with the repeated hidden state
        _Wxstm = K.dot(_stm, self.W_a)

        # calculate the attention probabilities
        # this relates how much other timesteps contributed to this one.
        et = K.dot(activations.tanh(_Wxstm + self._uxpb),
                   K.expand_dims(self.V_a))
        at = K.exp(et)
        at_sum = K.sum(at, axis=1)
        at_sum_repeated = K.repeat(at_sum, self.timesteps)
        at /= at_sum_repeated  # vector of size (batchsize, timesteps, 1)

        # calculate the context vector
        context = K.squeeze(K.batch_dot(at, self.x_seq, axes=1), axis=1)
        # ~~~> calculate new hidden state
        # first calculate the "r" gate:

        rt = activations.sigmoid(
            K.dot(ytm, self.W_r)
            + K.dot(stm, self.U_r)
            + K.dot(context, self.C_r)
            + self.b_r)

        # now calculate the "z" gate
        zt = activations.sigmoid(
            K.dot(ytm, self.W_z)
            + K.dot(stm, self.U_z)
            + K.dot(context, self.C_z)
            + self.b_z)

        # calculate the proposal hidden state:
        s_tp = activations.tanh(
            K.dot(ytm, self.W_p)
            + K.dot((rt * stm), self.U_p)
            + K.dot(context, self.C_p)
            + self.b_p)

        # new hidden state:
        st = (1-zt)*stm + zt * s_tp

        yt = activations.softmax(
            K.dot(ytm, self.W_o)
            + K.dot(stm, self.U_o)
            + K.dot(context, self.C_o)
            + self.b_o)

        if self.return_probabilities:
            return at, [yt, st]
        else:
            return yt, [yt, st] 
Example 16
Project: TranskribusDU   Author: Transkribus   File: attentiondecoder.py    BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def __init__(self, units, output_dim,
                 activation='tanh',
                 return_probabilities=False,
                 name='AttentionDecoder',
                 kernel_initializer='glorot_uniform',
                 recurrent_initializer='orthogonal',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 **kwargs):
        """
        Implements an AttentionDecoder that takes in a sequence encoded by an
        encoder and outputs the decoded states 
        :param units: dimension of the hidden state and the attention matrices
        :param output_dim: the number of labels in the output space
        references:
            Bahdanau, Dzmitry, Kyunghyun Cho, and Yoshua Bengio. 
            "Neural machine translation by jointly learning to align and translate." 
            arXiv preprint arXiv:1409.0473 (2014).
        """
        self.units = units
        self.output_dim = output_dim
        self.return_probabilities = return_probabilities
        self.activation = activations.get(activation)
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.recurrent_initializer = initializers.get(recurrent_initializer)
        self.bias_initializer = initializers.get(bias_initializer)

        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.recurrent_regularizer = regularizers.get(kernel_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)

        self.kernel_constraint = constraints.get(kernel_constraint)
        self.recurrent_constraint = constraints.get(kernel_constraint)
        self.bias_constraint = constraints.get(bias_constraint)

        super(AttentionDecoder, self).__init__(**kwargs)
        self.name = name
        self.return_sequences = True  # must return sequences 
Example 17
Project: TranskribusDU   Author: Transkribus   File: attentiondecoder.py    BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def step(self, x, states):

        ytm, stm = states

        # repeat the hidden state to the length of the sequence
        _stm = K.repeat(stm, self.timesteps)

        # now multiplty the weight matrix with the repeated hidden state
        _Wxstm = K.dot(_stm, self.W_a)

        # calculate the attention probabilities
        # this relates how much other timesteps contributed to this one.
        et = K.dot(activations.tanh(_Wxstm + self._uxpb),
                   K.expand_dims(self.V_a))
        at = K.exp(et)
        at_sum = K.sum(at, axis=1)
        at_sum_repeated = K.repeat(at_sum, self.timesteps)
        at /= at_sum_repeated  # vector of size (batchsize, timesteps, 1)

        # calculate the context vector
        context = K.squeeze(K.batch_dot(at, self.x_seq, axes=1), axis=1)
        # ~~~> calculate new hidden state
        # first calculate the "r" gate:

        rt = activations.sigmoid(
            K.dot(ytm, self.W_r)
            + K.dot(stm, self.U_r)
            + K.dot(context, self.C_r)
            + self.b_r)

        # now calculate the "z" gate
        zt = activations.sigmoid(
            K.dot(ytm, self.W_z)
            + K.dot(stm, self.U_z)
            + K.dot(context, self.C_z)
            + self.b_z)

        # calculate the proposal hidden state:
        s_tp = activations.tanh(
            K.dot(ytm, self.W_p)
            + K.dot((rt * stm), self.U_p)
            + K.dot(context, self.C_p)
            + self.b_p)

        # new hidden state:
        st = (1-zt)*stm + zt * s_tp

        yt = activations.softmax(
            K.dot(ytm, self.W_o)
            + K.dot(stm, self.U_o)
            + K.dot(context, self.C_o)
            + self.b_o)

        if self.return_probabilities:
            return at, [yt, st]
        else:
            return yt, [yt, st]