Python keras.backend.tanh() Examples
The following are 30 code examples of keras.backend.tanh().
The original project and source file are listed above each example.
You may also want to check out all available functions and classes of the module keras.backend.
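As a quick orientation before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) showing that keras.backend.tanh() is simply the element-wise hyperbolic tangent, squashing values into (-1, 1):

import numpy as np
from keras import backend as K

# K.tanh applies the hyperbolic tangent element-wise to a tensor.
x = K.constant(np.array([[-2.0, 0.0, 2.0]]))
y = K.tanh(x)
print(K.eval(y))  # approximately [[-0.964, 0.0, 0.964]]

In the attention layers below, K.tanh typically supplies the non-linearity that turns a projected hidden state into an attention score.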
Example #1
Source File: submission_v50.py From Quora with MIT License
def call(self, x, mask=None):
    features_dim = self.features_dim
    step_dim = self.step_dim

    eij = K.reshape(
        K.dot(K.reshape(x, (-1, features_dim)), K.reshape(self.W, (features_dim, 1))),
        (-1, step_dim))

    if self.bias:
        eij += self.b

    eij = K.tanh(eij)
    a = K.exp(eij)

    if mask is not None:
        a *= K.cast(mask, K.floatx())

    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

    a = K.expand_dims(a)
    weighted_input = x * a
    return K.sum(weighted_input, axis=1)
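Most of the attention call() methods on this page follow the same pattern: score each timestep with a tanh non-linearity, mask out padded timesteps, normalize the scores into weights with an epsilon-stabilized softmax, and return the weighted sum over time. As a sketch of the math behind Example #1 (notation mine, not from the project):

e_t = \tanh(x_t W + b), \qquad a_t = \frac{\exp(e_t)}{\sum_s \exp(e_s) + \epsilon}, \qquad \text{output} = \sum_t a_t x_t

Here \epsilon is K.epsilon(), added so the denominator never collapses to zero early in training.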
Example #2
Source File: attention_layer.py From text-classifier with Apache License 2.0
def call(self, x, mask=None):
    # size of x: [batch_size, sel_len, attention_dim]
    # size of u: [batch_size, attention_dim]
    # uit = tanh(xW + b)
    uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
    ait = K.dot(uit, self.u)
    ait = K.squeeze(ait, -1)
    ait = K.exp(ait)

    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in theano
        ait *= K.cast(mask, K.floatx())
    ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    ait = K.expand_dims(ait)
    weighted_input = x * ait
    output = K.sum(weighted_input, axis=1)
    return output
Example #3
Source File: my_layers.py From Attention-Based-Aspect-Extraction with Apache License 2.0
def call(self, input_tensor, mask=None):
    x = input_tensor[0]
    y = input_tensor[1]
    mask = mask[0]

    y = K.transpose(K.dot(self.W, K.transpose(y)))
    y = K.expand_dims(y, axis=-2)
    y = K.repeat_elements(y, self.steps, axis=1)
    eij = K.sum(x * y, axis=-1)

    if self.bias:
        b = K.repeat_elements(self.b, self.steps, axis=0)
        eij += b

    eij = K.tanh(eij)
    a = K.exp(eij)

    if mask is not None:
        a *= K.cast(mask, K.floatx())

    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    return a
Example #4
Source File: attention_with_context.py From DeepResearch with MIT License
def call(self, x, mask=None):
    uit = dot_product(x, self.W)

    if self.bias:
        uit += self.b

    uit = K.tanh(uit)
    ait = dot_product(uit, self.u)

    a = K.exp(ait)

    # apply mask after the exp. will be re-normalized next
    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in theano
        a *= K.cast(mask, K.floatx())

    # in some cases especially in the early stages of training the sum may be almost zero
    # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
    # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

    a = K.expand_dims(a)
    weighted_input = x * a
    return K.sum(weighted_input, axis=1)
Example #5
Source File: attention.py From deephlapan with GNU General Public License v2.0
def call(self, x, mask=None):
    eij = dot_product(x, self.W)

    if self.bias:
        eij += self.b

    eij = K.tanh(eij)
    a = K.exp(eij)

    if mask is not None:
        a *= K.cast(mask, K.floatx())

    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

    weighted_input = x * K.expand_dims(a)
    result = K.sum(weighted_input, axis=1)

    if self.return_attention:
        return [result, a]
    return result
Example #6
Source File: my_layers.py From Unsupervised-Aspect-Extraction with Apache License 2.0
def call(self, input_tensor, mask=None):
    x = input_tensor[0]
    y = input_tensor[1]
    mask = mask[0]

    y = K.transpose(K.dot(self.W, K.transpose(y)))
    y = K.expand_dims(y, dim=-2)
    y = K.repeat_elements(y, self.steps, axis=1)
    eij = K.sum(x * y, axis=-1)

    if self.bias:
        b = K.repeat_elements(self.b, self.steps, axis=0)
        eij += b

    eij = K.tanh(eij)
    a = K.exp(eij)

    if mask is not None:
        a *= K.cast(mask, K.floatx())

    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    return a
Example #7
Source File: textClassifierHATT.py From textClassifier with Apache License 2.0
def call(self, x, mask=None):
    # size of x: [batch_size, sel_len, attention_dim]
    # size of u: [batch_size, attention_dim]
    # uit = tanh(xW + b)
    uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
    ait = K.dot(uit, self.u)
    ait = K.squeeze(ait, -1)
    ait = K.exp(ait)

    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in theano
        ait *= K.cast(mask, K.floatx())
    ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    ait = K.expand_dims(ait)
    weighted_input = x * ait
    output = K.sum(weighted_input, axis=1)
    return output
Example #8
Source File: models.py From DigiX_HuaWei_Population_Age_Attribution_Predict with MIT License
def call(self, x, mask=None):
    uit = dot_product(x, self.W)

    if self.bias:
        uit += self.b

    uit = K.tanh(uit)
    ait = dot_product(uit, self.u)

    a = K.exp(ait)

    # apply mask after the exp. will be re-normalized next
    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in theano
        a *= K.cast(mask, K.floatx())

    # in some cases especially in the early stages of training the sum may be almost zero
    # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
    # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

    a = K.expand_dims(a)
    weighted_input = x * a
    return K.sum(weighted_input, axis=1)
Example #9
Source File: layers.py From keras-text with MIT License
def call(self, x, mask=None):
    # x: [..., time_steps, features]
    # ut = [..., time_steps, attention_dims]
    ut = K.dot(x, self.kernel)
    if self.use_bias:
        ut = K.bias_add(ut, self.bias)
    ut = K.tanh(ut)

    if self.use_context:
        ut = ut * self.context_kernel

    # Collapse `attention_dims` to 1. This indicates the weight for each time_step.
    ut = K.sum(ut, axis=-1, keepdims=True)

    # Convert those weights into a distribution but along time axis.
    # i.e., sum of alphas along `time_steps` axis should be 1.
    self.at = _softmax(ut, dim=1)
    if mask is not None:
        self.at *= K.cast(K.expand_dims(mask, -1), K.floatx())

    # Weighted sum along `time_steps` axis.
    return K.sum(x * self.at, axis=-2)
Example #10
Source File: attentive_convlstm.py From sam with MIT License
def __init__(self, nb_filters_in, nb_filters_out, nb_filters_att, nb_rows, nb_cols,
             init='normal', inner_init='orthogonal', attentive_init='zero',
             activation='tanh', inner_activation='sigmoid',
             W_regularizer=None, U_regularizer=None,
             weights=None, go_backwards=False, **kwargs):
    self.nb_filters_in = nb_filters_in
    self.nb_filters_out = nb_filters_out
    self.nb_filters_att = nb_filters_att
    self.nb_rows = nb_rows
    self.nb_cols = nb_cols
    self.init = initializations.get(init)
    self.inner_init = initializations.get(inner_init)
    self.attentive_init = initializations.get(attentive_init)
    self.activation = activations.get(activation)
    self.inner_activation = activations.get(inner_activation)
    self.initial_weights = weights
    self.go_backwards = go_backwards

    self.W_regularizer = W_regularizer
    self.U_regularizer = U_regularizer

    self.input_spec = [InputSpec(ndim=5)]
    super(AttentiveConvLSTM, self).__init__(**kwargs)
Example #11
Source File: attentive_convlstm.py From sam with MIT License
def step(self, x, states):
    x_shape = K.shape(x)
    h_tm1 = states[0]
    c_tm1 = states[1]

    e = self.V_a(K.tanh(self.W_a(h_tm1) + self.U_a(x)))
    a = K.reshape(K.softmax(K.batch_flatten(e)), (x_shape[0], 1, x_shape[2], x_shape[3]))
    x_tilde = x * K.repeat_elements(a, x_shape[1], 1)

    x_i = self.W_i(x_tilde)
    x_f = self.W_f(x_tilde)
    x_c = self.W_c(x_tilde)
    x_o = self.W_o(x_tilde)

    i = self.inner_activation(x_i + self.U_i(h_tm1))
    f = self.inner_activation(x_f + self.U_f(h_tm1))
    c = f * c_tm1 + i * self.activation(x_c + self.U_c(h_tm1))
    o = self.inner_activation(x_o + self.U_o(h_tm1))
    h = o * self.activation(c)

    return h, [h, c]
Example #12
Source File: Attention.py From delft with Apache License 2.0
def call(self, x, mask=None):
    features_dim = self.features_dim
    step_dim = self.step_dim

    eij = K.reshape(
        K.dot(K.reshape(x, (-1, features_dim)), K.reshape(self.W, (features_dim, 1))),
        (-1, step_dim))

    if self.bias:
        eij += self.b

    eij = K.tanh(eij)
    a = K.exp(eij)

    if mask is not None:
        a *= K.cast(mask, K.floatx())

    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

    a = K.expand_dims(a)
    weighted_input = x * a
    return K.sum(weighted_input, axis=1)
Example #13
Source File: neural_networks.py From Quora with MIT License
def call(self, x, mask=None):
    features_dim = self.features_dim
    step_dim = self.step_dim

    eij = K.reshape(
        K.dot(K.reshape(x, (-1, features_dim)), K.reshape(self.W, (features_dim, 1))),
        (-1, step_dim))

    if self.bias:
        eij += self.b

    eij = K.tanh(eij)
    a = K.exp(eij)

    if mask is not None:
        a *= K.cast(mask, K.floatx())

    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

    a = K.expand_dims(a)
    weighted_input = x * a
    return K.sum(weighted_input, axis=1)
Example #14
Source File: submission_v40.py From Quora with MIT License
def call(self, x, mask=None):
    features_dim = self.features_dim
    step_dim = self.step_dim

    eij = K.reshape(
        K.dot(K.reshape(x, (-1, features_dim)), K.reshape(self.W, (features_dim, 1))),
        (-1, step_dim))

    if self.bias:
        eij += self.b

    eij = K.tanh(eij)
    a = K.exp(eij)

    if mask is not None:
        a *= K.cast(mask, K.floatx())

    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

    a = K.expand_dims(a)
    weighted_input = x * a
    return K.sum(weighted_input, axis=1)
Example #15
Source File: insample.py From Quora with MIT License
def call(self, x, mask=None):
    features_dim = self.features_dim
    step_dim = self.step_dim

    eij = K.reshape(
        K.dot(K.reshape(x, (-1, features_dim)), K.reshape(self.W, (features_dim, 1))),
        (-1, step_dim))

    if self.bias:
        eij += self.b

    eij = K.tanh(eij)
    a = K.exp(eij)

    if mask is not None:
        a *= K.cast(mask, K.floatx())

    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

    a = K.expand_dims(a)
    weighted_input = x * a
    return K.sum(weighted_input, axis=1)
Example #16
Source File: multi_perspective_layer.py From MatchZoo with Apache License 2.0
def _cosine_distance(v1, v2, cosine_norm=True, eps=1e-6):
    """
    Only requires `tf.reduce_sum(v1 * v2, axis=-1)`.

    :param v1: [batch, time_steps(v1), 1, m, d]
    :param v2: [batch, 1, time_steps(v2), m, d]
    :param cosine_norm: True
    :param eps: 1e-6
    :return: [batch, time_steps(v1), time_steps(v2), m]
    """
    cosine_numerator = tf.reduce_sum(v1 * v2, axis=-1)
    if not cosine_norm:
        return K.tanh(cosine_numerator)
    v1_norm = K.sqrt(tf.maximum(tf.reduce_sum(tf.square(v1), axis=-1), eps))
    v2_norm = K.sqrt(tf.maximum(tf.reduce_sum(tf.square(v2), axis=-1), eps))
    return cosine_numerator / v1_norm / v2_norm
Example #17
Source File: my_layers.py From Aspect-level-sentiment with Apache License 2.0
def call(self, input_tensor, mask=None):
    x = input_tensor[0]
    aspect = input_tensor[1]
    mask = mask[0]

    aspect = K.transpose(K.dot(self.W, K.transpose(aspect)))
    aspect = K.expand_dims(aspect, axis=-2)
    aspect = K.repeat_elements(aspect, self.steps, axis=1)
    eij = K.sum(x * aspect, axis=-1)

    if self.bias:
        b = K.repeat_elements(self.b, self.steps, axis=0)
        eij += b

    eij = K.tanh(eij)
    a = K.exp(eij)

    if mask is not None:
        a *= K.cast(mask, K.floatx())

    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    return a
Example #18
Source File: models.py From keras_attention with MIT License
def call(self, h, mask=None):
    h_shape = K.shape(h)
    d_w, T = h_shape[0], h_shape[1]

    logits = K.dot(h, self.w)  # w^T h
    logits = K.reshape(logits, (d_w, T))
    alpha = K.exp(logits - K.max(logits, axis=-1, keepdims=True))  # exp

    # masked timesteps have zero weight
    if mask is not None:
        mask = K.cast(mask, K.floatx())
        alpha = alpha * mask
    alpha = alpha / K.sum(alpha, axis=1, keepdims=True)  # softmax
    r = K.sum(h * K.expand_dims(alpha), axis=1)  # r = h*alpha^T
    h_star = K.tanh(r)  # h^* = tanh(r)

    if self.return_attention:
        return [h_star, alpha]
    return h_star
Example #19
Source File: multi_dim_attention.py From nlp_toolkit with MIT License
def call(self, x, mask=None):
    uit = K.tanh(K.dot(x, self.Ws1))
    ait = K.dot(uit, self.Ws2)
    ait = K.permute_dimensions(ait, (0, 2, 1))
    A = K.softmax(ait, axis=1)
    M = K.batch_dot(A, x)
    if self.punish:
        A_T = K.permute_dimensions(A, (0, 2, 1))
        tile_eye = K.tile(K.eye(self.weight_ws2), [self.batch_size, 1])
        tile_eye = K.reshape(
            tile_eye, shape=[-1, self.weight_ws2, self.weight_ws2])
        AA_T = K.batch_dot(A, A_T) - tile_eye
        P = K.l2_normalize(AA_T, axis=(1, 2))
        return M, P
    else:
        return M
Example #20
Source File: attention.py From nlp_toolkit with MIT License
def call(self, x, mask=None):
    # MLP
    ut = K.dot(x, self.kernel)
    if self.use_bias:
        ut = K.bias_add(ut, self.bias)
    if self.activation:
        ut = K.tanh(ut)
    if self.context_kernel:
        ut = K.dot(ut, self.context_kernel)
    ut = K.squeeze(ut, axis=-1)

    # softmax
    at = K.exp(ut - K.max(ut, axis=-1, keepdims=True))
    if mask is not None:
        at *= K.cast(mask, K.floatx())
    att_weights = at / (K.sum(at, axis=1, keepdims=True) + K.epsilon())

    # output
    atx = x * K.expand_dims(att_weights, axis=-1)
    output = K.sum(atx, axis=1)
    if self.return_attention:
        return [output, att_weights]
    return output
Example #21
Source File: QuestionAttnGRU.py From R-NET-in-Keras with MIT License
def step(self, inputs, states):
    uP_t = inputs
    vP_tm1 = states[0]
    _ = states[1:3]  # ignore internal dropout/masks
    uQ, WQ_u, WP_v, WP_u, v, W_g1 = states[3:9]
    uQ_mask, = states[9:10]

    WQ_u_Dot = K.dot(uQ, WQ_u)  # WQ_u
    WP_v_Dot = K.dot(K.expand_dims(vP_tm1, axis=1), WP_v)  # WP_v
    WP_u_Dot = K.dot(K.expand_dims(uP_t, axis=1), WP_u)  # WP_u

    s_t_hat = K.tanh(WQ_u_Dot + WP_v_Dot + WP_u_Dot)
    s_t = K.dot(s_t_hat, v)  # v
    s_t = K.batch_flatten(s_t)
    a_t = softmax(s_t, mask=uQ_mask, axis=1)
    c_t = K.batch_dot(a_t, uQ, axes=[1, 1])

    GRU_inputs = K.concatenate([uP_t, c_t])
    g = K.sigmoid(K.dot(GRU_inputs, W_g1))  # W_g1
    GRU_inputs = g * GRU_inputs
    vP_t, s = super(QuestionAttnGRU, self).step(GRU_inputs, states)

    return vP_t, s
Example #22
Source File: QuestionPooling.py From R-NET-in-Keras with MIT License
def call(self, inputs, mask=None):
    assert(isinstance(inputs, list) and len(inputs) == 5)

    uQ, WQ_u, WQ_v, v, VQ_r = inputs
    uQ_mask = mask[0] if mask is not None else None

    ones = K.ones_like(K.sum(uQ, axis=1, keepdims=True))  # (B, 1, 2H)
    s_hat = K.dot(uQ, WQ_u)
    s_hat += K.dot(ones, K.dot(WQ_v, VQ_r))
    s_hat = K.tanh(s_hat)
    s = K.dot(s_hat, v)
    s = K.batch_flatten(s)

    a = softmax(s, mask=uQ_mask, axis=1)
    rQ = K.batch_dot(uQ, a, axes=[1, 1])

    return rQ
Example #23
Source File: PointerGRU.py From R-NET-in-Keras with MIT License
def step(self, inputs, states):
    # input
    ha_tm1 = states[0]  # (B, 2H)
    _ = states[1:3]  # ignore internal dropout/masks
    hP, WP_h, Wa_h, v = states[3:7]  # (B, P, 2H)
    hP_mask, = states[7:8]

    WP_h_Dot = K.dot(hP, WP_h)  # (B, P, H)
    Wa_h_Dot = K.dot(K.expand_dims(ha_tm1, axis=1), Wa_h)  # (B, 1, H)

    s_t_hat = K.tanh(WP_h_Dot + Wa_h_Dot)  # (B, P, H)
    s_t = K.dot(s_t_hat, v)  # (B, P, 1)
    s_t = K.batch_flatten(s_t)  # (B, P)
    a_t = softmax(s_t, mask=hP_mask, axis=1)  # (B, P)
    c_t = K.batch_dot(hP, a_t, axes=[1, 1])  # (B, 2H)

    GRU_inputs = c_t
    ha_t, (ha_t_,) = super(PointerGRU, self).step(GRU_inputs, states)

    return a_t, [ha_t]
Example #24
Source File: SelfAttnGRU.py From R-NET-in-Keras with MIT License
def step(self, inputs, states):
    vP_t = inputs
    hP_tm1 = states[0]
    _ = states[1:3]  # ignore internal dropout/masks
    vP, WP_v, WPP_v, v, W_g2 = states[3:8]
    vP_mask, = states[8:]

    WP_v_Dot = K.dot(vP, WP_v)
    WPP_v_Dot = K.dot(K.expand_dims(vP_t, axis=1), WPP_v)

    s_t_hat = K.tanh(WPP_v_Dot + WP_v_Dot)
    s_t = K.dot(s_t_hat, v)
    s_t = K.batch_flatten(s_t)
    a_t = softmax(s_t, mask=vP_mask, axis=1)
    c_t = K.batch_dot(a_t, vP, axes=[1, 1])

    GRU_inputs = K.concatenate([vP_t, c_t])
    g = K.sigmoid(K.dot(GRU_inputs, W_g2))
    GRU_inputs = g * GRU_inputs

    hP_t, s = super(SelfAttnGRU, self).step(GRU_inputs, states)

    return hP_t, s
Example #25
Source File: pooling.py From onto-lstm with Apache License 2.0
def call(self, x, mask=None):
    mean = super(IntraAttention, self).call(x, mask)
    # x: (batch_size, input_length, input_dim)
    # mean: (batch_size, input_dim)
    ones = K.expand_dims(K.mean(K.ones_like(x), axis=(0, 2)), dim=0)  # (1, input_length)
    # (batch_size, input_length, input_dim)
    tiled_mean = K.permute_dimensions(K.dot(K.expand_dims(mean), ones), (0, 2, 1))
    if mask is not None:
        if K.ndim(mask) > K.ndim(x):
            # Assuming this is because of the bug in Bidirectional. Temporary fix follows.
            # TODO: Fix Bidirectional.
            mask = K.any(mask, axis=(-2, -1))
        if K.ndim(mask) < K.ndim(x):
            mask = K.expand_dims(mask)
        x = switch(mask, x, K.zeros_like(x))
    # (batch_size, input_length, proj_dim)
    projected_combination = K.tanh(K.dot(x, self.vector_projector) + K.dot(tiled_mean, self.mean_projector))
    scores = K.dot(projected_combination, self.scorer)  # (batch_size, input_length)
    weights = K.softmax(scores)  # (batch_size, input_length)
    attended_x = K.sum(K.expand_dims(weights) * x, axis=1)  # (batch_size, input_dim)
    return attended_x
Example #26
Source File: layers.py From keras-han-for-docla with MIT License
def _get_attention_weights(self, X):
    """
    Computes the attention weights for each timestep in X

    :param X: 3d-tensor (batch_size, time_steps, input_dim)
    :return: 2d-tensor (batch_size, time_steps) of attention weights
    """
    # Compute a time-wise stimulus, i.e. a stimulus for each
    # time step. For this first compute a hidden layer of
    # dimension self.context_vector_length and take the
    # similarity of this layer with self.u as the stimulus
    u_tw = K.tanh(K.dot(X, self.W))
    tw_stimulus = K.dot(u_tw, self.u)

    # Remove the last axis and apply softmax to the stimulus to
    # get a probability.
    tw_stimulus = K.reshape(tw_stimulus, (-1, tw_stimulus.shape[1]))
    att_weights = K.softmax(tw_stimulus)

    return att_weights
Example #27
Source File: rnnlayer.py From recurrent-attention-for-QA-SQUAD-based-on-keras with MIT License
def step(self, inputs, states):
    h_tm1 = states[0]  # previous memory
    #B_U = states[1]  # dropout matrices for recurrent units
    #B_W = states[2]

    h_tm1a = K.dot(h_tm1, self.Wa)
    eij = K.dot(K.tanh(h_tm1a + K.dot(inputs[:, :self.h_dim], self.Ua)), self.Va)
    eijs = K.repeat_elements(eij, self.h_dim, axis=1)

    #alphaij = K.softmax(eijs)  # batchsize * lenh h batchsize * lenh * ndim
    #ci = K.permute_dimensions(K.permute_dimensions(self.h, [2,0,1]) * alphaij, [1,2,0])
    #cisum = K.sum(ci, axis=1)
    cisum = eijs * inputs[:, :self.h_dim]
    #print(K.shape(cisum), cisum.shape, ci.shape, self.h.shape, alphaij.shape, x.shape)

    zr = K.sigmoid(K.dot(inputs[:, self.h_dim:], self.Wzr) + K.dot(h_tm1, self.Uzr) + K.dot(cisum, self.Czr))
    zi = zr[:, :self.units]
    ri = zr[:, self.units: 2 * self.units]
    si_ = K.tanh(K.dot(inputs[:, self.h_dim:], self.W) + K.dot(ri * h_tm1, self.U) + K.dot(cisum, self.C))
    si = (1 - zi) * h_tm1 + zi * si_
    return si, [si]  #h_tm1, [h_tm1]
Example #28
Source File: rnnlayer.py From recurrent-attention-for-QA-SQUAD-based-on-keras with MIT License
def __init__(self, units, h, h_dim,
             kernel_initializer='glorot_uniform',
             recurrent_initializer='orthogonal',
             #activation='tanh', inner_activation='hard_sigmoid',
             #W_regularizer=None, U_regularizer=None, b_regularizer=None,
             #dropout_W=0., dropout_U=0.,
             **kwargs):
    self.units = units
    self.h = h[:, -1, :]
    self.h_dim = h_dim
    self.kernel_initializer = initializers.get(kernel_initializer)
    self.recurrent_initializer = initializers.get(recurrent_initializer)
    #self.activation = activations.get(activation)
    #self.inner_activation = activations.get(inner_activation)
    #self.W_regularizer = regularizers.get(W_regularizer)
    #self.U_regularizer = regularizers.get(U_regularizer)
    #self.b_regularizer = regularizers.get(b_regularizer)
    #self.dropout_W = dropout_W
    #self.dropout_U = dropout_U
    #if self.dropout_W or self.dropout_U:
    #    self.uses_learning_phase = True
    super(Attention, self).__init__(**kwargs)
Example #29
Source File: rnnlayer.py From recurrent-attention-for-QA-SQUAD-based-on-keras with MIT License
def __init__(self, units, h, h_dim,
             kernel_initializer='glorot_uniform',
             recurrent_initializer='orthogonal',
             #activation='tanh', inner_activation='hard_sigmoid',
             #W_regularizer=None, U_regularizer=None, b_regularizer=None,
             #dropout_W=0., dropout_U=0.,
             **kwargs):
    self.units = units
    self.h = h[:, -1, :]
    self.h_dim = h_dim
    self.kernel_initializer = initializers.get(kernel_initializer)
    self.recurrent_initializer = initializers.get(recurrent_initializer)
    #self.activation = activations.get(activation)
    #self.inner_activation = activations.get(inner_activation)
    #self.W_regularizer = regularizers.get(W_regularizer)
    #self.U_regularizer = regularizers.get(U_regularizer)
    #self.b_regularizer = regularizers.get(b_regularizer)
    #self.dropout_W = dropout_W
    #self.dropout_U = dropout_U
    #if self.dropout_W or self.dropout_U:
    #    self.uses_learning_phase = True
    super(SSimpleAttention, self).__init__(**kwargs)
Example #30
Source File: rnnlayer.py From recurrent-attention-for-QA-SQUAD-based-on-keras with MIT License
def __init__(self, units, h, h_dim,
             kernel_initializer='glorot_uniform',
             recurrent_initializer='orthogonal',
             #activation='tanh', inner_activation='hard_sigmoid',
             #W_regularizer=None, U_regularizer=None, b_regularizer=None,
             #dropout_W=0., dropout_U=0.,
             **kwargs):
    self.units = units
    self.h = h
    self.h_dim = h_dim
    self.kernel_initializer = initializers.get(kernel_initializer)
    self.recurrent_initializer = initializers.get(recurrent_initializer)
    #self.activation = activations.get(activation)
    #self.inner_activation = activations.get(inner_activation)
    #self.W_regularizer = regularizers.get(W_regularizer)
    #self.U_regularizer = regularizers.get(U_regularizer)
    #self.b_regularizer = regularizers.get(b_regularizer)
    #self.dropout_W = dropout_W
    #self.dropout_U = dropout_U
    #if self.dropout_W or self.dropout_U:
    #    self.uses_learning_phase = True
    super(SimpleAttention, self).__init__(**kwargs)