Python keras.backend.dropout() Examples
The following are 30 code examples showing how to use keras.backend.dropout(). They are extracted from open-source projects. You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar, browse all available functions and classes of the module keras.backend, or try the search function.
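Before the project examples, here is a minimal sketch of the pattern most of them follow: build a dropped version of a tensor with K.dropout and gate it with K.in_train_phase so the noise is only applied during training. This snippet is illustrative and is not taken from any of the projects below.

from keras import backend as K

def dropped(x, rate=0.5, training=None):
    # K.dropout zeroes entries with probability `rate` and rescales the
    # survivors by 1 / (1 - rate) (inverted dropout); K.in_train_phase
    # switches the noise off at inference time.
    return K.in_train_phase(lambda: K.dropout(x, rate), x, training=training)

x = K.ones((2, 4))
y = dropped(x, rate=0.25)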
Example 1
Project: embedding-as-service Author: amansrivastava17 File: qrnn.py License: MIT License | 6 votes |
def preprocess_input(self, inputs, training=None):
    if self.window_size > 1:
        inputs = K.temporal_padding(inputs, (self.window_size - 1, 0))
    inputs = K.expand_dims(inputs, 2)  # add a dummy dimension

    output = K.conv2d(inputs, self.kernel, strides=self.strides,
                      padding='valid',
                      data_format='channels_last')
    output = K.squeeze(output, 2)  # remove the dummy dimension
    if self.use_bias:
        output = K.bias_add(output, self.bias, data_format='channels_last')

    if self.dropout is not None and 0. < self.dropout < 1.:
        z = output[:, :, :self.units]
        f = output[:, :, self.units:2 * self.units]
        o = output[:, :, 2 * self.units:]
        f = K.in_train_phase(1 - _dropout(1 - f, self.dropout), f,
                             training=training)
        return K.concatenate([z, f, o], -1)
    else:
        return output
Example 2
Project: recurrent-attention-for-QA-SQUAD-based-on-keras Author: wentaozhu File: rnnlayer.py License: MIT License | 6 votes |
def get_constants(self, inputs, training=None):
    constants = []
    '''if 0 < self.dropout_U < 1:
        ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
        ones = K.tile(ones, (1, self.units))
        B_U = [K.in_train_phase(K.dropout(ones, self.dropout_U), ones) for _ in range(3)]
        constants.append(B_U)
    else:
        constants.append([K.cast_to_floatx(1.) for _ in range(3)])

    if 0 < self.dropout_W < 1:
        input_shape = K.int_shape(x)
        input_dim = input_shape[-1]
        ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
        ones = K.tile(ones, (1, int(input_dim)))
        B_W = [K.in_train_phase(K.dropout(ones, self.dropout_W), ones) for _ in range(3)]
        constants.append(B_W)
    else:'''
    constants.append([K.cast_to_floatx(1.) for _ in range(3)])
    return constants
Example 3
Project: recurrent-attention-for-QA-SQUAD-based-on-keras Author: wentaozhu File: rnnlayer.py License: MIT License | 6 votes |
def get_constants(self, inputs, training=None):
    constants = []
    '''if 0 < self.dropout_U < 1:
        ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
        ones = K.tile(ones, (1, self.units))
        B_U = [K.in_train_phase(K.dropout(ones, self.dropout_U), ones) for _ in range(3)]
        constants.append(B_U)
    else:
        constants.append([K.cast_to_floatx(1.) for _ in range(3)])

    if 0 < self.dropout_W < 1:
        input_shape = K.int_shape(x)
        input_dim = input_shape[-1]
        ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
        ones = K.tile(ones, (1, int(input_dim)))
        B_W = [K.in_train_phase(K.dropout(ones, self.dropout_W), ones) for _ in range(3)]
        constants.append(B_W)
    else:'''
    constants.append([K.cast_to_floatx(1.) for _ in range(3)])
    return constants
Example 4
Project: recurrent-attention-for-QA-SQUAD-based-on-keras Author: wentaozhu File: rnnlayer.py License: MIT License | 6 votes |
def get_constants(self, inputs, training=None):
    constants = []
    '''if 0 < self.dropout_U < 1:
        ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
        ones = K.tile(ones, (1, self.units))
        B_U = [K.in_train_phase(K.dropout(ones, self.dropout_U), ones) for _ in range(3)]
        constants.append(B_U)
    else:
        constants.append([K.cast_to_floatx(1.) for _ in range(3)])

    if 0 < self.dropout_W < 1:
        input_shape = K.int_shape(x)
        input_dim = input_shape[-1]
        ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
        ones = K.tile(ones, (1, int(input_dim)))
        B_W = [K.in_train_phase(K.dropout(ones, self.dropout_W), ones) for _ in range(3)]
        constants.append(B_W)
    else:'''
    constants.append([K.cast_to_floatx(1.) for _ in range(3)])
    return constants
Example 5
Project: recurrent-attention-for-QA-SQUAD-based-on-keras Author: wentaozhu File: rnnlayer.py License: MIT License | 6 votes |
def step(self, inputs, states):
    h_tm1 = states[0]  # previous memory
    # B_U = states[1]  # dropout matrices for recurrent units
    # B_W = states[2]
    h_tm1a = K.dot(h_tm1, self.Wa)
    eij = K.dot(K.tanh(h_tm1a + K.dot(inputs[:, :self.h_dim], self.Ua)), self.Va)
    eijs = K.repeat_elements(eij, self.h_dim, axis=1)
    # alphaij = K.softmax(eijs)  # batchsize * lenh; h: batchsize * lenh * ndim
    # ci = K.permute_dimensions(K.permute_dimensions(self.h, [2, 0, 1]) * alphaij, [1, 2, 0])
    # cisum = K.sum(ci, axis=1)
    cisum = eijs * inputs[:, :self.h_dim]
    # print(K.shape(cisum), cisum.shape, ci.shape, self.h.shape, alphaij.shape, x.shape)

    zr = K.sigmoid(K.dot(inputs[:, self.h_dim:], self.Wzr) +
                   K.dot(h_tm1, self.Uzr) +
                   K.dot(cisum, self.Czr))
    zi = zr[:, :self.units]
    ri = zr[:, self.units: 2 * self.units]
    si_ = K.tanh(K.dot(inputs[:, self.h_dim:], self.W) +
                 K.dot(ri * h_tm1, self.U) +
                 K.dot(cisum, self.C))
    si = (1 - zi) * h_tm1 + zi * si_
    return si, [si]  # h_tm1, [h_tm1]
Example 6
Project: nn_playground Author: DingKe File: qrnn.py License: MIT License | 6 votes |
def preprocess_input(self, inputs, training=None):
    if self.window_size > 1:
        inputs = K.temporal_padding(inputs, (self.window_size - 1, 0))
    inputs = K.expand_dims(inputs, 2)  # add a dummy dimension

    output = K.conv2d(inputs, self.kernel, strides=self.strides,
                      padding='valid',
                      data_format='channels_last')
    output = K.squeeze(output, 2)  # remove the dummy dimension
    if self.use_bias:
        output = K.bias_add(output, self.bias, data_format='channels_last')

    if self.dropout is not None and 0. < self.dropout < 1.:
        z = output[:, :, :self.units]
        f = output[:, :, self.units:2 * self.units]
        o = output[:, :, 2 * self.units:]
        f = K.in_train_phase(1 - _dropout(1 - f, self.dropout), f,
                             training=training)
        return K.concatenate([z, f, o], -1)
    else:
        return output
Example 7
Project: nn_playground Author: DingKe File: qrnn.py License: MIT License | 6 votes |
def get_config(self):
    config = {'units': self.units,
              'window_size': self.window_size,
              'stride': self.strides[0],
              'return_sequences': self.return_sequences,
              'go_backwards': self.go_backwards,
              'stateful': self.stateful,
              'unroll': self.unroll,
              'use_bias': self.use_bias,
              'dropout': self.dropout,
              'activation': activations.serialize(self.activation),
              'kernel_initializer': initializers.serialize(self.kernel_initializer),
              'bias_initializer': initializers.serialize(self.bias_initializer),
              'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
              'bias_regularizer': regularizers.serialize(self.bias_regularizer),
              'activity_regularizer': regularizers.serialize(self.activity_regularizer),
              'kernel_constraint': constraints.serialize(self.kernel_constraint),
              'bias_constraint': constraints.serialize(self.bias_constraint),
              'input_dim': self.input_dim,
              'input_length': self.input_length}
    base_config = super(QRNN, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))
Example 8
Project: nn_playground Author: DingKe File: ternary_layers.py License: MIT License | 6 votes |
def step(self, inputs, states):
    if 0 < self.dropout < 1:
        h = ternarize_dot(inputs * states[1], self.kernel)
    else:
        h = ternarize_dot(inputs, self.kernel)
    if self.bias is not None:
        h = K.bias_add(h, self.bias)

    prev_output = states[0]
    if 0 < self.recurrent_dropout < 1:
        prev_output *= states[2]
    output = h + ternarize_dot(prev_output, self.recurrent_kernel)
    if self.activation is not None:
        output = self.activation(output)

    # Properly set learning phase on output tensor.
    if 0 < self.dropout + self.recurrent_dropout:
        output._uses_learning_phase = True
    return output, [output]
Example 9
Project: TT_RNN Author: Tuyki File: TTRNN.py License: MIT License | 6 votes |
def get_config(self):
    config = {'units': self.units,
              'activation': activations.serialize(self.activation),
              'use_bias': self.use_bias,
              'kernel_initializer': initializers.serialize(self.kernel_initializer),
              'recurrent_initializer': initializers.serialize(self.recurrent_initializer),
              'bias_initializer': initializers.serialize(self.bias_initializer),
              'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
              'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer),
              'bias_regularizer': regularizers.serialize(self.bias_regularizer),
              'activity_regularizer': regularizers.serialize(self.activity_regularizer),
              'kernel_constraint': constraints.serialize(self.kernel_constraint),
              'recurrent_constraint': constraints.serialize(self.recurrent_constraint),
              'bias_constraint': constraints.serialize(self.bias_constraint),
              'dropout': self.dropout,
              'recurrent_dropout': self.recurrent_dropout}
    base_config = super(TT_RNN, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))
Example 10
Project: TT_RNN Author: Tuyki File: TTRNN.py License: MIT License | 6 votes |
def get_config(self):
    config = {'units': self.units,
              'activation': activations.serialize(self.activation),
              'recurrent_activation': activations.serialize(self.recurrent_activation),
              'use_bias': self.use_bias,
              'kernel_initializer': initializers.serialize(self.kernel_initializer),
              'recurrent_initializer': initializers.serialize(self.recurrent_initializer),
              'bias_initializer': initializers.serialize(self.bias_initializer),
              'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
              'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer),
              'bias_regularizer': regularizers.serialize(self.bias_regularizer),
              'activity_regularizer': regularizers.serialize(self.activity_regularizer),
              'kernel_constraint': constraints.serialize(self.kernel_constraint),
              'recurrent_constraint': constraints.serialize(self.recurrent_constraint),
              'bias_constraint': constraints.serialize(self.bias_constraint),
              'dropout': self.dropout,
              'recurrent_dropout': self.recurrent_dropout}
    base_config = super(TT_GRU, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))
Example 11
Project: chemical_vae Author: aspuru-guzik-group File: tgru_k2_gpu.py License: Apache License 2.0 | 6 votes |
def get_constants(self, inputs, training=None):
    constants = []
    if 0. < self.recurrent_dropout < 1.:
        ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1)))
        ones = K.tile(ones, (1, self.units))

        def dropped_inputs():
            return K.dropout(ones, self.recurrent_dropout)

        rec_dp_mask = [K.in_train_phase(dropped_inputs, ones,
                                        training=training)
                       for _ in range(3)]
        constants.append(rec_dp_mask)
    else:
        constants.append([K.cast_to_floatx(1.) for _ in range(3)])
    return constants
Example 12
Project: PhasedLSTM-Keras Author: fferroni File: PhasedLSTM.py License: MIT License | 6 votes |
def preprocess_input(self, inputs, training=None):
    if self.implementation == 0:
        input_shape = K.int_shape(inputs)
        input_dim = input_shape[2]
        timesteps = input_shape[1]

        x_i = _time_distributed_dense(inputs, self.kernel_i, self.bias_i,
                                      self.dropout, input_dim, self.units,
                                      timesteps, training=training)
        x_f = _time_distributed_dense(inputs, self.kernel_f, self.bias_f,
                                      self.dropout, input_dim, self.units,
                                      timesteps, training=training)
        x_c = _time_distributed_dense(inputs, self.kernel_c, self.bias_c,
                                      self.dropout, input_dim, self.units,
                                      timesteps, training=training)
        x_o = _time_distributed_dense(inputs, self.kernel_o, self.bias_o,
                                      self.dropout, input_dim, self.units,
                                      timesteps, training=training)
        return K.concatenate([x_i, x_f, x_c, x_o], axis=2)
    else:
        return inputs
Example 13
Project: PhasedLSTM-Keras Author: fferroni File: PhasedLSTM.py License: MIT License | 6 votes |
def get_config(self):
    config = {'units': self.units,
              'activation': activations.serialize(self.activation),
              'recurrent_activation': activations.serialize(self.recurrent_activation),
              'use_bias': self.use_bias,
              'kernel_initializer': initializers.serialize(self.kernel_initializer),
              'recurrent_initializer': initializers.serialize(self.recurrent_initializer),
              'bias_initializer': initializers.serialize(self.bias_initializer),
              'unit_forget_bias': self.unit_forget_bias,
              'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
              'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer),
              'bias_regularizer': regularizers.serialize(self.bias_regularizer),
              'activity_regularizer': regularizers.serialize(self.activity_regularizer),
              'kernel_constraint': constraints.serialize(self.kernel_constraint),
              'recurrent_constraint': constraints.serialize(self.recurrent_constraint),
              'bias_constraint': constraints.serialize(self.bias_constraint),
              'dropout': self.dropout,
              'recurrent_dropout': self.recurrent_dropout}
    base_config = super(PhasedLSTM, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))
Example 14
Project: deep-models Author: LaurentMazare File: lstm_zoneout.py License: Apache License 2.0 | 5 votes |
def zoneout(self, v, prev_v, pr=0.):
    diff = v - prev_v
    diff = K.in_train_phase(K.dropout(diff, pr, noise_shape=(self.output_dim,)), diff)
    # In testing, always return v * (1-pr) + prev_v * pr
    # In training, when K.dropout returns 0, return prev_v;
    # when K.dropout returns diff/(1-pr), return v
    return prev_v + diff * (1 - pr)
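A quick numeric check of the two comments above (my own sketch, not from the deep-models repository): at test time K.dropout is bypassed and the result is the stated weighted average, while during training a dropped unit keeps the previous value and a kept unit, thanks to the inverted-dropout scaling, recovers the new value exactly.

import numpy as np

pr = 0.3
v, prev_v = 5.0, 2.0
diff = v - prev_v

# test phase: K.dropout is bypassed, so the result is the weighted average
assert np.isclose(prev_v + diff * (1 - pr), v * (1 - pr) + prev_v * pr)

# train phase, unit dropped: K.dropout returns 0, so the previous value is kept
assert np.isclose(prev_v + 0.0 * (1 - pr), prev_v)

# train phase, unit kept: K.dropout returns diff / (1 - pr), so the new value is used
assert np.isclose(prev_v + (diff / (1 - pr)) * (1 - pr), v)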
Example 15
Project: gandlf Author: codekansas File: core.py License: MIT License | 5 votes |
def call(self, x, mask=None):
    if 0. < self.p < 1.:
        noise_shape = self._get_noise_shape(x)
        x = K.dropout(x, self.p, noise_shape)
    return x
Example 16
Project: Dropout_BBalpha Author: YingzhenLi File: BBalpha_dropout.py License: MIT License | 5 votes |
def Dropout_mc(p):
    layer = Lambda(lambda x: K.dropout(x, p), output_shape=lambda shape: shape)
    return layer
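Because the Lambda applies K.dropout unconditionally, the resulting layer keeps dropping units at prediction time, which is what Monte Carlo dropout needs. A possible usage sketch, assuming the Dropout_mc function above is in scope (the model below is illustrative and not part of the project):

from keras.models import Sequential
from keras.layers import Dense

model = Sequential([
    Dense(64, activation='relu', input_shape=(20,)),
    Dropout_mc(0.5),               # stays stochastic at test time
    Dense(10, activation='softmax'),
])
# Averaging several stochastic forward passes approximates the predictive mean,
# e.g. preds = np.mean([model.predict(x_test) for _ in range(T)], axis=0)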
Example 17
Project: Dropout_BBalpha Author: YingzhenLi File: BBalpha_dropout.py License: MIT License | 5 votes |
def get_logit_mlp_layers(nb_layers, nb_units, p, wd, nb_classes, layers=[],
                         dropout='none'):
    if dropout == 'MC':
        D = Dropout_mc
    if dropout == 'pW':
        D = pW
    if dropout == 'none':
        D = Identity

    for _ in xrange(nb_layers):
        layers.append(D(p))
        layers.append(Dense(nb_units, activation='relu', W_regularizer=l2(wd)))
    layers.append(D(p))
    layers.append(Dense(nb_classes, W_regularizer=l2(wd)))
    return layers
Example 18
Project: Dropout_BBalpha Author: YingzhenLi File: BBalpha_dropout.py License: MIT License | 5 votes |
def get_logit_cnn_layers(nb_units, p, wd, nb_classes, layers=[], dropout=False):
    # number of convolutional filters to use
    nb_filters = 32
    # size of pooling area for max pooling
    pool_size = (2, 2)
    # convolution kernel size
    kernel_size = (3, 3)

    if dropout == 'MC':
        D = Dropout_mc
    if dropout == 'pW':
        D = pW
    if dropout == 'none':
        D = Identity

    layers.append(Convolution2D(nb_filters, kernel_size[0], kernel_size[1],
                                border_mode='valid', W_regularizer=l2(wd)))
    layers.append(Activation('relu'))
    layers.append(Convolution2D(nb_filters, kernel_size[0], kernel_size[1],
                                W_regularizer=l2(wd)))
    layers.append(Activation('relu'))
    layers.append(MaxPooling2D(pool_size=pool_size))
    layers.append(Flatten())

    layers.append(D(p))
    layers.append(Dense(nb_units, W_regularizer=l2(wd)))
    layers.append(Activation('relu'))
    layers.append(D(p))
    layers.append(Dense(nb_classes, W_regularizer=l2(wd)))
    return layers
Example 19
Project: keras-transformer Author: kpot File: attention.py License: MIT License | 5 votes |
def __init__(self, num_heads: int, use_masking: bool,
             dropout: float = 0.0,
             compression_window_size: int = None,
             **kwargs):
    """
    :param num_heads: number of attention heads
    :param use_masking: when True, forbids the attention to see the further
        elements in the sequence (particularly important in language modelling).
    :param dropout: dropout that should be applied to the attention
        (after the softmax).
    :param compression_window_size: an integer value >= 1 controlling how much
        we should compress the attention. For more details, read about
        memory-compressed self-attention in "Generating Wikipedia by
        summarizing long sequences" (https://arxiv.org/pdf/1801.10198.pdf).
    :param kwargs: any extra arguments typical for a Keras layer, such as
        name, etc.
    """
    self.num_heads = num_heads
    self.use_masking = use_masking
    self.dropout = dropout
    if (compression_window_size is not None
            and compression_window_size <= 0):
        assert ValueError(
            f"Too small compression window ({compression_window_size})")
    self.compression_window_size = compression_window_size
    super().__init__(**kwargs)
Example 20
Project: keras-transformer Author: kpot File: attention.py License: MIT License | 5 votes |
def get_config(self):
    config = super().get_config()
    config['num_heads'] = self.num_heads
    config['use_masking'] = self.use_masking
    config['dropout'] = self.dropout
    config['compression_window_size'] = self.compression_window_size
    return config

# noinspection PyAttributeOutsideInit
Example 21
Project: keras-transformer Author: kpot File: attention.py License: MIT License | 5 votes |
def apply_dropout_if_needed(self, attention_softmax, training=None):
    if 0.0 < self.dropout < 1.0:
        def dropped_softmax():
            return K.dropout(attention_softmax, self.dropout)

        return K.in_train_phase(dropped_softmax, attention_softmax,
                                training=training)
    return attention_softmax
Example 22
Project: keras-transformer Author: kpot File: extras.py License: MIT License | 5 votes |
def call(self, inputs, **kwargs):
    main_input, embedding_matrix = inputs
    input_shape_tensor = K.shape(main_input)
    last_input_dim = K.int_shape(main_input)[-1]
    emb_input_dim, emb_output_dim = K.int_shape(embedding_matrix)
    projected = K.dot(K.reshape(main_input, (-1, last_input_dim)),
                      self.projection)
    if self.add_biases:
        projected = K.bias_add(projected, self.biases,
                               data_format='channels_last')
    if 0 < self.projection_dropout < 1:
        projected = K.in_train_phase(
            lambda: K.dropout(projected, self.projection_dropout),
            projected,
            training=kwargs.get('training'))
    attention = K.dot(projected, K.transpose(embedding_matrix))
    if self.scaled_attention:
        # scaled dot-product attention, described in
        # "Attention is all you need" (https://arxiv.org/abs/1706.03762)
        sqrt_d = K.constant(math.sqrt(emb_output_dim), dtype=K.floatx())
        attention = attention / sqrt_d
    result = K.reshape(
        self.activation(attention),
        (input_shape_tensor[0], input_shape_tensor[1], emb_input_dim))
    return result
Example 23
Project: embedding-as-service Author: amansrivastava17 File: qrnn.py License: MIT License | 5 votes |
def _dropout(x, level, noise_shape=None, seed=None):
    x = K.dropout(x, level, noise_shape, seed)
    x *= (1. - level)  # compensate for the scaling by the dropout
    return x
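A small numpy check of why the (1. - level) factor is there (my own illustration, not part of embedding-as-service): K.dropout implements inverted dropout, scaling the surviving entries by 1 / (1 - level), so multiplying the result by (1 - level) undoes that scaling and leaves plain masking, which is the form the QRNN forget-gate dropout in Example 1 expects.

import numpy as np

np.random.seed(0)
x = np.full((4, 5), 2.0, dtype='float32')
level = 0.25

mask = (np.random.rand(*x.shape) >= level).astype('float32')  # keep with prob 1 - level
inverted = x * mask / (1.0 - level)      # what K.dropout produces for this mask
compensated = inverted * (1.0 - level)   # what _dropout returns

assert np.allclose(compensated, x * mask)  # plain masking, no rescaling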
Example 24
Project: embedding-as-service Author: amansrivastava17 File: qrnn.py License: MIT License | 5 votes |
def __init__(self, units, window_size=2, stride=1,
             return_sequences=False, go_backwards=False,
             stateful=False, unroll=False, activation='tanh',
             kernel_initializer='uniform', bias_initializer='zero',
             kernel_regularizer=None, bias_regularizer=None,
             activity_regularizer=None,
             kernel_constraint=None, bias_constraint=None,
             dropout=0, use_bias=True, input_dim=None, input_length=None,
             **kwargs):
    self.return_sequences = return_sequences
    self.go_backwards = go_backwards
    self.stateful = stateful
    self.unroll = unroll

    self.units = units
    self.window_size = window_size
    self.strides = (stride, 1)

    self.use_bias = use_bias
    self.activation = activations.get(activation)
    self.kernel_initializer = initializers.get(kernel_initializer)
    self.bias_initializer = initializers.get(bias_initializer)
    self.kernel_regularizer = regularizers.get(kernel_regularizer)
    self.bias_regularizer = regularizers.get(bias_regularizer)
    self.activity_regularizer = regularizers.get(activity_regularizer)
    self.kernel_constraint = constraints.get(kernel_constraint)
    self.bias_constraint = constraints.get(bias_constraint)
    self.dropout = dropout
    self.supports_masking = True
    self.input_spec = [InputSpec(ndim=3)]
    self.input_dim = input_dim
    self.input_length = input_length
    if self.input_dim:
        kwargs['input_shape'] = (self.input_length, self.input_dim)
    super(QRNN, self).__init__(**kwargs)
Example 25
Project: embedding-as-service Author: amansrivastava17 File: qrnn.py License: MIT License | 5 votes |
def step(self, inputs, states):
    prev_output = states[0]

    z = inputs[:, :self.units]
    f = inputs[:, self.units:2 * self.units]
    o = inputs[:, 2 * self.units:]

    z = self.activation(z)
    f = f if self.dropout is not None and 0. < self.dropout < 1. else K.sigmoid(f)
    o = K.sigmoid(o)

    output = f * prev_output + (1 - f) * z
    output = o * output

    return output, [output]
Example 26
Project: keras-monotonic-attention Author: asmekal File: attention_decoder.py License: GNU Affero General Public License v3.0 | 5 votes |
def _time_distributed_dense(x, w, b=None, dropout=None,
                            input_dim=None, output_dim=None,
                            timesteps=None, training=None):
    """Apply `y . w + b` for every temporal slice y of x.

    # Arguments
        x: input tensor.
        w: weight matrix.
        b: optional bias vector.
        dropout: whether to apply dropout (same dropout mask
            for every temporal slice of the input).
        input_dim: integer; optional dimensionality of the input.
        output_dim: integer; optional dimensionality of the output.
        timesteps: integer; optional number of timesteps.
        training: training phase tensor or boolean.

    # Returns
        Output tensor.
    """
    if not input_dim:
        input_dim = K.shape(x)[2]
    if not timesteps:
        timesteps = K.shape(x)[1]
    if not output_dim:
        output_dim = K.shape(w)[1]

    if dropout is not None and 0. < dropout < 1.:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x = K.in_train_phase(x * expanded_dropout_matrix, x, training=training)

    # maybe below is a clearer implementation compared to older keras;
    # at least it works the same for tensorflow, but not tested on other backends
    x = K.dot(x, w)
    if b is not None:
        x = K.bias_add(x, b)
    return x
Example 27
Project: keras-monotonic-attention Author: asmekal File: attention_decoder.py License: GNU Affero General Public License v3.0 | 5 votes |
def call(self, x, use_teacher_forcing=True, training=None):
    # TODO: check that model is loading from .h5 correctly
    # TODO: for now this cannot be a shared layer
    # (it only works if we use, or don't use, teacher forcing in all cases simultaneously)

    # this sequence is used only to extract the amount of timesteps
    # (the same as in the output sequence)
    fake_input = x
    if isinstance(x, list):
        # teacher forcing for training
        self.x_seq, self.y_true = x
        self.use_teacher_forcing = use_teacher_forcing
        fake_input = K.expand_dims(self.y_true)
    else:
        # inference
        self.x_seq = x
        self.use_teacher_forcing = False

    # apply a dense layer over the time dimension of the sequence
    # do it here because it doesn't depend on any previous steps
    # therefore we can save computation time:
    self._uxpb = _time_distributed_dense(self.x_seq, self.U_a, b=self.b_a,
                                         dropout=self.dropout,
                                         input_dim=self.input_dim,
                                         timesteps=self.timesteps,
                                         output_dim=self.units,
                                         training=training)

    last_output, outputs, states = K.rnn(
        self.step,
        inputs=fake_input,
        initial_states=self.get_initial_state(self.x_seq)
    )
    return outputs
Example 28
Project: costar_plan Author: jhu-lcsr File: permanent_dropout.py License: Apache License 2.0 | 5 votes |
def call(self, x, mask=None):
    if 0. < self.rate < 1.:
        noise_shape = self._get_noise_shape(x)
        x = K.dropout(x, self.rate, noise_shape)
    return x
Example 29
Project: Quora Author: KevinLiao159 File: neural_networks.py License: MIT License | 5 votes |
def call(self, x):
    if 0. < self.prob < 1.:
        self.layer.kernel = K.in_train_phase(
            K.dropout(self.layer.kernel, self.prob),
            self.layer.kernel)
        self.layer.bias = K.in_train_phase(
            K.dropout(self.layer.bias, self.prob),
            self.layer.bias)
    return self.layer.call(x)
Example 30
Project: Quora Author: KevinLiao159 File: submission_v50.py License: MIT License | 5 votes |
def call(self, x):
    if 0. < self.prob < 1.:
        self.layer.kernel = K.in_train_phase(
            K.dropout(self.layer.kernel, self.prob),
            self.layer.kernel)
        self.layer.bias = K.in_train_phase(
            K.dropout(self.layer.bias, self.prob),
            self.layer.bias)
    return self.layer.call(x)