from __future__ import absolute_import

import warnings

from keras import backend as K
from keras import activations
from keras import initializers
from keras import regularizers
from keras import constraints
from keras.engine import Layer
from keras.engine import InputSpec
from keras.legacy import interfaces
from keras.layers import RNN
from keras.layers.recurrent import _generate_dropout_mask


class IndRNNCell(Layer):
    """Independently Recurrent Neural Network (IndRNN) cell class.

    Derived from the paper [Independently Recurrent Neural Network (IndRNN):
    Building A Longer and Deeper RNN](https://arxiv.org/abs/1803.04831)

    Ref: [Tensorflow implementation](https://github.com/batzner/indrnn)

    # Arguments
        units: Positive integer, dimensionality of the output space.
        recurrent_clip_min: Can be one of None, -1 or float.
            If None, clipping of the recurrent weights will not take place.
            If float, the exact value will be used as the clipping range.
            If -1, the clip value is computed for the `relu` activation.
        recurrent_clip_max: Can be one of None, -1 or float.
            If None, clipping of the recurrent weights will not take place.
            If float, the exact value will be used as the clipping range.
            If -1, the clip value is computed for the `relu` activation.
        activation: Activation function to use
            (see [activations](../activations.md)).
            If you pass None, no activation is applied
            (ie. "linear" activation: `a(x) = x`).
        use_bias: Boolean, whether the layer uses a bias vector.
        kernel_initializer: Initializer for the `kernel` weights matrix,
            used for the linear transformation of the inputs
            (see [initializers](../initializers.md)).
        recurrent_initializer: Initializer for the `recurrent_kernel`
            weights matrix, used for the linear transformation of the
            recurrent state. Can be `None` or an available initializer.
            Defaults to `None`, which means uniform initialization.
            If None and recurrent_clip_min/max are not None, those clip
            values are used as the bounds of the uniform initialization
            (see [initializers](../initializers.md)).
        bias_initializer: Initializer for the bias vector
            (see [initializers](../initializers.md)).
        kernel_regularizer: Regularizer function applied to
            the `kernel` weights matrix
            (see [regularizer](../regularizers.md)).
        recurrent_regularizer: Regularizer function applied to
            the `recurrent_kernel` weights matrix
            (see [regularizer](../regularizers.md)).
        bias_regularizer: Regularizer function applied to the bias vector
            (see [regularizer](../regularizers.md)).
        kernel_constraint: Constraint function applied to
            the `kernel` weights matrix
            (see [constraints](../constraints.md)).
        recurrent_constraint: Constraint function applied to
            the `recurrent_kernel` weights matrix
            (see [constraints](../constraints.md)).
        bias_constraint: Constraint function applied to the bias vector
            (see [constraints](../constraints.md)).
        dropout: Float between 0 and 1.
            Fraction of the units to drop for
            the linear transformation of the inputs.
        recurrent_dropout: Float between 0 and 1.
            Fraction of the units to drop for
            the linear transformation of the recurrent state.
        implementation: Implementation mode, must be 2.
            Mode 1 would structure its operations as a larger number of
            smaller dot products and additions, whereas mode 2 batches
            them into fewer, larger operations. These modes have
            different performance profiles on different hardware and
            for different applications.
""" def __init__(self, units, recurrent_clip_min=-1, recurrent_clip_max=-1, activation='relu', use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer=None, bias_initializer='zeros', kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, dropout=0., recurrent_dropout=0., implementation=2, **kwargs): super(IndRNNCell, self).__init__(**kwargs) if implementation != 2: warnings.warn( "IndRNN only supports implementation 2 for the moment. Defaulting to implementation = 2") implementation = 2 if recurrent_clip_min is None or recurrent_clip_max is None: recurrent_clip_min = None recurrent_clip_max = None self.units = units self.recurrent_clip_min = recurrent_clip_min self.recurrent_clip_max = recurrent_clip_max self.activation = activations.get(activation) self.use_bias = use_bias self.kernel_initializer = initializers.get(kernel_initializer) self.recurrent_initializer = initializers.get(recurrent_initializer) \ if recurrent_initializer is not None else None self.bias_initializer = initializers.get(bias_initializer) self.kernel_regularizer = regularizers.get(kernel_regularizer) self.recurrent_regularizer = regularizers.get(recurrent_regularizer) self.bias_regularizer = regularizers.get(bias_regularizer) self.kernel_constraint = constraints.get(kernel_constraint) self.recurrent_constraint = constraints.get(recurrent_constraint) self.bias_constraint = constraints.get(bias_constraint) self.dropout = min(1., max(0., dropout)) self.recurrent_dropout = min(1., max(0., recurrent_dropout)) self.implementation = implementation self.state_size = (self.units,) self._dropout_mask = None self._recurrent_masks = None def build(self, input_shape): input_dim = input_shape[-1] if self.recurrent_clip_min == -1 or self.recurrent_clip_max == -1: self.recurrent_clip_min = 0.0 if hasattr(self, 'timesteps') and self.timesteps is not None: self.recurrent_clip_max = pow(2.0, 1. / self.timesteps) else: warnings.warn("IndRNNCell: Number of timesteps could not be determined. \n" "Defaulting to max clipping range of 1.0. 
\n" "If this model was trained using a specific timestep during training, " "inference may be wrong due to this default setting.\n" "Please ensure that you use the same number of timesteps during training " "and evaluation") self.recurrent_clip_max = 1.0 self.kernel = self.add_weight(shape=(input_dim, self.units), name='input_kernel', initializer=self.kernel_initializer, regularizer=self.kernel_regularizer, constraint=self.kernel_constraint) if self.recurrent_initializer is None: if self.recurrent_clip_min is not None and self.recurrent_clip_max is not None: initialization_value = min(self.recurrent_clip_max, 1.0) self.recurrent_initializer = initializers.uniform(-initialization_value, initialization_value) else: self.recurrent_initializer = initializers.uniform(-1.0, 1.0) self.recurrent_kernel = self.add_weight(shape=(self.units,), name='recurrent_kernel', initializer=self.recurrent_initializer, regularizer=self.recurrent_regularizer, constraint=self.recurrent_constraint) if self.recurrent_clip_min is not None and self.recurrent_clip_max is not None: if abs(self.recurrent_clip_min): abs_recurrent_kernel = K.abs(self.recurrent_kernel) min_recurrent_kernel = K.maximum(abs_recurrent_kernel, abs(self.recurrent_clip_min)) self.recurrent_kernel = K.sign(self.recurrent_kernel) * min_recurrent_kernel self.recurrent_kernel = K.clip(self.recurrent_kernel, self.recurrent_clip_min, self.recurrent_clip_max) if self.use_bias: bias_initializer = self.bias_initializer self.bias = self.add_weight(shape=(self.units,), name='bias', initializer=bias_initializer, regularizer=self.bias_regularizer, constraint=self.bias_constraint) else: self.bias = None self.built = True def call(self, inputs, states, training=None): if 0 < self.dropout < 1 and self._dropout_mask is None: self._dropout_mask = _generate_dropout_mask( K.ones_like(inputs), self.dropout, training=training, count=1) if (0 < self.recurrent_dropout < 1 and self._recurrent_masks is None): _recurrent_mask = _generate_dropout_mask( K.ones_like(states[0]), self.recurrent_dropout, training=training, count=1) self._recurrent_masks = _recurrent_mask # dropout matrices for input units dp_mask = self._dropout_mask # dropout matrices for recurrent units rec_dp_masks = self._recurrent_masks h_tm1 = states[0] # previous state if 0. < self.dropout < 1.: inputs *= dp_mask[0] if 0. 

    def get_config(self):
        config = {'units': self.units,
                  'recurrent_clip_min': self.recurrent_clip_min,
                  'recurrent_clip_max': self.recurrent_clip_max,
                  'activation': activations.serialize(self.activation),
                  'use_bias': self.use_bias,
                  'kernel_initializer': initializers.serialize(self.kernel_initializer),
                  'recurrent_initializer': initializers.serialize(self.recurrent_initializer),
                  'bias_initializer': initializers.serialize(self.bias_initializer),
                  'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
                  'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer),
                  'bias_regularizer': regularizers.serialize(self.bias_regularizer),
                  'kernel_constraint': constraints.serialize(self.kernel_constraint),
                  'recurrent_constraint': constraints.serialize(self.recurrent_constraint),
                  'bias_constraint': constraints.serialize(self.bias_constraint),
                  'dropout': self.dropout,
                  'recurrent_dropout': self.recurrent_dropout,
                  'implementation': self.implementation}
        base_config = super(IndRNNCell, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
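
# Usage note (illustrative; not part of the original API surface): IndRNNCell
# follows the standard Keras cell contract, so it can also be wrapped directly
# with keras.layers.RNN instead of using the IndRNN convenience layer below,
# e.g. (shapes are arbitrary):
#
#     from keras.layers import RNN
#     layer = RNN(IndRNNCell(64), return_sequences=True)
#
# The IndRNN layer defined next does this internally and, in addition, passes
# the number of timesteps to the cell so that the default recurrent clipping
# range 2 ** (1 / timesteps) can be computed in build().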


class IndRNN(RNN):
    """Independently Recurrent Neural Network (IndRNN) layer class.

    Derived from the paper [Independently Recurrent Neural Network (IndRNN):
    Building A Longer and Deeper RNN](https://arxiv.org/abs/1803.04831)

    Ref: [Tensorflow implementation](https://github.com/batzner/indrnn)

    # Arguments
        units: Positive integer, dimensionality of the output space.
        recurrent_clip_min: Can be one of None, -1 or float.
            If None, clipping of the recurrent weights will not take place.
            If float, the exact value will be used as the clipping range.
            If -1, computes the default clipping range for relu activations.
        recurrent_clip_max: Can be one of None, -1 or float.
            If None, clipping of the recurrent weights will not take place.
            If float, the exact value will be used as the clipping range.
            If -1, computes the default clipping range for relu activations.
        activation: Activation function to use
            (see [activations](../activations.md)).
            If you pass None, no activation is applied
            (ie. "linear" activation: `a(x) = x`).
        use_bias: Boolean, whether the layer uses a bias vector.
        kernel_initializer: Initializer for the `kernel` weights matrix,
            used for the linear transformation of the inputs
            (see [initializers](../initializers.md)).
        recurrent_initializer: Initializer for the `recurrent_kernel`
            weights matrix, used for the linear transformation of the
            recurrent state
            (see [initializers](../initializers.md)).
        bias_initializer: Initializer for the bias vector
            (see [initializers](../initializers.md)).
        kernel_regularizer: Regularizer function applied to
            the `kernel` weights matrix
            (see [regularizer](../regularizers.md)).
        recurrent_regularizer: Regularizer function applied to
            the `recurrent_kernel` weights matrix
            (see [regularizer](../regularizers.md)).
        bias_regularizer: Regularizer function applied to the bias vector
            (see [regularizer](../regularizers.md)).
        activity_regularizer: Regularizer function applied to
            the output of the layer (its "activation")
            (see [regularizer](../regularizers.md)).
        kernel_constraint: Constraint function applied to
            the `kernel` weights matrix
            (see [constraints](../constraints.md)).
        recurrent_constraint: Constraint function applied to
            the `recurrent_kernel` weights matrix
            (see [constraints](../constraints.md)).
        bias_constraint: Constraint function applied to the bias vector
            (see [constraints](../constraints.md)).
        dropout: Float between 0 and 1.
            Fraction of the units to drop for
            the linear transformation of the inputs.
        recurrent_dropout: Float between 0 and 1.
            Fraction of the units to drop for
            the linear transformation of the recurrent state.
        implementation: Implementation mode, must be 2.
            Mode 1 would structure its operations as a larger number of
            smaller dot products and additions, whereas mode 2 batches
            them into fewer, larger operations. These modes have
            different performance profiles on different hardware and
            for different applications.
        return_sequences: Boolean. Whether to return the last output
            in the output sequence, or the full sequence.
        return_state: Boolean. Whether to return the last state
            in addition to the output.
        go_backwards: Boolean (default False).
            If True, process the input sequence backwards and return the
            reversed sequence.
        stateful: Boolean (default False). If True, the last state
            for each sample at index i in a batch will be used as initial
            state for the sample of index i in the following batch.
        unroll: Boolean (default False).
            If True, the network will be unrolled,
            else a symbolic loop will be used.
            Unrolling can speed up an RNN,
            although it tends to be more memory-intensive.
            Unrolling is only suitable for short sequences.

    # References
        - [Learning to forget: Continual prediction with LSTM](http://www.mitpressjournals.org/doi/pdf/10.1162/089976600300015015)
        - [Supervised sequence labeling with recurrent neural networks](http://www.cs.toronto.edu/~graves/preprint.pdf)
        - [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287)
        - [Independently Recurrent Neural Network (IndRNN): Building A Longer and Deeper RNN](https://arxiv.org/abs/1803.04831)
    """

    @interfaces.legacy_recurrent_support
    def __init__(self, units,
                 recurrent_clip_min=-1,
                 recurrent_clip_max=-1,
                 activation='relu',
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 recurrent_initializer=None,
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 recurrent_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 recurrent_constraint=None,
                 bias_constraint=None,
                 dropout=0.,
                 recurrent_dropout=0.,
                 implementation=2,
                 return_sequences=False,
                 return_state=False,
                 go_backwards=False,
                 stateful=False,
                 unroll=False,
                 **kwargs):
        if implementation == 0:
            warnings.warn('`implementation=0` has been deprecated, '
                          'and now defaults to `implementation=2`. '
                          'Please update your layer call.')
        if K.backend() == 'theano':
            warnings.warn(
                'RNN dropout is no longer supported with the Theano backend '
                'due to technical limitations. '
                'You can either set `dropout` and `recurrent_dropout` to 0, '
                'or use the TensorFlow backend.')
            dropout = 0.
            recurrent_dropout = 0.
        cell = IndRNNCell(units,
                          recurrent_clip_min=recurrent_clip_min,
                          recurrent_clip_max=recurrent_clip_max,
                          activation=activation,
                          use_bias=use_bias,
                          kernel_initializer=kernel_initializer,
                          recurrent_initializer=recurrent_initializer,
                          bias_initializer=bias_initializer,
                          kernel_regularizer=kernel_regularizer,
                          recurrent_regularizer=recurrent_regularizer,
                          bias_regularizer=bias_regularizer,
                          kernel_constraint=kernel_constraint,
                          recurrent_constraint=recurrent_constraint,
                          bias_constraint=bias_constraint,
                          dropout=dropout,
                          recurrent_dropout=recurrent_dropout,
                          implementation=implementation)
        super(IndRNN, self).__init__(cell,
                                     return_sequences=return_sequences,
                                     return_state=return_state,
                                     go_backwards=go_backwards,
                                     stateful=stateful,
                                     unroll=unroll,
                                     **kwargs)
        self.activity_regularizer = regularizers.get(activity_regularizer)

    def build(self, input_shape):
        timesteps = input_shape[1]

        if timesteps is None:
            warnings.warn(
                "Number of timesteps was not provided. If this model is being used "
                "for training purposes, \n"
                "it is recommended to provide a finite number of timesteps when "
                "defining the input shape, \n"
                "so as to initialize the weights of the recurrent kernel properly "
                "and avoid exploding gradients.")

        self.cell.timesteps = timesteps
        super(IndRNN, self).build(input_shape)

    def call(self, inputs, mask=None, training=None, initial_state=None, constants=None):
        self.cell._dropout_mask = None
        self.cell._recurrent_masks = None

        return super(IndRNN, self).call(inputs,
                                        mask=mask,
                                        training=training,
                                        initial_state=initial_state,
                                        constants=constants)

    @property
    def units(self):
        return self.cell.units

    @property
    def recurrent_clip_min(self):
        return self.cell.recurrent_clip_min

    @property
    def recurrent_clip_max(self):
        return self.cell.recurrent_clip_max

    @property
    def activation(self):
        return self.cell.activation

    @property
    def use_bias(self):
        return self.cell.use_bias

    @property
    def kernel_initializer(self):
        return self.cell.kernel_initializer

    @property
    def recurrent_initializer(self):
        return self.cell.recurrent_initializer

    @property
    def bias_initializer(self):
        return self.cell.bias_initializer

    @property
    def kernel_regularizer(self):
        return self.cell.kernel_regularizer

    @property
    def recurrent_regularizer(self):
        return self.cell.recurrent_regularizer

    @property
    def bias_regularizer(self):
        return self.cell.bias_regularizer

    @property
    def kernel_constraint(self):
        return self.cell.kernel_constraint

    @property
    def recurrent_constraint(self):
        return self.cell.recurrent_constraint

    @property
    def bias_constraint(self):
        return self.cell.bias_constraint

    @property
    def dropout(self):
        return self.cell.dropout

    @property
    def recurrent_dropout(self):
        return self.cell.recurrent_dropout

    @property
    def implementation(self):
        return self.cell.implementation

    def get_config(self):
        config = {'units': self.units,
                  'recurrent_clip_min': self.recurrent_clip_min,
                  'recurrent_clip_max': self.recurrent_clip_max,
                  'activation': activations.serialize(self.activation),
                  'use_bias': self.use_bias,
                  'kernel_initializer': initializers.serialize(self.kernel_initializer),
                  'recurrent_initializer': initializers.serialize(self.recurrent_initializer),
                  'bias_initializer': initializers.serialize(self.bias_initializer),
                  'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
                  'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer),
                  'bias_regularizer': regularizers.serialize(self.bias_regularizer),
                  'activity_regularizer': regularizers.serialize(self.activity_regularizer),
                  'kernel_constraint': constraints.serialize(self.kernel_constraint),
                  'recurrent_constraint': constraints.serialize(self.recurrent_constraint),
                  'bias_constraint': constraints.serialize(self.bias_constraint),
                  'dropout': self.dropout,
                  'recurrent_dropout': self.recurrent_dropout,
                  'implementation': self.implementation}
        base_config = super(IndRNN, self).get_config()
        del base_config['cell']
        return dict(list(base_config.items()) + list(config.items()))

    @classmethod
    def from_config(cls, config):
        if 'implementation' in config and config['implementation'] == 0:
            config['implementation'] = 2
        return cls(**config)
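

# Minimal usage sketch (added for illustration; the shapes, layer sizes and
# training setup below are arbitrary assumptions, not part of the original
# module). It stacks two IndRNN layers, the setting the IndRNN paper targets:
# deep stacks with relu activations over long sequences.
if __name__ == '__main__':
    import numpy as np
    from keras.models import Sequential
    from keras.layers import Dense

    timesteps, input_dim = 20, 8

    model = Sequential()
    # Providing a finite number of timesteps lets build() derive the default
    # recurrent clipping range 2 ** (1 / timesteps).
    model.add(IndRNN(32, return_sequences=True,
                     input_shape=(timesteps, input_dim)))
    model.add(IndRNN(32))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    model.summary()

    # Smoke test on random data.
    x = np.random.random((4, timesteps, input_dim))
    y = np.random.random((4, 1))
    model.train_on_batch(x, y)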