import theano
import theano.tensor as T

from lasagne.layers.base import Layer
from lasagne.random import get_rng
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
from lasagne.layers import get_output as lo

__all__ = [
    "WordDropoutLayer",
    "ConditionedWordDropoutLayer",
]


class WordDropoutLayer(Layer):
    """Frequency-based word dropout layer

    Produces a binary dropout mask in which each element is kept with
    probability ``w_freq / (w_freq + alpha)``, so rare words are dropped
    more often than frequent ones. See notes for disabling dropout during
    testing.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        the layer feeding into this layer, or the expected input shape;
        its shape determines the shape of the mask
    w_freq : a :class:`Layer` instance
        a layer whose output gives the corpus frequency of each word
    alpha : float
        smoothing constant; each element is dropped with probability
        ``alpha / (w_freq + alpha)``
    shared_axes : tuple of int
        Axes to share the dropout mask over. By default, each value can be
        dropped individually. ``shared_axes=(0,)`` uses the same mask across
        the batch. ``shared_axes=(2, 3)`` uses the same mask across the
        spatial dimensions of 2D feature maps.

    Notes
    -----
    Unlike :class:`lasagne.layers.DropoutLayer`, this layer returns the
    dropout *mask* itself rather than the masked input; multiply it with the
    input downstream. Dropout is a regularizer that randomly sets input
    values to zero; see [1]_, [2]_ for why this might improve generalization.

    The output of ``w_freq`` is built into the graph at construction time via
    :func:`lasagne.layers.get_output`.

    The behaviour of the layer depends on the ``deterministic`` keyword
    argument passed to :func:`lasagne.layers.get_output`. If ``True``, an
    all-ones mask is returned. If ``False`` or not specified, dropout is
    enabled. Usually, you would use ``deterministic=False`` at train time
    and ``deterministic=True`` at test time.

    See also
    --------
    dropout_channels : Drops full channels of feature maps
    spatial_dropout : Alias for :func:`dropout_channels`
    dropout_locations : Drops full pixels or voxels of feature maps

    References
    ----------
    .. [1] Hinton, G., Srivastava, N., Krizhevsky, A., Sutskever, I.,
           Salakhutdinov, R. R. (2012): Improving neural networks by
           preventing co-adaptation of feature detectors. arXiv preprint
           arXiv:1207.0580.

    .. [2] Srivastava, N., Hinton, G., Krizhevsky, A., Sutskever, I., &
           Salakhutdinov, R. R. (2014): Dropout: A Simple Way to Prevent
           Neural Networks from Overfitting. Journal of Machine Learning
           Research, 15(1), 1929-1958.
    """
    def __init__(self, incoming, w_freq, alpha, shared_axes=(), **kwargs):
        super(WordDropoutLayer, self).__init__(incoming, **kwargs)
        self._srng = RandomStreams(get_rng().randint(1, 2147462579))
        self.w_freq = w_freq
        self.alpha = alpha
        # self.retain = lo(alpha)/(lo(p)+lo(alpha))
        # keep probability per element: w_freq / (w_freq + alpha)
        self.retain = T.constant(1.) - (T.constant(alpha) /
                                        (lo(w_freq) + T.constant(alpha)))
        self.shared_axes = tuple(shared_axes)

    def get_output_for(self, input, deterministic=False, **kwargs):
        if deterministic or self.alpha == 0:
            # no dropout: return an all-ones mask of the input's shape
            return T.ones_like(input)
        else:
            # use nonsymbolic shape for dropout mask if possible
            mask_shape = self.input_shape
            if any(s is None for s in mask_shape):
                mask_shape = input.shape

            # apply dropout, respecting shared axes
            if self.shared_axes:
                shared_axes = tuple(a if a >= 0 else a + input.ndim
                                    for a in self.shared_axes)
                mask_shape = tuple(1 if a in shared_axes else s
                                   for a, s in enumerate(mask_shape))
            mask = self._srng.binomial(mask_shape, p=self.retain,
                                       dtype=input.dtype)
            if self.shared_axes:
                bcast = tuple(bool(s == 1) for s in mask_shape)
                mask = T.patternbroadcast(mask, bcast)
            return mask
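

# Convenience wrapper (a sketch, not part of the original layers): since
# ``WordDropoutLayer`` returns the mask rather than the masked input, this
# helper multiplies the mask back onto ``incoming`` using lasagne's
# ``ElemwiseMergeLayer``. It assumes the output shapes of ``incoming`` and
# ``w_freq`` match element-wise.
def apply_word_dropout(incoming, w_freq, alpha, **kwargs):
    """Return ``incoming`` with frequency-based word dropout applied."""
    from lasagne.layers import ElemwiseMergeLayer
    mask = WordDropoutLayer(incoming, w_freq, alpha, **kwargs)
    # element-wise product of the input and its dropout mask
    return ElemwiseMergeLayer([incoming, mask], T.mul)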


class ConditionedWordDropoutLayer(Layer):
    """Conditioned word dropout layer

    Produces a binary dropout mask in which each element is dropped with
    probability ``p``, except that positions already dropped by
    ``previous_mask`` are always kept. See notes for disabling dropout
    during testing.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        the layer feeding into this layer, or the expected input shape;
        its shape determines the shape of the mask
    previous_mask : a :class:`Layer` instance
        a layer producing a binary mask (e.g. a :class:`WordDropoutLayer`);
        positions set to zero there are never dropped again by this layer
    p : float or scalar tensor
        The probability of setting a value to zero
    rescale : bool
        accepted for construction, but rescaling by ``1 / (1 - p)`` is
        currently not applied (the corresponding code is commented out)
    shared_axes : tuple of int
        Axes to share the dropout mask over. By default, each value can be
        dropped individually. ``shared_axes=(0,)`` uses the same mask across
        the batch. ``shared_axes=(2, 3)`` uses the same mask across the
        spatial dimensions of 2D feature maps.

    Notes
    -----
    Like :class:`WordDropoutLayer`, this layer returns the dropout *mask*
    itself rather than the masked input. Dropout is a regularizer that
    randomly sets input values to zero; see [1]_, [2]_ for why this might
    improve generalization.

    The output of ``previous_mask`` is built into the graph at construction
    time via :func:`lasagne.layers.get_output`.

    The behaviour of the layer depends on the ``deterministic`` keyword
    argument passed to :func:`lasagne.layers.get_output`. If ``True``, an
    all-ones mask is returned. If ``False`` or not specified, dropout is
    enabled. Usually, you would use ``deterministic=False`` at train time
    and ``deterministic=True`` at test time.

    See also
    --------
    dropout_channels : Drops full channels of feature maps
    spatial_dropout : Alias for :func:`dropout_channels`
    dropout_locations : Drops full pixels or voxels of feature maps

    References
    ----------
    .. [1] Hinton, G., Srivastava, N., Krizhevsky, A., Sutskever, I.,
           Salakhutdinov, R. R. (2012): Improving neural networks by
           preventing co-adaptation of feature detectors. arXiv preprint
           arXiv:1207.0580.

    .. [2] Srivastava, N., Hinton, G., Krizhevsky, A., Sutskever, I., &
           Salakhutdinov, R. R. (2014): Dropout: A Simple Way to Prevent
           Neural Networks from Overfitting. Journal of Machine Learning
           Research, 15(1), 1929-1958.
    """
    def __init__(self, incoming, previous_mask, p=0.5, rescale=False,
                 shared_axes=(), **kwargs):
        super(ConditionedWordDropoutLayer, self).__init__(incoming, **kwargs)
        self._srng = RandomStreams(get_rng().randint(1, 2147462579))
        self.p = p
        self.rescale = rescale
        # self.retain = lo(alpha)/(lo(p)+lo(alpha))
        self.retain = T.constant(1) - p
        # invert the previous mask: 1 where it dropped, 0 where it kept
        self.previous_mask = -(lo(previous_mask) - T.constant(1))
        self.shared_axes = tuple(shared_axes)

    def get_output_for(self, input, deterministic=False, **kwargs):
        if deterministic or self.p == 0:
            # no dropout: return an all-ones mask of the input's shape
            return T.ones_like(input)
        else:
            # Using theano constant to prevent upcasting
            # one = T.constant(1)
            # retain_prob = one - self.p
            # if self.rescale:
            #     input /= retain_prob

            # use nonsymbolic shape for dropout mask if possible
            mask_shape = self.input_shape
            if any(s is None for s in mask_shape):
                mask_shape = input.shape

            # apply dropout, respecting shared axes
            if self.shared_axes:
                shared_axes = tuple(a if a >= 0 else a + input.ndim
                                    for a in self.shared_axes)
                mask_shape = tuple(1 if a in shared_axes else s
                                   for a, s in enumerate(mask_shape))
            mask = self._srng.binomial(mask_shape, p=self.retain,
                                       dtype=input.dtype)
            # keep a position if the new sample keeps it or the previous
            # mask already dropped it; T.maximum acts as an element-wise OR
            # for {0, 1}-valued float masks (T.or_ needs integer dtypes)
            mask = T.maximum(mask, self.previous_mask)
            if self.shared_axes:
                bcast = tuple(bool(s == 1) for s in mask_shape)
                mask = T.patternbroadcast(mask, bcast)
            return mask
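

# Minimal smoke test (a sketch; the shapes, the constant frequency values
# and the use of plain InputLayers for both the tokens and their corpus
# frequencies are illustrative assumptions, not part of the layers above).
if __name__ == "__main__":
    import numpy as np
    from lasagne.layers import InputLayer

    batch_size, seq_len = 4, 7
    tokens = InputLayer((batch_size, seq_len))   # one value per token position
    freqs = InputLayer((batch_size, seq_len))    # corpus frequency per token

    word_drop = WordDropoutLayer(tokens, w_freq=freqs, alpha=1e-3)
    cond_drop = ConditionedWordDropoutLayer(tokens, previous_mask=word_drop,
                                            p=0.5)

    # the frequencies enter the graph through ``freqs.input_var`` because
    # ``lo(w_freq)`` is already called in ``__init__``
    masks = lo([word_drop, cond_drop], deterministic=False)
    fn = theano.function([tokens.input_var, freqs.input_var], masks,
                         on_unused_input="ignore")

    x = np.zeros((batch_size, seq_len), dtype=theano.config.floatX)
    f = np.full((batch_size, seq_len), 0.01, dtype=theano.config.floatX)
    first_mask, second_mask = fn(x, f)
    print(first_mask)
    print(second_mask)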