""" Classes that define how vector spaces are formatted Most of our models can be viewed as linearly transforming one vector space to another. These classes define how the vector spaces should be represented as theano/numpy variables. For example, the VectorSpace class just represents a vector space with a vector, and the model can transform between spaces with a matrix multiply. The Conv2DSpace represents a vector space as an image, and the model can transform between spaces with a 2D convolution. To make models as general as possible, models should be written in terms of Spaces, rather than in terms of numbers of hidden units, etc. The model should also be written to transform between spaces using a generic linear transformer from the pylearn2.linear module. The Space class is needed so that the model can specify what kinds of inputs it needs and what kinds of outputs it will produce when communicating with other parts of the library. The model also uses Space objects internally to allocate parameters like hidden unit bias terms in the right space. """ __authors__ = "Ian Goodfellow" __copyright__ = "Copyright 2010-2012, Universite de Montreal" __credits__ = ["Ian Goodfellow"] __license__ = "3-clause BSD" __maintainer__ = "LISA Lab" __email__ = "pylearn-dev@googlegroups" import functools import warnings import numpy as np from theano.compat.six.moves import xrange import theano import theano.sparse from theano import tensor from theano.tensor import TensorType from theano.gof.op import get_debug_values from theano.sandbox.cuda.type import CudaNdarrayType from pylearn2.utils import py_integer_types, safe_zip, sharedX, wraps from pylearn2.format.target_format import OneHotFormatter if theano.sparse.enable_sparse: # We know scipy.sparse is available import scipy.sparse def _is_batch_all(batch, predicate): """ Implementation of is_symbolic_batch() and is_numeric_batch(). Returns True iff predicate() returns True for all components of (possibly composite) batch. 
Parameters ---------- batch : any numeric or symbolic batch. This includes numpy.ndarray, theano.gof.Variable, None, or a (nested) tuple thereof. predicate : function. A unary function of any non-composite batch that returns True or False. """ # Catches any CompositeSpace batches that were mistakenly hand-constructed # using nested lists rather than nested tuples. assert not isinstance(batch, list) # Data-less batches such as None or () are valid numeric and symbolic # batches. # # Justification: we'd like # is_symbolic_batch(space.make_theano_batch()) to always be True, even if # space is an empty CompositeSpace. if batch is None or (isinstance(batch, tuple) and len(batch) == 0): return True if isinstance(batch, tuple): subbatch_results = tuple(_is_batch_all(b, predicate) for b in batch) result = all(subbatch_results) # The subbatch_results must be all true, or all false, not a mix. assert result == any(subbatch_results), ("composite batch had a " "mixture of numeric and " "symbolic subbatches. This " "should never happen.") return result else: return predicate(batch) def is_symbolic_batch(batch): """ Returns True if batch is a symbolic variable. Note that a batch may be both a symbolic and numeric variable (e.g. () for empty CompositeSpaces, None for NullSpaces). """ return _is_batch_all(batch, lambda x: isinstance(x, theano.gof.Variable)) def is_numeric_batch(batch): """ Returns True if batch is a numeric variable. Note that a batch may be both a symbolic and numeric variable (e.g. () for empty CompositeSpaces, None for NullSpaces). """ def is_numeric(batch): # Uses the 'CudaNdarray' string to avoid importing # theano.sandbox.cuda when it is not available return (isinstance(batch, np.ndarray) or scipy.sparse.issparse(batch) or str(type(batch)) == "<type 'CudaNdarray'>") return _is_batch_all(batch, is_numeric) def _dense_to_sparse(batch): """ Casts dense batches to sparse batches (non-composite). Supports both symbolic and numeric variables. 
""" if isinstance(batch, tuple): raise TypeError("Composite batches not supported.") assert not isinstance(batch, list) if is_symbolic_batch(batch): assert isinstance(batch, theano.tensor.TensorVariable) return theano.sparse.csr_from_dense(batch) else: assert isinstance(batch, np.ndarray), "type of batch: %s" % type(batch) return scipy.sparse.csr_matrix(batch) def _reshape(arg, shape): """ Reshapes a tensor. Supports both symbolic and numeric variables. This is a hack that first converts from sparse to dense, reshapes the dense tensor, then re-converts from dense to sparse. It is therefore memory-inefficient and unsuitable for large tensors. It will be replaced by a proper sparse reshaping Op once Theano implements that. """ if isinstance(arg, tuple): raise TypeError("Composite batches not supported.") assert not isinstance(arg, list) if isinstance(arg, (np.ndarray, theano.tensor.TensorVariable)): return arg.reshape(shape) elif isinstance(arg, theano.sparse.SparseVariable): warnings.warn("Using pylearn2.space._reshape(), which is a " "memory-inefficient hack for reshaping sparse tensors. " "Do not use this on large tensors. This will eventually " "be replaced by a proper Theano Op for sparse " "reshaping, once that is written.") dense = theano.sparse.dense_from_sparse(arg) dense = dense.reshape(shape) if arg.format == 'csr': return theano.sparse.csr_from_dense(dense) elif arg.format == 'csc': return theano.sparse.csc_from_dense(dense) else: raise ValueError('Unexpected sparse format "%s".' % arg.format) else: raise TypeError('Unexpected batch type "%s"' % str(type(arg))) def _cast(arg, dtype): """ Does element-wise casting to dtype. Supports symbolic, numeric, simple, and composite batches. Returns <arg> untouched if <dtype> is None, or dtype is unchanged (i.e. casting a float32 batch to float32). (One exception: composite batches are never returned as-is. A new tuple will always be returned. 
However, any components with unchanged dtypes will be returned untouched.) """ if dtype is None: return arg assert dtype in tuple(t.dtype for t in theano.scalar.all_types) if isinstance(arg, tuple): return tuple(_cast(a, dtype) for a in arg) elif isinstance(arg, np.ndarray): # theano._asarray is a safer drop-in replacement to numpy.asarray. return theano._asarray(arg, dtype=dtype) elif str(type(arg)) == "<type 'CudaNdarray'>": # numeric CUDA array if str(dtype) != 'float32': raise TypeError("Can only cast a numeric CudaNdarray to " "float32, not %s" % dtype) else: return arg elif (isinstance(arg, theano.gof.Variable) and isinstance(arg.type, CudaNdarrayType)): # symbolic CUDA array if str(dtype) != 'float32': raise TypeError("Can only cast a theano CudaNdArrayType to " "float32, not %s" % dtype) else: return arg elif scipy.sparse.issparse(arg): return arg.astype(dtype) elif isinstance(arg, theano.tensor.TensorVariable): return theano.tensor.cast(arg, dtype) elif isinstance(arg, theano.sparse.SparseVariable): return theano.sparse.cast(arg, dtype) elif isinstance(arg, theano.sandbox.cuda.var.CudaNdarrayVariable): return arg else: raise TypeError("Unsupported arg type '%s'" % str(type(arg))) def _undo_op(arg, string, strict=False): """ Undo symbolic op if string is in str(op). Returns <arg> untouched if there was no symbolic op. Parameters ---------- arg : any symbolic variable. string : str String that specifies op. strict : bool Whether to force op undo or not (default False). """ if hasattr(arg.owner, 'op'): owner = arg.owner if string in str(owner.op): return owner.inputs[0] elif strict: raise ValueError(string + ' not found in op ' + str(owner.op) + '.') elif strict: raise ValueError(string + ' op not found in variable ' + str(arg) + '.') return arg class Space(object): """ A vector space that can be transformed by a linear operator. Space and its subclasses are used to transform a data batch's geometry (e.g. vectors <--> matrices) and optionally, its dtype (e.g. 
float <--> int). Batches may be one of the following types: - numpy.ndarray - scipy.sparse.csr_matrix - theano.gof.Variable - None (for NullSpace) - A (nested) tuple of the above, possibly empty (for CompositeSpace). Parameters ---------- validate_callbacks : list Callbacks that are run at the start of a call to validate. Each should be a callable with the same signature as validate. An example use case is installing an instance-specific error handler that provides extra instructions for how to correct an input that is in a bad space. np_validate_callacks : list similar to validate_callbacks, but run on calls to np_validate """ def __init__(self, validate_callbacks=None, np_validate_callbacks=None): if validate_callbacks is None: validate_callbacks = [] if np_validate_callbacks is None: np_validate_callbacks = [] self.validate_callbacks = validate_callbacks self.np_validate_callbacks = np_validate_callbacks # Forces subclasses to implement __eq__. # This is necessary for _format_as to work correctly. def __eq__(self, other): """ Returns true iff space.format_as(batch, self) and space.format_as(batch, other) return the same formatted batch. """ raise NotImplementedError("__eq__ not implemented in class %s." % type(self)) def get_batch_axis(self): """ Returns the batch axis of the output space. Returns ------- batch_axis : int the axis of the batch in the output space. """ return 0 def __ne__(self, other): """ .. todo:: WRITEME """ return not (self == other) def __repr__(self): """ .. todo:: WRITEME """ return str(self) @property def dtype(self): """ An object representing the data type used by this space. For simple spaces, this will be a dtype string, as used by numpy, scipy, and theano (e.g. 'float32'). For data-less spaces like NoneType, this will be some other string. For composite spaces, this will be a nested tuple of such strings. """ raise NotImplementedError() @dtype.setter def dtype(self, new_value): """ .. 
todo:: WRITEME """ raise NotImplementedError() @dtype.deleter def dtype(self): """ .. todo:: WRITEME """ raise RuntimeError("You may not delete the dtype of a space, " "though you can set it to None.") def get_origin(self): """ Returns the origin in this space. Returns ------- origin : ndarray An NumPy array, the shape of a single points in this space, representing the origin. """ raise NotImplementedError() def get_origin_batch(self, batch_size, dtype=None): """ Returns a batch containing `batch_size` copies of the origin. Parameters ---------- batch_size : int The number of examples in the batch to be returned. dtype : WRITEME The dtype of the batch to be returned. Default = None. If None, use self.dtype. Returns ------- batch : ndarray A NumPy array in the shape of a batch of `batch_size` points in this space (with points being indexed along the first axis), each `batch[i]` being a copy of the origin. """ raise NotImplementedError() def make_shared_batch(self, batch_size, name=None, dtype=None): """ .. todo:: WRITEME """ dtype = self._clean_dtype_arg(dtype) origin_batch = self.get_origin_batch(batch_size, dtype) return theano.shared(origin_batch, name=name) def make_theano_batch(self, name=None, dtype=None, batch_size=None): """ Returns a symbolic variable representing a batch of points in this space. Parameters ---------- name : str Variable name for the returned batch. dtype : str Data type for the returned batch. If omitted (None), self.dtype is used. batch_size : int Number of examples in the returned batch. Returns ------- batch : TensorVariable, SparseVariable, or tuple thereof A batch with the appropriate number of dimensions and appropriate broadcast flags to represent a batch of points in this space. 
""" raise NotImplementedError() def make_batch_theano(self, name=None, dtype=None, batch_size=None): """ An alias to make_theano_batch """ return self.make_theano_batch(name=name, dtype=dtype, batch_size=batch_size) @wraps(make_theano_batch) def get_theano_batch(self, *args, **kwargs): return self.make_theano_batch(*args, **kwargs) def get_total_dimension(self): """ Returns a Python int (not a theano iscalar) representing the dimensionality of a point in this space. If you format a batch of examples in this space as a design matrix (i.e., VectorSpace batch) then the number of columns will be equal to the total dimension. """ raise NotImplementedError(str(type(self)) + " does not implement get_total_dimension.") def np_format_as(self, batch, space): """ Returns a numeric batch (e.g. a numpy.ndarray or scipy.sparse sparse array), formatted to lie in this space. This is just a wrapper around self._format_as, with an extra check to throw an exception if <batch> is symbolic. Should be invertible, i.e. batch should equal `space.format_as(self.format_as(batch, space), self)` Parameters ---------- batch : numpy.ndarray, or one of the scipy.sparse matrices. Array which lies in this space. space : Space Target space to format batch to. Returns ------- WRITEME The formatted batch """ self._check_is_numeric(batch) return self._format_as(is_numeric=True, batch=batch, space=space) def _check_sizes(self, space): """ Called by self._format_as(space), to check whether self and space have compatible sizes. Throws a ValueError if they don't. """ my_dimension = self.get_total_dimension() other_dimension = space.get_total_dimension() if my_dimension != other_dimension: raise ValueError(str(self) + " with total dimension " + str(my_dimension) + " can't format a batch into " + str(space) + "because its total dimension is " + str(other_dimension)) def format_as(self, batch, space): """ .. 
todo:: WRITEME """ self._check_is_symbolic(batch) return self._format_as(is_numeric=False, batch=batch, space=space) def _format_as(self, is_numeric, batch, space): """ The shared implementation of format_as() and np_format_as(). Agnostic to whether batch is symbolic or numeric, which avoids duplicating a lot of code between format_as() and np_format_as(). Calls the appropriate callbacks, then calls self._format_as_impl(). Should be invertible, i.e. batch should equal `space._format_as(self._format_as(batch, space), self)` Parameters ---------- is_numeric : bool Set to True to call np_validate_callbacks(). Set to False to call validate_callbacks(). batch : WRITEME space : Space WRITEME Returns ------- WRITEME """ assert isinstance(is_numeric, bool) # Checks if batch belongs to this space self._validate(is_numeric, batch) # checks if self and space have compatible sizes for formatting. self._check_sizes(space) return self._format_as_impl(is_numeric, batch, space) def _format_as_impl(self, is_numeric, batch, target_space): """ Actual implementation of format_as/np_format_as. Formats batch to target_space. Should be invertible, i.e. batch should equal `space._format_as_impl(self._format_as_impl(batch, space), self)` Parameters ---------- is_numeric : bool Set to True to treat batch as a numeric batch, False to treat it as a symbolic batch. This is necessary because sometimes a batch's numeric/symbolicness can be ambiguous, i.e. when it's the empty tuple (). batch : a numpy.ndarray, scipy.sparse matrix, theano symbol, or a \ nested tuple thereof Implementations of this method may assume that batch lies in this space (i.e. that it passed self._validate(batch) without throwing an exception). target_space : A Space subclass The space to transform batch into. Returns ------- The batch, converted to the target_space. """ raise NotImplementedError("%s does not implement _format_as_impl()." 
% type(self)) def undo_np_format_as(self, batch, space): """ Returns a numeric batch (e.g. a numpy.ndarray or scipy.sparse sparse array), with formatting from space undone. This is just a wrapper around self._undo_format_as, with an extra check to throw an exception if <batch> is symbolic. Parameters ---------- batch : numpy.ndarray, or one of the scipy.sparse matrices. Array which lies in this space. space : Space Space to undo formatting from. Returns ------- numpy.ndarray or one of the scipy.sparse matrices The formatted batch. """ self._check_is_numeric(batch) return space.np_format_as(batch=batch, space=self) def undo_format_as(self, batch, space): """ Returns a symbolic batch (e.g. a theano.tensor or theano.sparse array), with formatting from space undone. This is just a wrapper around self._undo_format_as, with an extra check to throw an exception if <batch> is symbolic. Formatting to space Parameters ---------- batch : numpy.ndarray, or one of the scipy.sparse matrices. Array which lies in this space. space : Space Space to undo formatting from. Returns ------- A symbolic Theano variable The batch formatted as self. """ self._check_is_symbolic(batch) space.validate(batch) self._check_sizes(space) batch = self._undo_format_as_impl(batch=batch, space=space) # Checks if batch belongs to this space self.validate(batch) return batch def _undo_format_as_impl(self, batch, target_space): """ Actual implementation of undo_format_as. Undoes target_space_formatting. Note that undo_np_format_as calls np_format_as. Parameters ---------- batch : a theano symbol, or a nested tuple thereof Implementations of this method may assume that batch lies in space (i.e. that it passed self._validate(batch) without throwing an exception). target_space : A Space subclass The space to undo batch formatting from. Returns ------- A symbolic Theano variable The batch, converted from target_space, back to self. 
""" raise NotImplementedError("%s does not implement " "_undo_format_as_impl()." % type(self)) def validate(self, batch): """ Runs all validate_callbacks, then checks that batch lies in this space. Raises an exception if the batch isn't symbolic, or if any of these checks fails. Parameters ---------- batch : a symbolic (Theano) variable that lies in this space. """ self._check_is_symbolic(batch) self._validate(is_numeric=False, batch=batch) def np_validate(self, batch): """ Runs all np_validate_callbacks, then checks that batch lies in this space. Raises an exception if the batch isn't numeric, or if any of these checks fails. Parameters ---------- batch : a numeric (numpy/scipy.sparse) variable that lies in this \ space """ self._check_is_numeric(batch) self._validate(is_numeric=True, batch=batch) def _validate(self, is_numeric, batch): """ Shared implementation of validate() and np_validate(). Calls validate_callbacks or np_validate_callbacks as appropriate, then calls self._validate_impl(batch) to verify that batch belongs to this space. Parameters ---------- is_numeric : bool. Set to True to call np_validate_callbacks, False to call validate_callbacks. Necessary because it can be impossible to tell from the batch whether it should be treated as a numeric of symbolic batch, for example when the batch is the empty tuple (), or NullSpace batch None. batch : a theano variable, numpy ndarray, scipy.sparse matrix \ or a nested tuple thereof Represents a batch belonging to this space. 
""" if is_numeric: self._check_is_numeric(batch) callbacks_name = "np_validate_callbacks" else: self._check_is_symbolic(batch) callbacks_name = "validate_callbacks" if not hasattr(self, callbacks_name): raise TypeError("The " + str(type(self)) + " Space subclass " "is required to call the Space superclass " "constructor but does not.") else: callbacks = getattr(self, callbacks_name) for callback in callbacks: callback(batch) self._validate_impl(is_numeric, batch) def _validate_impl(self, is_numeric, batch): """ Subclasses must override this method so that it throws an exception if the batch is the wrong shape or dtype for this Space. Parameters ---------- is_numeric : bool Set to True to treat batch as a numeric type (numpy.ndarray or scipy.sparse matrix). Set to False to treat batch as a symbolic (Theano) variable. Necessary because batch could be (), which could be numeric or symbolic. batch : A numpy ndarray, scipy.sparse matrix, theano variable \ or a nested tuple thereof. Must be a valid batch belonging to this space. """ raise NotImplementedError('Class "%s" does not implement ' '_validate_impl()' % type(self)) def batch_size(self, batch): """ Returns the batch size of a symbolic batch. Parameters ---------- batch : WRITEME """ return self._batch_size(is_numeric=False, batch=batch) def np_batch_size(self, batch): """ Returns the batch size of a numeric (numpy/scipy.sparse) batch. Parameters ---------- batch : WRITEME """ return self._batch_size(is_numeric=True, batch=batch) def _batch_size(self, is_numeric, batch): """ .. todo:: WRITEME """ self._validate(is_numeric, batch) return self._batch_size_impl(is_numeric, batch) def _batch_size_impl(self, is_numeric, batch): """ Returns the batch size of a batch. 
Parameters ---------- batch : WRITEME """ raise NotImplementedError("%s does not implement batch_size" % type(self)) def get_batch(self, data, start, end): """ Returns a batch of data starting from index `start` to index `stop` Parameters ---------- data : WRITEME start : WRITEME end : WRITEME """ raise NotImplementedError(str(type(self)) + " does not implement " + "get_batch") @staticmethod def _check_is_numeric(batch): """ .. todo:: WRITEME """ if not is_numeric_batch(batch): raise TypeError('Expected batch to be a numeric variable, but ' 'instead it was of type "%s"' % type(batch)) @staticmethod def _check_is_symbolic(batch): """ .. todo:: WRITEME """ if not is_symbolic_batch(batch): raise TypeError('Expected batch to be a symbolic variable, but ' 'instead it was of type "%s"' % type(batch)) def _clean_dtype_arg(self, dtype): """ Checks dtype string for validity, and returns it if it is. If dtype is 'floatX', returns the theano.config.floatX dtype (this will either be 'float32' or 'float64'. """ if isinstance(dtype, np.dtype): dtype = str(dtype) if dtype == 'floatX': return theano.config.floatX if dtype is None or \ dtype in tuple(x.dtype for x in theano.scalar.all_types): return dtype raise TypeError('Unrecognized value "%s" (type %s) for dtype arg' % (dtype, type(dtype))) class SimplyTypedSpace(Space): """ An abstract base class for Spaces that use a numpy/theano dtype string for its .dtype property. """ def __init__(self, dtype='floatX', **kwargs): super(SimplyTypedSpace, self).__init__(**kwargs) self._dtype = super(SimplyTypedSpace, self)._clean_dtype_arg(dtype) def _clean_dtype_arg(self, dtype): """ if dtype is None, checks that self.dtype is not None. Otherwise, same as superclass' implementation. 
""" if dtype is None: if self.dtype is None: raise TypeError("self.dtype is None, so you must provide a " "non-None dtype argument to this method.") return self.dtype return super(SimplyTypedSpace, self)._clean_dtype_arg(dtype) def _validate_impl(self, is_numeric, batch): """ .. todo:: WRITEME """ if isinstance(batch, tuple): raise TypeError("This space only supports simple dtypes, but " "received a composite batch.") # Checks for information-destroying casts. # # To be maximally strict, we'd guard against all loss of precision by # checking if np.can_cast(batch.dtype, self.dtype). # # Because this prohibits float64->float32, it breaks too much of the # codebase (float64 is default float, float32 is default CUDA float for # many graphics cards). # # Therefore, we only prohibit the following: # * non-integral type to integral type # * complex to non-complex def is_complex(dtype): return np.issubdtype(dtype, np.complex) def is_integral(dtype): return np.issubdtype(dtype, np.integer) if self.dtype is not None: if (is_complex(batch.dtype) and not is_complex(self.dtype)) or \ (not is_integral(batch.dtype) and is_integral(self.dtype)): raise TypeError("Cannot safely cast batch dtype %s to " "space's dtype %s. " % (batch.dtype, self.dtype)) @property def dtype(self): """ .. todo:: WRITEME """ return self._dtype @dtype.setter def dtype(self, new_dtype): """ .. todo:: WRITEME """ self._dtype = super(SimplyTypedSpace, self)._clean_dtype_arg(new_dtype) def __setstate__(self, state_dict): """ .. todo:: WRITEME """ self.__dict__.update(state_dict) # When unpickling a Space that was pickled before Spaces had dtypes, # we need to set the _dtype to the default value. if '_dtype' not in state_dict: self._dtype = theano.config.floatX class IndexSpace(SimplyTypedSpace): """ A space representing indices, for example MNIST labels (0-10) or the indices of words in a dictionary for NLP tasks. A single space can contain multiple indices, for example the word indices of an n-gram. 
IndexSpaces can be converted to VectorSpaces in two ways: Either the labels are converted into one-hot vectors which are then concatenated, or they are converted into a single vector where 1s indicate labels present i.e. for 4 possible labels we have [0, 2] -> [1 0 1 0] or [0, 2] -> [1 0 0 0 0 0 1 0]. Parameters ---------- max_labels : int The number of possible classes/labels. This means that all labels should be < max_labels. Example: For MNIST there are 10 numbers and hence max_labels = 10. dim : int The number of indices in one space e.g. for MNIST there is one target label and hence dim = 1. If we have an n-gram of word indices as input to a neurel net language model, dim = n. dtype : str A numpy dtype string indicating this space's dtype. Must be an integer type e.g. int32 or int64. kwargs : dict Passes on to superclass constructor """ def __init__(self, max_labels, dim, dtype='int64', **kwargs): if 'int' not in dtype: raise ValueError("The dtype of IndexSpace must be an integer type") super(IndexSpace, self).__init__(dtype, **kwargs) self.max_labels = max_labels self.dim = dim self.formatter = OneHotFormatter(self.max_labels) def __str__(self): """Return a string representation""" return ('%(classname)s(dim=%(dim)s, max_labels=%(max_labels)s, ' 'dtype=%(dtype)s)') % dict(classname=self.__class__.__name__, dim=self.dim, max_labels=self.max_labels, dtype=self.dtype) def __hash__(self): return hash((type(self), self.dim, self.max_labels, self.dtype)) def __eq__(self, other): """ .. todo:: WRITEME """ return (type(self) == type(other) and self.max_labels == other.max_labels and self.dim == other.dim and self.dtype == other.dtype) def __ne__(self, other): """ .. 
todo:: WRITEME """ return (not self == other) @functools.wraps(Space.get_total_dimension) def get_total_dimension(self): return self.dim @functools.wraps(Space.get_origin) def get_origin(self): return np.zeros((1, self.dim,)) @functools.wraps(Space.get_origin_batch) def get_origin_batch(self, batch_size, dtype=None): dtype = self._clean_dtype_arg(dtype) return np.zeros((batch_size, self.dim), dtype=dtype) @functools.wraps(Space._check_sizes) def _check_sizes(self, space): if isinstance(space, VectorSpace): if space.dim not in (self.max_labels, # merged onehots self.dim * self.max_labels): # concatenated raise ValueError("Can't convert to VectorSpace of dim %d. " "Expected either dim=%d (merged one-hots) or " "%d (concatenated one-hots)" % (space.dim, self.max_labels, self.dim * self.max_labels)) elif isinstance(space, IndexSpace): if space.dim != self.dim or space.max_labels != self.max_labels: raise ValueError("Can't convert to IndexSpace of dim %d and " "max_labels %d." % (space.dim, self.max_labels)) else: raise ValueError("Can't convert to " + str(space.__class__)) @functools.wraps(Space._format_as_impl) def _format_as_impl(self, is_numeric, batch, space): if isinstance(space, VectorSpace): if self.max_labels == space.dim: mode = 'merge' elif self.dim * self.max_labels == space.dim: mode = 'concatenate' else: raise ValueError("There is a bug. Couldn't format to a " "VectorSpace because it had an incorrect " "size, but this should've been caught in " "IndexSpace._check_sizes().") format_func = (self.formatter.format if is_numeric else self.formatter.theano_expr) return _cast(format_func(batch, sparse=space.sparse, mode=mode), space.dtype) elif isinstance(space, IndexSpace): if space.dim != self.dim or space.max_labels != self.max_labels: raise ValueError("The two IndexSpaces' dim and max_labels " "values don't match. 
This should have been " "caught by IndexSpace._check_sizes().") return _cast(batch, space.dtype) else: raise ValueError("Can't convert %s to %s" % (self, space)) @functools.wraps(Space.make_theano_batch) def make_theano_batch(self, name=None, dtype=None, batch_size=None): if batch_size == 1: rval = tensor.lrow(name=name) else: rval = tensor.lmatrix(name=name) if theano.config.compute_test_value != 'off': if batch_size == 1: n = 1 else: # TODO: try to extract constant scalar value from batch_size n = 4 rval.tag.test_value = self.get_origin_batch(batch_size=n, dtype=dtype) return rval @functools.wraps(Space._batch_size_impl) def _batch_size_impl(self, is_numeric, batch): return batch.shape[0] @functools.wraps(Space._validate_impl) def _validate_impl(self, is_numeric, batch): """ .. todo:: WRITEME """ # checks that batch isn't a tuple, checks batch.type against self.dtype super(IndexSpace, self)._validate_impl(is_numeric, batch) if is_numeric: # Use the 'CudaNdarray' string to avoid importing # theano.sandbox.cuda when it is not available if not isinstance(batch, np.ndarray) \ and str(type(batch)) != "<type 'CudaNdarray'>": raise TypeError("The value of a IndexSpace batch should be a " "numpy.ndarray, or CudaNdarray, but is %s." % str(type(batch))) if batch.ndim != 2: raise ValueError("The value of a IndexSpace batch must be " "2D, got %d dimensions for %s." % (batch.ndim, batch)) if batch.shape[1] != self.dim: raise ValueError("The width of a IndexSpace batch must match " "with the space's dimension, but batch has " "shape %s and dim = %d." 
% (str(batch.shape), self.dim)) else: if not isinstance(batch, theano.gof.Variable): raise TypeError("IndexSpace batch should be a theano " "Variable, got " + str(type(batch))) if not isinstance(batch.type, (theano.tensor.TensorType, CudaNdarrayType)): raise TypeError("IndexSpace batch should be TensorType or " "CudaNdarrayType, got " + str(batch.type)) if batch.ndim != 2: raise ValueError('IndexSpace batches must be 2D, got %d ' 'dimensions' % batch.ndim) for val in get_debug_values(batch): self.np_validate(val) class VectorSpace(SimplyTypedSpace): """ A space whose points are defined as fixed-length vectors. Parameters ---------- dim : int Dimensionality of a vector in this space. sparse : bool, optional Sparse vector or not dtype : str, optional A numpy dtype string (e.g. 'float32') indicating this space's dtype, or None for a dtype-agnostic space. kwargs : dict Passed on to superclass constructor. """ def __init__(self, dim, sparse=False, dtype='floatX', **kwargs): super(VectorSpace, self).__init__(dtype, **kwargs) self.dim = dim self.sparse = sparse def __str__(self): """ .. 
todo:: WRITEME """ return ('%s(dim=%d%s, dtype=%s)' % (self.__class__.__name__, self.dim, ', sparse' if self.sparse else '', self.dtype)) @functools.wraps(Space.get_origin) def get_origin(self): return np.zeros((self.dim,)) @functools.wraps(Space.get_origin_batch) def get_origin_batch(self, batch_size, dtype=None): dtype = self._clean_dtype_arg(dtype) if self.sparse: return scipy.sparse.csr_matrix((batch_size, self.dim), dtype=dtype) else: return np.zeros((batch_size, self.dim), dtype=dtype) @functools.wraps(Space._batch_size_impl) def _batch_size_impl(self, is_numeric, batch): return batch.shape[0] @functools.wraps(Space.make_theano_batch) def make_theano_batch(self, name=None, dtype=None, batch_size=None): dtype = self._clean_dtype_arg(dtype) if self.sparse: if batch_size is not None: raise NotImplementedError("batch_size not implemented " "for sparse case") rval = theano.sparse.csr_matrix(name=name, dtype=dtype) else: if batch_size == 1: rval = tensor.row(name=name, dtype=dtype) else: rval = tensor.matrix(name=name, dtype=dtype) if theano.config.compute_test_value != 'off': if batch_size == 1: n = 1 else: # TODO: try to extract constant scalar value from batch_size n = 4 rval.tag.test_value = self.get_origin_batch(batch_size=n, dtype=dtype) return rval @functools.wraps(Space.get_total_dimension) def get_total_dimension(self): return self.dim @functools.wraps(Space._format_as_impl) def _format_as_impl(self, is_numeric, batch, space): to_type = None def is_sparse(batch): return (isinstance(batch, theano.sparse.SparseVariable) or scipy.sparse.issparse(batch)) if not isinstance(space, IndexSpace): my_dimension = self.get_total_dimension() other_dimension = space.get_total_dimension() if my_dimension != other_dimension: raise ValueError(str(self) + " with total dimension " + str(my_dimension) + " can't format a batch into " + str(space) + "because its total dimension is " + str(other_dimension)) if isinstance(space, CompositeSpace): if isinstance(batch, 
theano.sparse.SparseVariable): warnings.warn('Formatting from a sparse VectorSpace to a ' 'CompositeSpace is currently (2 Jan 2014) a ' 'non-differentiable action. This is because it ' 'calls slicing operations on a sparse batch ' '(e.g. "my_matrix[r:R, c:C]", which Theano does ' 'not yet have a gradient operator for. If ' 'autodifferentiation is reporting an error, ' 'this may be why. Formatting batch type %s ' 'from space %s to space %s' % (type(batch), self, space)) pos = 0 pieces = [] for component in space.components: width = component.get_total_dimension() subtensor = batch[:, pos:pos + width] pos += width vector_subspace = VectorSpace(dim=width, dtype=self.dtype, sparse=self.sparse) formatted = vector_subspace._format_as(is_numeric, subtensor, component) pieces.append(formatted) result = tuple(pieces) elif isinstance(space, Conv2DSpace): if is_sparse(batch): raise TypeError("Formatting a SparseVariable to a Conv2DSpace " "is not supported, since neither scipy nor " "Theano has sparse tensors with more than 2 " "dimensions. We need 4 dimensions to " "represent a Conv2DSpace batch") dims = {'b': batch.shape[0], 'c': space.num_channels, 0: space.shape[0], 1: space.shape[1]} if space.axes != space.default_axes: # Always use default_axes, so conversions like # Conv2DSpace(c01b) -> VectorSpace -> Conv2DSpace(b01c) work shape = [dims[ax] for ax in space.default_axes] batch = _reshape(batch, shape) batch = batch.transpose(*[space.default_axes.index(ax) for ax in space.axes]) result = batch else: shape = tuple([dims[elem] for elem in space.axes]) result = _reshape(batch, shape) to_type = space.dtype elif isinstance(space, VectorSpace): if self.dim != space.dim: raise ValueError("Can't convert between VectorSpaces of " "different sizes (%d to %d)." 
% (self.dim, space.dim)) if space.sparse != is_sparse(batch): if space.sparse: batch = _dense_to_sparse(batch) elif isinstance(batch, theano.sparse.SparseVariable): batch = theano.sparse.dense_from_sparse(batch) elif scipy.sparse.issparse(batch): batch = batch.todense() else: assert False, ("Unplanned-for branch in if-elif-elif " "chain. This is a bug in the code.") result = batch to_type = space.dtype else: raise NotImplementedError("%s doesn't know how to format as %s" % (self, space)) return _cast(result, dtype=to_type) @functools.wraps(Space._undo_format_as_impl) def _undo_format_as_impl(self, batch, space): def is_sparse(batch): return isinstance(batch, theano.sparse.SparseVariable) if not isinstance(space, IndexSpace): my_dimension = self.get_total_dimension() other_dimension = space.get_total_dimension() if my_dimension != other_dimension: raise ValueError(str(self) + " with total dimension " + str(my_dimension) + " can't undo format a batch from " + str(space) + "because its total dimension is " + str(other_dimension)) if isinstance(space, CompositeSpace): if isinstance(batch, theano.sparse.SparseVariable): warnings.warn('Undo formatting from a sparse VectorSpace to a ' 'CompositeSpace is currently (2 Jan 2014) a ' 'non-differentiable action. This is because it ' 'calls slicing operations on a sparse batch ' '(e.g. "my_matrix[r:R, c:C]", which Theano does ' 'not yet have a gradient operator for. If ' 'autodifferentiation is reporting an error, ' 'this may be why. 
Formatting batch type %s ' 'from space %s to space %s' % (type(batch), self, space)) # Recursively try and find a non-Composite, non-Null space # to extract underlying theano variable def extract_vector_variable(composite_space, batch_tuple): found = False for sp, el in safe_zip(composite_space.components, batch_tuple): dim = sp.get_total_dimension() if not isinstance(sp, NullSpace) and dim > 0: if isinstance(sp, CompositeSpace): var, found = extract_vector_variable(sp, el) var = var.owner.inputs[0] else: dummy_sp = VectorSpace(dim=dim, sparse=sp.sparse, dtype=sp.dtype ) var = dummy_sp.undo_format_as(el, sp) found = True if found: break return var, found var, found = extract_vector_variable(space, batch) batch = var if not found: raise TypeError("Could not find a valid space " "to undo format from in the " "CompositeSpace.") else: # Undo subtensor slice owner = batch.owner assert 'Subtensor' in str(owner.op) batch = owner.inputs[0] elif isinstance(space, Conv2DSpace): if is_sparse(batch): raise TypeError("Undo formatting a SparseVariable to a " "Conv2DSpace is not supported, since " "neither scipy nor Theano has sparse " "tensors with more than 2 dimensions. " "We need 4 dimensions to represent a " "Conv2DSpace batch") # Check for cast batch = _undo_op(batch, 'Cast') # Undo axes shuffle if space.axes != space.default_axes: batch = _undo_op(batch, 'DimShuffle', strict=True) # Undo reshape batch = _undo_op(batch, 'Reshape{4}', strict=True) elif isinstance(space, VectorSpace): if self.dim != space.dim: raise ValueError("Can't convert between VectorSpaces of " "different sizes (%d to %d)." 
% (self.dim, space.dim)) # Check for cast batch = _undo_op(batch, 'Cast') # Undo any sparse-dense switches if self.sparse != is_sparse(batch): if space.sparse: batch = _undo_op(batch, 'SparseFromDense', strict=True) elif isinstance(batch, theano.sparse.SparseVariable): batch = _undo_op(batch, 'DenseFromSparse', strict=True) else: assert False, ("Unplanned-for branch in if-elif " "chain. This is a bug in the code.") else: raise NotImplementedError("%s doesn't know how to format as %s" % (self, space)) return batch def __eq__(self, other): """ .. todo:: WRITEME """ return (type(self) == type(other) and self.dim == other.dim and self.sparse == other.sparse and self.dtype == other.dtype) def __hash__(self): """ .. todo:: WRITEME """ return hash((type(self), self.dim, self.sparse, self.dtype)) @functools.wraps(Space._validate_impl) def _validate_impl(self, is_numeric, batch): """ .. todo:: WRITEME """ # checks that batch isn't a tuple, checks batch.type against self.dtype super(VectorSpace, self)._validate_impl(is_numeric, batch) if isinstance(batch, theano.gof.Variable): if self.sparse: if not isinstance(batch.type, theano.sparse.SparseType): raise TypeError('This VectorSpace is%s sparse, but the ' 'provided batch is not. (batch type: "%s")' % ('' if self.sparse else ' not', type(batch))) elif not isinstance(batch.type, (theano.tensor.TensorType, CudaNdarrayType)): raise TypeError("VectorSpace batch should be TensorType or " "CudaNdarrayType, got " + str(batch.type)) if batch.ndim != 2: raise ValueError('VectorSpace batches must be 2D, got %d ' 'dimensions' % batch.ndim) for val in get_debug_values(batch): self.np_validate(val) # sic; val is numeric, not symbolic else: # Use the 'CudaNdarray' string to avoid importing # theano.sandbox.cuda when it is not available if (not self.sparse and not isinstance(batch, np.ndarray) and type(batch) != 'CudaNdarray'): raise TypeError("The value of a VectorSpace batch should be a " "numpy.ndarray, or CudaNdarray, but is %s." 
% str(type(batch))) if self.sparse: if not theano.sparse.enable_sparse: raise TypeError("theano.sparse is not enabled, cannot " "have a value for a sparse VectorSpace.") if not scipy.sparse.issparse(batch): raise TypeError("The value of a sparse VectorSpace batch " "should be a sparse scipy matrix, got %s " "of type %s." % (batch, type(batch))) if batch.ndim != 2: raise ValueError("The value of a VectorSpace batch must be " "2D, got %d dimensions for %s." % (batch.ndim, batch)) if batch.shape[1] != self.dim: raise ValueError("The width of a VectorSpace batch must match " "with the space's dimension, but batch has " "shape %s and dim = %d." % (str(batch.shape), self.dim)) class VectorSequenceSpace(SimplyTypedSpace): """ A space representing a single, variable-length sequence of fixed-sized vectors. Parameters ---------- dim : int Vector size dtype : str, optional A numpy dtype string indicating this space's dtype. kwargs : dict Passes on to superclass constructor """ def __init__(self, dim, dtype='floatX', **kwargs): super(VectorSequenceSpace, self).__init__(dtype, **kwargs) self.dim = dim def __str__(self): """Return a string representation""" return ('%(classname)s(dim=%(dim)s, dtype=%(dtype)s)' % dict(classname=self.__class__.__name__, dim=self.dim, dtype=self.dtype)) @wraps(Space.__eq__) def __eq__(self, other): return (type(self) == type(other) and self.dim == other.dim and self.dtype == other.dtype) @wraps(Space._check_sizes) def _check_sizes(self, space): if not isinstance(space, VectorSequenceSpace): raise ValueError("Can't convert to " + str(space.__class__)) else: if space.dim != self.dim: raise ValueError("Can't convert to VectorSequenceSpace of " "dim %d" % (space.dim,)) @wraps(Space._format_as_impl) def _format_as_impl(self, is_numeric, batch, space): if isinstance(space, VectorSequenceSpace): if space.dim != self.dim: raise ValueError("The two VectorSequenceSpaces' dim " "values don't match. 
This should have been " "caught by " "VectorSequenceSpace._check_sizes().") return _cast(batch, space.dtype) else: raise ValueError("Can't convert %s to %s" % (self, space)) @wraps(Space.make_theano_batch) def make_theano_batch(self, name=None, dtype=None, batch_size=None): if batch_size == 1: return tensor.matrix(name=name) else: return ValueError("VectorSequenceSpace does not support batches " "of sequences.") @wraps(Space._batch_size_impl) def _batch_size_impl(self, is_numeric, batch): # Only batch size of 1 is supported return 1 @wraps(Space._validate_impl) def _validate_impl(self, is_numeric, batch): # checks that batch isn't a tuple, checks batch.type against self.dtype super(VectorSequenceSpace, self)._validate_impl(is_numeric, batch) if is_numeric: # Use the 'CudaNdarray' string to avoid importing # theano.sandbox.cuda when it is not available if not isinstance(batch, np.ndarray) \ and str(type(batch)) != "<type 'CudaNdarray'>": raise TypeError("The value of a VectorSequenceSpace batch " "should be a numpy.ndarray, or CudaNdarray, " "but is %s." % str(type(batch))) if batch.ndim != 2: raise ValueError("The value of a VectorSequenceSpace batch " "must be 2D, got %d dimensions for %s." % (batch.ndim, batch)) if batch.shape[1] != self.dim: raise ValueError("The width of a VectorSequenceSpace 'batch' " "must match with the space's window" "dimension, but batch has dim %d and " "this space's dim is %d." 
% (batch.shape[1], self.dim)) else: if not isinstance(batch, theano.gof.Variable): raise TypeError("VectorSequenceSpace batch should be a theano " "Variable, got " + str(type(batch))) if not isinstance(batch.type, (theano.tensor.TensorType, CudaNdarrayType)): raise TypeError("VectorSequenceSpace batch should be " "TensorType or CudaNdarrayType, got " + str(batch.type)) if batch.ndim != 2: raise ValueError("VectorSequenceSpace 'batches' must be 2D, " "got %d dimensions" % batch.ndim) for val in get_debug_values(batch): self.np_validate(val) class IndexSequenceSpace(SimplyTypedSpace): """ A space representing a single, variable-length sequence of indexes. Parameters ---------- max_labels : int The number of possible classes/labels. This means that all labels should be < max_labels. dim : int The number of indices in one element of the sequence dtype : str A numpy dtype string indicating this space's dtype. Must be an integer type e.g. int32 or int64. kwargs : dict Passes on to superclass constructor """ def __init__(self, max_labels, dim, dtype='int64', **kwargs): if 'int' not in dtype: raise ValueError("The dtype of IndexSequenceSpace must be an " "integer type") super(IndexSequenceSpace, self).__init__(dtype, **kwargs) self.max_labels = max_labels self.dim = dim self.formatter = OneHotFormatter(self.max_labels) def __str__(self): """Return a string representation""" return ('%(classname)s(dim=%(dim)s, max_labels=%(max_labels)s, ' 'dtype=%(dtype)s)') % dict(classname=self.__class__.__name__, dim=self.dim, max_labels=self.max_labels, dtype=self.dtype) def __eq__(self, other): """ .. 
todo:: WRITEME """ return (type(self) == type(other) and self.max_labels == other.max_labels and self.dim == other.dim and self.dtype == other.dtype) @wraps(Space._check_sizes) def _check_sizes(self, space): if isinstance(space, VectorSequenceSpace): # self.max_labels -> merged onehots # self.dim * self.max_labels -> concatenated if space.dim not in (self.max_labels, self.dim * self.max_labels): raise ValueError("Can't convert to VectorSequenceSpace of " "dim %d. Expected either " "dim=%d (merged one-hots) or %d " "(concatenated one-hots)" % (space.dim, self.max_labels, self.dim * self.max_labels)) elif isinstance(space, IndexSequenceSpace): if space.dim != self.dim or space.max_labels != self.max_labels: raise ValueError("Can't convert to IndexSequenceSpace of " "dim %d and max_labels %d." % (space.dim, self.max_labels)) else: raise ValueError("Can't convert to " + str(space.__class__)) @wraps(Space._format_as_impl) def _format_as_impl(self, is_numeric, batch, space): if isinstance(space, VectorSequenceSpace): if self.max_labels == space.dim: mode = 'merge' elif self.dim * self.max_labels == space.dim: mode = 'concatenate' else: raise ValueError("There is a bug. Couldn't format to a " "VectorSequenceSpace because it had an " "incorrect size, but this should've been " "caught in " "IndexSequenceSpace._check_sizes().") format_func = (self.formatter.format if is_numeric else self.formatter.theano_expr) return _cast(format_func(batch, mode=mode), space.dtype) elif isinstance(space, IndexSequenceSpace): if space.dim != self.dim or space.max_labels != self.max_labels: raise ValueError("The two IndexSequenceSpaces' dim and " "max_labels values don't match. 
This should " "have been caught by " "IndexSequenceSpace._check_sizes().") return _cast(batch, space.dtype) else: raise ValueError("Can't convert %s to %s" % (self, space)) @wraps(Space.make_theano_batch) def make_theano_batch(self, name=None, dtype=None, batch_size=None): if batch_size == 1: return tensor.matrix(name=name) else: return ValueError("IndexSequenceSpace does not support batches " "of sequences.") @wraps(Space._batch_size_impl) def _batch_size_impl(self, is_numeric, batch): # Only batch size of 1 is supported return 1 @wraps(Space._validate_impl) def _validate_impl(self, is_numeric, batch): # checks that batch isn't a tuple, checks batch.type against self.dtype super(IndexSequenceSpace, self)._validate_impl(is_numeric, batch) if is_numeric: # Use the 'CudaNdarray' string to avoid importing # theano.sandbox.cuda when it is not available if not isinstance(batch, np.ndarray) \ and str(type(batch)) != "<type 'CudaNdarray'>": raise TypeError("The value of a IndexSequenceSpace batch " "should be a numpy.ndarray, or CudaNdarray, " "but is %s." % str(type(batch))) if batch.ndim != 2: raise ValueError("The value of a IndexSequenceSpace batch " "must be 2D, got %d dimensions for %s." % (batch.ndim, batch)) if batch.shape[1] != self.dim: raise ValueError("The width of a IndexSequenceSpace batch " "must match with the space's dimension, but " "batch has shape %s and dim = %d." 
% (str(batch.shape), self.dim)) else: if not isinstance(batch, theano.gof.Variable): raise TypeError("IndexSequenceSpace batch should be a theano " "Variable, got " + str(type(batch))) if not isinstance(batch.type, (theano.tensor.TensorType, CudaNdarrayType)): raise TypeError("IndexSequenceSpace batch should be " "TensorType or CudaNdarrayType, got " + str(batch.type)) if batch.ndim != 2: raise ValueError('IndexSequenceSpace batches must be 2D, got ' '%d dimensions' % batch.ndim) for val in get_debug_values(batch): self.np_validate(val) class Conv2DSpace(SimplyTypedSpace): """ A space whose points are 3-D tensors representing (potentially multi-channel) images. Parameters ---------- shape : sequence, length 2 The shape of a single image, i.e. (rows, cols). num_channels : int (synonym: channels) Number of channels in the image, i.e. 3 if RGB. axes : tuple A tuple indicating the semantics of each axis, containing the following elements in some order: - 'b' : this axis is the batch index of a minibatch. - 'c' : this axis the channel index of a minibatch. - 0 : topological axis 0 (rows) - 1 : topological axis 1 (columns) For example, a PIL image has axes (0, 1, 'c') or (0, 1). The pylearn2 image displaying functionality uses ('b', 0, 1, 'c') for batches and (0, 1, 'c') for images. theano's conv2d operator uses ('b', 'c', 0, 1) images. dtype : str A numpy dtype string (e.g. 'float32') indicating this space's dtype, or None for a dtype-agnostic space. kwargs : dict Passed on to superclass constructor """ # Assume pylearn2's get_topological_view format, since this is how # data is currently served up. 
If we make better iterators change # default to ('b', 'c', 0, 1) for theano conv2d default_axes = ('b', 0, 1, 'c') def __init__(self, shape, channels=None, num_channels=None, axes=None, dtype='floatX', **kwargs): super(Conv2DSpace, self).__init__(dtype, **kwargs) assert (channels is None) + (num_channels is None) == 1 if num_channels is None: num_channels = channels assert isinstance(num_channels, py_integer_types) if not hasattr(shape, '__len__'): raise ValueError("shape argument for Conv2DSpace must have a " "length. Got %s." % str(shape)) if len(shape) != 2: raise ValueError("shape argument to Conv2DSpace must be length 2, " "not %d" % len(shape)) assert all(isinstance(elem, py_integer_types) for elem in shape) assert all(elem > 0 for elem in shape) assert isinstance(num_channels, py_integer_types) assert num_channels > 0 # Converts shape to a tuple, so it can be hashable, and self can be too self.shape = tuple(shape) self.num_channels = num_channels if axes is None: axes = self.default_axes assert len(axes) == 4 self.axes = tuple(axes) def __str__(self): """ .. todo:: WRITEME """ return ("%s(shape=%s, num_channels=%d, axes=%s, dtype=%s)" % (self.__class__.__name__, str(self.shape), self.num_channels, str(self.axes), self.dtype)) def __eq__(self, other): """ .. todo:: WRITEME """ assert isinstance(self.axes, tuple) if isinstance(other, Conv2DSpace): assert isinstance(other.axes, tuple) return (type(self) == type(other) and self.shape == other.shape and self.num_channels == other.num_channels and self.axes == other.axes and self.dtype == other.dtype) def __hash__(self): """ .. 
todo:: WRITEME """ return hash((type(self), self.shape, self.num_channels, self.axes, self.dtype)) @functools.wraps(Space.get_batch_axis) def get_batch_axis(self): return self.axes.index('b') @functools.wraps(Space.get_origin) def get_origin(self): dims = {0: self.shape[0], 1: self.shape[1], 'c': self.num_channels} shape = [dims[elem] for elem in self.axes if elem != 'b'] return np.zeros(shape, dtype=self.dtype) @functools.wraps(Space.get_origin_batch) def get_origin_batch(self, batch_size, dtype=None): dtype = self._clean_dtype_arg(dtype) if not isinstance(batch_size, py_integer_types): raise TypeError("Conv2DSpace.get_origin_batch expects an int, " "got %s of type %s" % (str(batch_size), type(batch_size))) assert batch_size > 0 dims = {'b': batch_size, 0: self.shape[0], 1: self.shape[1], 'c': self.num_channels} shape = [dims[elem] for elem in self.axes] return np.zeros(shape, dtype=dtype) @functools.wraps(Space.make_theano_batch) def make_theano_batch(self, name=None, dtype=None, batch_size=None): dtype = self._clean_dtype_arg(dtype) broadcastable = [False] * 4 broadcastable[self.axes.index('c')] = (self.num_channels == 1) broadcastable[self.axes.index('b')] = (batch_size == 1) broadcastable = tuple(broadcastable) rval = TensorType(dtype=dtype, broadcastable=broadcastable )(name=name) if theano.config.compute_test_value != 'off': if batch_size == 1: n = 1 else: # TODO: try to extract constant scalar value from batch_size n = 4 rval.tag.test_value = self.get_origin_batch(batch_size=n, dtype=dtype) return rval @functools.wraps(Space._batch_size_impl) def _batch_size_impl(self, is_numeric, batch): return batch.shape[self.axes.index('b')] @staticmethod def convert(tensor, src_axes, dst_axes): """ Returns a view of tensor using the axis semantics defined by dst_axes. (If src_axes matches dst_axes, returns tensor itself) Useful for transferring tensors between different Conv2DSpaces. 
Parameters ---------- tensor : tensor_like A 4-tensor representing a batch of images src_axes : WRITEME Axis semantics of tensor dst_axes : WRITEME WRITEME """ src_axes = tuple(src_axes) dst_axes = tuple(dst_axes) assert len(src_axes) == 4 assert len(dst_axes) == 4 if src_axes == dst_axes: return tensor shuffle = [src_axes.index(elem) for elem in dst_axes] if is_symbolic_batch(tensor): return tensor.dimshuffle(*shuffle) else: return tensor.transpose(*shuffle) @staticmethod def convert_numpy(tensor, src_axes, dst_axes): """ .. todo:: WRITEME """ return Conv2DSpace.convert(tensor, src_axes, dst_axes) @functools.wraps(Space.get_total_dimension) def get_total_dimension(self): # Patch old pickle files if not hasattr(self, 'num_channels'): self.num_channels = self.nchannels return self.shape[0] * self.shape[1] * self.num_channels @functools.wraps(Space._validate_impl) def _validate_impl(self, is_numeric, batch): # checks batch.type against self.dtype super(Conv2DSpace, self)._validate_impl(is_numeric, batch) if not is_numeric: if isinstance(batch, theano.sparse.SparseVariable): raise TypeError("Conv2DSpace cannot use SparseVariables, " "since as of this writing (28 Dec 2013), " "there is not yet a SparseVariable type with " "4 dimensions") if not isinstance(batch, theano.gof.Variable): raise TypeError("Conv2DSpace batches must be theano " "Variables, got " + str(type(batch))) if not isinstance(batch.type, (theano.tensor.TensorType, CudaNdarrayType)): raise TypeError('Expected TensorType or CudaNdArrayType, got ' '"%s"' % type(batch.type)) if batch.ndim != 4: raise ValueError("The value of a Conv2DSpace batch must be " "4D, got %d dimensions for %s." 
% (batch.ndim, batch)) for val in get_debug_values(batch): self.np_validate(val) else: if scipy.sparse.issparse(batch): raise TypeError("Conv2DSpace cannot use sparse batches, since " "scipy.sparse does not support 4 dimensional " "tensors currently (28 Dec 2013).") if (not isinstance(batch, np.ndarray)) \ and type(batch) != 'CudaNdarray': raise TypeError("The value of a Conv2DSpace batch should be a " "numpy.ndarray, or CudaNdarray, but is %s." % str(type(batch))) if batch.ndim != 4: raise ValueError("The value of a Conv2DSpace batch must be " "4D, got %d dimensions for %s." % (batch.ndim, batch)) d = self.axes.index('c') actual_channels = batch.shape[d] if actual_channels != self.num_channels: raise ValueError("Expected axis %d to be number of channels " "(%d) but it is %d" % (d, self.num_channels, actual_channels)) assert batch.shape[self.axes.index('c')] == self.num_channels for coord in [0, 1]: d = self.axes.index(coord) actual_shape = batch.shape[d] expected_shape = self.shape[coord] if actual_shape != expected_shape: raise ValueError("Conv2DSpace with shape %s and axes %s " "expected dimension %s of a batch (%s) " "to have length %s but it has %s" % (str(self.shape), str(self.axes), str(d), str(batch), str(expected_shape), str(actual_shape))) @functools.wraps(Space._format_as_impl) def _format_as_impl(self, is_numeric, batch, space): if isinstance(space, VectorSpace): # We need to ensure that the resulting batch will always be # the same in `space`, no matter what the axes of `self` are. 
            if self.axes != self.default_axes:
                # The batch index goes on the first axis
                assert self.default_axes[0] == 'b'
                # Put axes into default order before flattening, so the
                # row layout is independent of self.axes.
                batch = batch.transpose(*[self.axes.index(axis)
                                          for axis in self.default_axes])
            result = batch.reshape((batch.shape[0],
                                    self.get_total_dimension()))
            if space.sparse:
                result = _dense_to_sparse(result)
        elif isinstance(space, Conv2DSpace):
            result = Conv2DSpace.convert(batch, self.axes, space.axes)
        else:
            raise NotImplementedError("%s doesn't know how to format as %s"
                                      % (str(self), str(space)))

        return _cast(result, space.dtype)

    @functools.wraps(Space._undo_format_as_impl)
    def _undo_format_as_impl(self, batch, space):
        # Walk back up the Theano graph, stripping off the ops that
        # _format_as_impl applied, in reverse order.
        # Check for cast
        batch = _undo_op(batch, 'Cast')
        if isinstance(space, VectorSpace):
            # Check for SparseFromDense
            batch = _undo_op(batch, 'SparseFromDense')
            # Undo reshape op
            batch = _undo_op(batch, 'Reshape', strict=True)
            # Check to see if axis ordering was changed
            if self.axes != self.default_axes:
                batch = _undo_op(batch, 'DimShuffle', strict=True)
        elif isinstance(space, Conv2DSpace):
            # Check to see if axis ordering was changed
            if space.axes != self.axes:
                batch = _undo_op(batch, 'DimShuffle', strict=True)
        else:
            raise NotImplementedError("%s doesn't know how to format as %s"
                                      % (str(self), str(space)))
        return batch


class CompositeSpace(Space):
    """
    A Space whose points are tuples of points in other spaces.

    May be nested, in which case the points are nested tuples.

    Parameters
    ----------
    components : list or tuple of Space
        The spaces of the tuple's elements.
    kwargs : dict
        Passed on to superclass constructor.
    """

    def __init__(self, components, **kwargs):
        super(CompositeSpace, self).__init__(**kwargs)
        assert isinstance(components, (list, tuple))
        for i, component in enumerate(components):
            if not isinstance(component, Space):
                raise TypeError("component %d is %s of type %s, expected "
                                "Space instance. " %
                                (i, str(component), str(type(component))))
        self.components = list(components)

    def __eq__(self, other):
        """
        Two CompositeSpaces are equal iff they have the same class and
        their components compare equal pairwise.
        """
        return (type(self) == type(other) and
                len(self.components) == len(other.components) and
                all(my_component == other_component
                    for my_component, other_component
                    in zip(self.components, other.components)))

    def __hash__(self):
        """
        Hash is consistent with __eq__ above.
        """
        return hash((type(self), tuple(self.components)))

    def __str__(self):
        """
        Returns a human-readable description listing the components.
        """
        return '%(classname)s(%(components)s)' % \
            dict(classname=self.__class__.__name__,
                 components=', '.join([str(c) for c in self.components]))

    @property
    def dtype(self):
        """
        Returns a nested tuple of dtype strings.

        NullSpaces will yield a bogus dtype string (see NullSpace.dtype).
        """
        def get_dtype_of_space(space):
            if isinstance(space, CompositeSpace):
                return tuple(get_dtype_of_space(c) for c in space.components)
            elif isinstance(space, NullSpace):
                return NullSpace().dtype
            else:
                return space.dtype

        return get_dtype_of_space(self)

    @dtype.setter
    def dtype(self, new_dtype):
        """
        If new_dtype is None or a string, it will be applied to all
        components (except any NullSpaces).

        If new_dtype is a (nested) tuple, its elements will be applied to
        corresponding components.
        """
        # NOTE(review): a new_dtype that is none of tuple/None/str is
        # silently ignored here — no branch handles it.
        if isinstance(new_dtype, tuple):
            for component, new_dt in safe_zip(self.components, new_dtype):
                component.dtype = new_dt
        elif new_dtype is None or isinstance(new_dtype, str):
            for component in self.components:
                if not isinstance(component, NullSpace):
                    component.dtype = new_dtype

    def restrict(self, subset):
        """
        Returns a new Space containing only the components whose indices
        are given in subset. The new space will contain the components in
        the order given in the subset list.

        Parameters
        ----------
        subset : list or tuple of int
            Indices into self.components.

        Notes
        -----
        The returned Space may not be a CompositeSpace if `subset`
        contains only one index.
        """
        assert isinstance(subset, (list, tuple))

        if len(subset) == 1:
            idx, = subset
            return self.components[idx]

        return CompositeSpace([self.components[i] for i in subset])

    def restrict_batch(self, batch, subset):
        """
        Returns a batch containing only the components whose indices are
        present in subset. May not be a tuple anymore if there is only one
        index. Outputs will be ordered in the order that they appear in
        subset. Only supports symbolic batches.

        Parameters
        ----------
        batch : tuple
            A symbolic batch belonging to this space.
        subset : list or tuple of int
            Indices into self.components.
        """
        self._validate(is_numeric=False, batch=batch)
        assert isinstance(subset, (list, tuple))

        if len(subset) == 1:
            idx, = subset
            return batch[idx]

        return tuple([batch[i] for i in subset])

    @functools.wraps(Space.get_total_dimension)
    def get_total_dimension(self):
        return sum([component.get_total_dimension()
                    for component in self.components])

    @functools.wraps(Space.make_shared_batch)
    def make_shared_batch(self, batch_size, name=None, dtype=None):
        dtype = self._clean_dtype_arg(dtype)
        batch = self.get_origin_batch(batch_size, dtype)

        # Wrap each leaf numpy array in a theano shared variable,
        # preserving the (possibly nested) tuple structure.
        def recursive_shared(batch):
            if isinstance(batch, tuple):
                return tuple(recursive_shared(b) for b in batch)
            else:
                return theano.shared(batch, name=name)

        return recursive_shared(batch)

    @functools.wraps(Space._format_as_impl)
    def _format_as_impl(self, is_numeric, batch, space):
        """
        Supports formatting to a single VectorSpace, or to a CompositeSpace.

        CompositeSpace->VectorSpace:
        Traverses the nested components in depth-first order, serializing
        the leaf nodes (i.e. the non-composite subspaces) into the
        VectorSpace.

        CompositeSpace->CompositeSpace:
        Only works for two CompositeSpaces that have the same nested
        structure. Traverses both CompositeSpaces' nested components in
        parallel, converting between corresponding non-composite components
        in <self> and <space> as:
        `self_component._format_as(is_numeric,
                                   batch_component,
                                   space_component)`

        Parameters
        ----------
        batch : tuple
            A (possibly nested) tuple batch belonging to this space.
        space : Space
            Target space (VectorSpace or CompositeSpace).

        Returns
        -------
        The formatted batch (a single matrix for VectorSpace, a nested
        tuple for CompositeSpace).
        """
        if isinstance(space, VectorSpace):
            # Flatten each component into its own VectorSpace slice, then
            # concatenate the slices along the feature axis.
            pieces = []
            for component, input_piece in zip(self.components, batch):
                subspace = VectorSpace(dim=component.get_total_dimension(),
                                       dtype=space.dtype,
                                       sparse=space.sparse)
                pieces.append(component._format_as(is_numeric,
                                                   input_piece,
                                                   subspace))

            # Pieces should all have the same dtype, before we concatenate
            # them
            if len(pieces) > 0:
                for piece in pieces[1:]:
                    if pieces[0].dtype != piece.dtype:
                        assert space.dtype is None
                        raise TypeError("Tried to format components with "
                                        "differing dtypes into a VectorSpace "
                                        "with no dtype of its own. "
                                        "dtypes: %s" %
                                        str(tuple(str(p.dtype)
                                                  for p in pieces)))

            if is_symbolic_batch(batch):
                if space.sparse:
                    return theano.sparse.hstack(pieces)
                else:
                    return tensor.concatenate(pieces, axis=1)
            else:
                if space.sparse:
                    return scipy.sparse.hstack(pieces)
                else:
                    return np.concatenate(pieces, axis=1)

        if isinstance(space, CompositeSpace):
            def recursive_format_as(orig_space, batch, dest_space):
                if not (isinstance(orig_space, CompositeSpace) ==
                        isinstance(dest_space, CompositeSpace)):
                    raise TypeError("Can't convert between CompositeSpaces "
                                    "with different tree structures")

                # No need to check batch's tree structure. Space._format_as()
                # already did that by calling _validate(), before calling this
                # method.
                if isinstance(orig_space, CompositeSpace):
                    return tuple(recursive_format_as(os, bt, ds)
                                 for os, bt, ds
                                 in safe_zip(orig_space.components,
                                             batch,
                                             dest_space.components))
                else:
                    return orig_space._format_as(is_numeric,
                                                 batch,
                                                 dest_space)

            return recursive_format_as(self, batch, space)

        raise NotImplementedError(str(self) +
                                  " does not know how to format as " +
                                  str(space))

    @functools.wraps(Space._undo_format_as_impl)
    def _undo_format_as_impl(self, batch, space):
        """
        Undoes the formatting to a single VectorSpace, or to a
        CompositeSpace.

        CompositeSpace->VectorSpace:
        Traverses the nested components in depth-first order, serializing
        the leaf nodes (i.e. the non-composite subspaces) into the
        VectorSpace.

        CompositeSpace->CompositeSpace:
        Only works for two CompositeSpaces that have the same nested
        structure. Traverses both CompositeSpaces' nested components in
        parallel, converting between corresponding non-composite components
        in <self> and <space> as:
        `self_component._format_as(is_numeric,
                                   batch_component,
                                   space_component)`

        Parameters
        ----------
        batch : theano variable or tuple
            The formatted batch to unwind.
        space : Space
            The space `batch` was formatted into.

        Returns
        -------
        The reconstructed tuple batch.
        """
        if isinstance(space, VectorSpace):
            # Undo join: recover the per-component inputs of the
            # concatenation op from the Theano graph.
            if space.sparse:
                owner = batch.owner
                assert owner is not None
                assert 'HStack' in str(owner.op)
                batch = owner.inputs
            else:
                owner = batch.owner
                assert owner is not None
                assert str(owner.op) == 'Join'
                # First component is join axis
                batch = owner.inputs[1:]

            def extract_dtype(dtype):
                # A CompositeSpace's dtype is a nested tuple; take the
                # first leaf dtype.
                if isinstance(dtype, tuple):
                    return extract_dtype(dtype[0])
                else:
                    return dtype

            def compose_batch(composite_space, batch_list):
                # Rebuild the tuple batch by undoing each component's
                # flattening.
                rval = ()
                for sp, bt in safe_zip(composite_space.components,
                                       batch_list):
                    # NOTE(review): this branch is disabled by the literal
                    # `False`, so nested CompositeSpace components always
                    # take the else path — presumably deliberate dead code;
                    # confirm before re-enabling.
                    if False and isinstance(sp, CompositeSpace):
                        composed, batch_list = compose_batch(sp, batch_list)
                        rval += (composed,)
                    else:
                        sparse = getattr(sp, 'sparse', False)
                        dtype = extract_dtype(sp.dtype)
                        new_sp = VectorSpace(dim=sp.get_total_dimension(),
                                             dtype=dtype,
                                             sparse=sparse)
                        new_batch = sp.undo_format_as(bt, new_sp)
                        rval += (new_batch,)
                return rval

            composed = compose_batch(self, batch)
            return composed

        if isinstance(space, CompositeSpace):
            def recursive_undo_format_as(orig_space, batch, dest_space):
                if not (isinstance(orig_space, CompositeSpace) ==
                        isinstance(dest_space, CompositeSpace)):
                    raise TypeError("Can't convert between CompositeSpaces "
                                    "with different tree structures")

                # No need to check batch's tree structure.
                # Space.undo_format_as() already did that
                # by calling _validate(), before calling this
                # method.
                if isinstance(orig_space, CompositeSpace):
                    return tuple(recursive_undo_format_as(os, bt, ds)
                                 for os, bt, ds
                                 in safe_zip(orig_space.components,
                                             batch,
                                             dest_space.components))
                else:
                    return orig_space.undo_format_as(batch, dest_space)

            return recursive_undo_format_as(self, batch, space)

        raise NotImplementedError(str(self) +
                                  " does not know how to format as " +
                                  str(space))

    @functools.wraps(Space._validate_impl)
    def _validate_impl(self, is_numeric, batch):
        if not isinstance(batch, tuple):
            raise TypeError("The value of a CompositeSpace batch should be a "
                            "tuple, but is %s of type %s." %
                            (batch, type(batch)))
        if len(batch) != len(self.components):
            raise ValueError("Expected %d elements in batch, got %d"
                             % (len(self.components), len(batch)))
        # Validate each element against its corresponding component space.
        for batch_elem, component in zip(batch, self.components):
            component._validate(is_numeric, batch_elem)

    def get_origin_batch(self, batch_size, dtype=None):
        """
        Calls get_origin_batch on all subspaces, and returns a (nested)
        tuple containing their return values.

        Parameters
        ----------
        batch_size : int
            Batch size.

        dtype : str
            the dtype to use for all the get_origin_batch() calls on
            subspaces. If dtype is None, or a single dtype string, that
            will be used for all calls. If dtype is a (nested) tuple, it
            must mirror the tree structure of this CompositeSpace.
""" dtype = self._clean_dtype_arg(dtype) return tuple(component.get_origin_batch(batch_size, dt) for component, dt in safe_zip(self.components, dtype)) @functools.wraps(Space.make_theano_batch) def make_theano_batch(self, name=None, dtype=None, batch_size=None): """ Calls make_theano_batch on all subspaces, and returns a (nested) tuple containing their return values. Parameters ---------- name : str Name of the symbolic variable dtype : str The dtype of the returned batch. If dtype is a string, it will be applied to all components. If dtype is None, C.dtype will be used for each component C. If dtype is a nested tuple, its elements will be applied to corresponding elements in the components. batch_size : int Batch size. """ if name is None: name = [None] * len(self.components) elif not isinstance(name, (list, tuple)): name = ['%s[%i]' % (name, i) for i in xrange(len(self.components))] dtype = self._clean_dtype_arg(dtype) assert isinstance(name, (list, tuple)) assert isinstance(dtype, (list, tuple)) rval = tuple([x.make_theano_batch(name=n, dtype=d, batch_size=batch_size) for x, n, d in safe_zip(self.components, name, dtype)]) return rval @functools.wraps(Space._batch_size_impl) def _batch_size_impl(self, is_numeric, batch): def has_no_data(space): """ Returns True if space can contain no data. """ return (isinstance(subspace, NullSpace) or (isinstance(subspace, CompositeSpace) and len(subspace.components) == 0)) if is_symbolic_batch(batch): for subspace, subbatch in safe_zip(self.components, batch): if not has_no_data(subspace): return subspace._batch_size(is_numeric, subbatch) return 0 # TODO: shouldn't this line return a Theano object? 
else: result = None for subspace, subbatch in safe_zip(self.components, batch): batch_size = subspace._batch_size(is_numeric, subbatch) if has_no_data(subspace): assert batch_size == 0 else: if result is None: result = batch_size elif batch_size != result: raise ValueError("All non-empty components of a " "CompositeSpace should have the same " "batch size, but we encountered " "components with size %s, then %s." % (result, batch_size)) return 0 if result is None else result def _clean_dtype_arg(self, dtype): """ If dtype is None or a string, this returns a nested tuple that mirrors the tree structure of this CompositeSpace, with dtype at the leaves. If dtype is a nested tuple, this checks that it has the same tree structure as this CompositeSpace. """ super_self = super(CompositeSpace, self) def make_dtype_tree(dtype, space): """ Creates a nested tuple tree that mirrors the tree structure of <space>, populating the leaves with <dtype>. """ if isinstance(space, CompositeSpace): return tuple(make_dtype_tree(dtype, component) for component in space.components) else: return super_self._clean_dtype_arg(dtype) def check_dtype_tree(dtype, space): """ Verifies that a dtype tree mirrors the tree structure of <space>, calling Space._clean_dtype_arg on the leaves. """ if isinstance(space, CompositeSpace): if not isinstance(dtype, tuple): raise TypeError("Tree structure mismatch.") return tuple(check_dtype_tree(dt, c) for dt, c in safe_zip(dtype, space.components)) else: if not (dtype is None or isinstance(dtype, str)): raise TypeError("Tree structure mismatch.") return super_self._clean_dtype_arg(dtype) if dtype is None or isinstance(dtype, str): dtype = super_self._clean_dtype_arg(dtype) return make_dtype_tree(dtype, self) else: return check_dtype_tree(dtype, self) class NullSpace(Space): """ A space that contains no data. As such, it has the following quirks: * Its validate()/np_validate() methods only accept None. * Its dtype string is "Nullspace's dtype". 
    # NullSpaces don't support validation callbacks, since they only take None
    # as data batches.

    def __init__(self):
        super(NullSpace, self).__init__()

    def __str__(self):
        """
        Returns the string "NullSpace".
        """
        return "NullSpace"

    def __eq__(self, other):
        """
        All NullSpace instances compare equal to each other (and to
        nothing else): equality is decided purely by type.
        """
        return type(self) == type(other)

    def __hash__(self):
        """
        Hashes by type, consistent with __eq__.
        """
        return hash(type(self))

    @property
    def dtype(self):
        """
        A NullSpace holds no data, so it has no real dtype; this property
        returns the placeholder string "NullSpace's dtype" instead.
        """
        return "%s's dtype" % self.__class__.__name__

    @dtype.setter
    def dtype(self, new_dtype):
        """
        Only accepts the placeholder dtype string returned by the getter;
        any other value raises a TypeError.
        """
        if new_dtype != self.dtype:
            raise TypeError('%s can only take the bogus dtype "%s"' %
                            (self.__class__.__name__, self.dtype))
        # otherwise, do nothing

    @functools.wraps(Space.make_theano_batch)
    def make_theano_batch(self, name=None, dtype=None):
        # The only valid batch for a NullSpace is None (see
        # _validate_impl below), so that is what gets created.
        return None

    @functools.wraps(Space._validate_impl)
    def _validate_impl(self, is_numeric, batch):
        # None is the only acceptable (dummy) batch for this space.
        if batch is not None:
            raise TypeError('NullSpace only accepts None, as a dummy data '
                            'batch. Instead, got %s of type %s' %
                            (batch, type(batch)))

    @functools.wraps(Space._format_as_impl)
    def _format_as_impl(self, is_numeric, batch, space):
        # A NullSpace batch can only be formatted as another NullSpace
        # batch, which is always None.
        assert isinstance(space, NullSpace)
        return None

    @functools.wraps(Space._batch_size_impl)
    def _batch_size_impl(self, is_numeric, batch):
        # There is no way to know how many examples would actually
        # have been in the batch, since it is empty. We return 0.
        self._validate(is_numeric, batch)
        return 0