python source code of basic

"""
Parent classes describing a layer, model or operator
"""
__docformat__ = 'restructedtext en'
__authors__ = ("Razvan Pascanu "
               "KyungHyun Cho "
               "Caglar Gulcehre ")
__contact__ = "Razvan Pascanu <r.pascanu@gmail>"


import numpy
import copy
import cPickle as pkl

import theano
import theano.tensor as TT
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
from groundhog.utils import utils
from groundhog.utils.utils import id_generator

class Container(object):
    """
    Root class. It contains some properties one would expect from any
    derived class.
    """

    def __init__(self):
        self.floatX = theano.config.floatX
        # Parameters of the model (shared variables)
        self.params                 = []
        # Factor scaling the gradient of the cost with respect to some
        # parameter
        self.params_grad_scale      = []
        # List of shared variables holding the noise used when applying
        # weight noise
        self.noise_params           = []
        # List of functions that compute the shape of a parameter
        self.noise_params_shape_fn  = []
        # Updates that the model require at each step
        self.updates                = []
        # Additional gradients that need to be added to the gradients of the
        # cost of a model with respect to the parameters
        self.additional_gradients   = []
        # Theano variables representing the inputs required to compute the
        # value of the container
        self.inputs                 = []
        # Schedules for updating shared variables involved in the
        # computation of the container
        self.schedules              = []
        # Additional properties that can be computed beside the output of
        # the container
        self.properties             = []

    def tensor_from_layer(self, arg):
        """
        Grab the theano tensor representing the computation of this
        layer/operator iff `arg` is a layer
        """
        if isinstance(arg, Container):
            return arg.out
        else:
            return arg

    def add_schedule(self, sched):
        """
        Add a new schedule to the list of schedules
        """
        self.schedules += [sched]

    def add_schedules(self, scheds):
        """
        Add a list of schedules to the current list of schedules
        """
        self.schedules += scheds

    def tensor_from_layer(self, arg, collect_params=True):
        """
        Grab the theano tensor representing the computation of this
        layer/operator iff `arg` is a layer.

        :type collect_params: bool
        :param collect_params: Flag. If true, also collect the parameters
            and inputs of the layer `arg` and make them parameters and inputs
            needed to compute the current layer/operator
        """
        if not collect_params:
            if isinstance(arg, Container):
                return arg.out
            else:
                return arg


        if isinstance(arg, Container):
            self.merge_params(arg)
            return arg.out
        elif isinstance(arg, theano.gof.Variable):
            inps = [x for x in theano.gof.graph.inputs([arg])
                    if not isinstance(x, (TT.Constant, theano.compile.SharedVariable))]
            self.add_inputs(inps)
            return arg
        else:
            return arg


    def add_inputs(self, inps):
        """
        Add to the current list of inputs the tensors in the `inps` list
        """
        if not isinstance(inps, (list, tuple)):
            inps = [inps]
        for inp in inps:
            if inp not in self.inputs:
                self.inputs = self.inputs + [inp]

    def merge_params(self, model):
        """
        Add to the current properties of the container (params, schedules,
        etc.) those of the layer/operator/model `model`.
        """
        new_params_grad_scale = [ps for ps, param in zip(model.params_grad_scale,
                                                         model.params)
                                 if param not in self.params]
        new_params  = [param for param in model.params if param not in self.params]

        assert len(new_params_grad_scale) == len(new_params)

        new_noise_params_shape_fn = [shape_fn
                for shape_fn,noise_param in zip(model.noise_params_shape_fn,
                                                model.noise_params)
                                    if noise_param not in self.noise_params]
        new_noise_params = [noise_param
                for noise_param in model.noise_params
                       if noise_param not in self.noise_params]
        new_inputs =[inp for inp in model.inputs if inp not in self.inputs]
        new_schedules =[schedule for schedule in model.schedules
                       if schedule not in self.schedules]
        new_updates = [update for update in model.updates
                       if update not in self.updates]
        new_additional_gradients = [additional_gradient
                for additional_gradient in model.additional_gradients
                       if additional_gradient not in self.additional_gradients]
        new_properties = [prop for prop in model.properties
                          if prop not in self.properties]

        self.params += new_params
        self.params_grad_scale += new_params_grad_scale
        assert len(self.params) == len(self.params_grad_scale)
        if hasattr(self, 'grads'):
            self.grads += [ 0 for param in new_params]
        self.noise_params += new_noise_params
        self.noise_params_shape_fn += new_noise_params_shape_fn
        self.inputs += new_inputs
        self.schedules += new_schedules
        self.updates += new_updates
        self.additional_gradients += new_additional_gradients
        self.properties += new_properties

    def save(self, filename):
        """
        Save the model to file `filename`
        """
        vals = dict([(x.name, x.get_value()) for x in self.params])
        numpy.savez(filename, **vals)

    def load(self, filename):
        """
        Load the model.
        """
        vals = numpy.load(filename)
        for p in self.params:
            if p.name in vals:
                print 'Loading', p.name
                p.set_value(vals[p.name])

class Layer(Container):
    """
    Parent class for Layers.
    A layer is a segment of a computational pipeline. It is different from a
    model in the sense that it does not necessarly have a cost or gradients
    defined, neither does it respect the interface expected from the
    trainers.

    """
    def __init__(self, n_in=0, n_out=0, rng=None, name=None):
        super(Layer, self).__init__()
        if name:
            self.name = name
        else:
            self.name = 'unknown_'+ id_generator(4)
        self.rng = rng
        self.n_in = n_in
        self.n_out = n_out
        self.n_hid = n_out
        self.floatX = theano.config.floatX

    def reshape(self, shape):
        assert hasattr(self, 'out'), 'all layers need a default output'
        new_obj = utils.copy(self)
        new_obj.out = new_obj.out.reshape(shape)
        return new_obj

    shape = property(lambda self: self.out.shape)

    def __str__(self):
        return self.name

    def __add__(self, other):
        assert hasattr(self, 'out'), 'all layers need a default output'
        new_obj = utils.copy(self)
        other_var = new_obj.tensor_from_layer(other)
        new_obj.out = new_obj.out + other_var
        # Summing cost layers:
        if hasattr(new_obj, 'grads') and hasattr(other, 'grads'):
            for param, grad_param in zip(other.params, other.grads):
                pos = new_obj.params.index(param)
                new_obj.grads[pos] += grad_param
        elif hasattr(new_obj, 'grads') and \
                isinstance(other, theano.gof.Variable) and \
                other.ndim == 0:
            other_grads = TT.grad(other, new_obj.params,
                                  disconnected_inputs='ignore')
            new_obj.grads = [x + y for x,y in zip(new_obj.grads,
                                                  other_grads)]
        elif hasattr(new_obj, 'grads'):
            raise ValueError('I do not know how to compute the gradients'
                             ' of the added term' + str(other) + '. Call'
                             ' train on it if it is an output layer')
        return new_obj

    def __sub__(self, other):
        assert hasattr(self, 'out'), 'all layers need a default output'
        new_obj = utils.copy(self)
        other_var = new_obj.tensor_from_layer(other)
        new_obj.out = new_obj.out - other_var
        if hasattr(new_obj, 'grads') and hasattr(other, 'grads'):
            for param, grad_param in zip(other.params, other.grads):
                pos = new_obj.params.index(param)
                new_obj.grads[pos] -= grad_param
        elif hasattr(new_obj, 'grads') and \
                isinstance(other, theano.gof.Variable) and \
                other.ndim == 0:
            other_grads = TT.grad(other, new_obj.params,
                                  disconnected_inputs='ignore')
            new_obj.grads = [x - y for x,y in zip(new_obj.grads,
                                                  other_grads)]
        elif hasattr(new_obj, 'grads'):
            raise ValueError('I do not know how to compute the gradients'
                             ' of the subtracted term' + str(other) + '. Call'
                             ' train on it if it is an output layer')
        return new_obj

    def __mul__(self, other):
        assert hasattr(self, 'out'), 'all layers need a default output'
        new_obj = utils.copy(self)
        other_var = self.tensor_from_layer(other)

        if hasattr(new_obj, 'grads') and hasattr(other, 'grads'):
            new_obj.grads = [ x * other_var for x in new_obj.grads]
            for param, grad_param in zip(other.params, other.grads):
                pos = new_obj.params.index(param)
                new_obj.grads[pos] += new_obj.out * grad_param
        elif hasattr(new_obj, 'grads') and \
                isinstance(other, theano.gof.Variable) and \
                other.ndim == 0:

            new_obj.grads = [ x * other_var for x in new_obj.grads]
            other_grads = TT.grad(other, new_obj.params,
                                  disconnected_inputs='ignore')
            new_obj.grads = [x + new_obj.cost * y
                             for x,y in zip(new_obj.grads,
                                                  other_grads)]
        elif hasattr(new_obj, 'grads'):
            raise ValueError('I do not know how to compute the gradients'
                             ' of the subtracted term' + str(other) + '. Call'
                             ' train on it if it is an output layer')
        new_obj.out = new_obj.out * other_var
        return new_obj


    def __div__(self, other):
        assert hasattr(self, 'out'), 'all layers need a default output'
        new_obj = utils.copy(self)
        other_var = new_obj.tensor_from_layer(other)
        if hasattr(new_obj, 'grads') and hasattr(other, 'grads'):
            new_obj.grads = [ x * other_var for x in new_obj.grads]
            for param, grad_param in zip(other.params, other.grads):
                pos = new_obj.params.index(param)
                new_obj.grads[pos] -= new_obj.out * grad_param
            new_obj.grads = [ x / (other_var**2) for x in new_obj.grads]
        elif hasattr(new_obj, 'grads') and \
                isinstance(other, theano.gof.Variable) and \
                other.ndim == 0:

            new_obj.grads = [ x * other_var for x in new_obj.grads]
            other_grads = TT.grad(other, new_obj.params,
                                  disconnected_inputs='ignore')
            new_obj.grads = [(x - new_obj.cost * y)/ (other_var**2)
                             for x,y in zip(new_obj.grads,
                                                  other_grads)]
        elif hasattr(new_obj, 'grads'):
            raise ValueError('I do not know how to compute the gradients'
                             ' of the subtracted term' + str(other) + '. Call'
                             ' train on it if it is an output layer')
        new_obj.out = new_obj.out / other_var
        return new_obj

    def __abs__(self, other):
        assert hasattr(self, 'out'), 'all layers need a default output'
        new_obj = utils.copy(self)
        new_obj.out = abs(new_obj.out)
        if hasattr(new_obj, 'grads'):
            new_obj.grads = [TT.sgn(new_obj.out) * x for x in new_obj.grads]
        return new_obj

    def __pow__(self, power):
        assert hasattr(self, 'out'), 'all layers need a default output'
        new_obj = utils.copy(self)
        power = self.tensor_from_layer(power)
        new_obj.out = new_obj.out**power
        if hasattr(new_obj, 'grads'):
            raise NotImplemented
        return new_obj


    def __lt__(self, other):
        assert hasattr(self, 'out'), 'all layers need a default output'
        new_obj = utils.copy(self)
        other = self.tensor_from_layer(other)
        new_obj.out = new_obj.out.__lt__(other)
        if hasattr(new_obj, 'grads'):
            raise NotImplemented
        return new_obj

    def __le__(self, other):
        assert hasattr(self, 'out'), 'all layers need a default output'
        new_obj = utils.copy(self)
        other = self.tensor_from_layer(other)
        new_obj.out = new_obj.out.__le__(other)
        if hasattr(new_obj, 'grads'):
            raise NotImplemented
        return new_obj

    def __gt__(self, other):
        assert hasattr(self, 'out'), 'all layers need a default output'
        new_obj = utils.copy(self)
        other = self.tensor_from_layer(other)
        new_obj.out = new_obj.out.__gt__(other)
        if hasattr(new_obj, 'grads'):
            raise NotImplemented
        return new_obj

    def __ge__(self, other):
        assert hasattr(self, 'out'), 'all layers need a default output'
        new_obj = utils.copy(self)
        other = self.tensor_from_layer(other)
        new_obj.out = new_obj.out.__ge__(other)
        if hasattr(new_obj, 'grads'):
            raise NotImplemented
        return new_obj

    def __getitem__(self, pos):
        assert hasattr(self, 'out'), 'all layers need a default output'
        new_obj = utils.copy(self)
        pos = self.tensor_from_layer(pos)
        new_obj.out = new_obj.out.__getitem__(pos)
        if hasattr(new_obj, 'grads'):
            raise NotImplemented
        return new_obj

    def _as_TensorVariable(self):
        print ('WARNING: you might loose track of parameters or inputs '\
               'because layer ' + self.name +' is being converted to a '\
               'theano variable')
        return self.out



    def validate(self, **kwargs):
        """
        Recompute the cost error (without the gradients)
        It only works for output layers !
        """
        if not hasattr(self, 'get_cost'):
            raise TypeError('Non-output layer does not support this method')
        new_obj = utils.copy(self)
        try:
            o_args, o_kwargs = new_obj.prev_args
        except:
            o_args, o_kwargs = ([], {})

        kwargs = dict([(k, new_obj.tensor_from_layer(v)) for k,v in kwargs.items()])
        for (k,v) in kwargs.items():
            o_kwargs[k] = v
        new_obj.prev_args = (o_args, o_kwargs)
        new_obj.get_cost(*o_args, **o_kwargs)
        return new_obj

    def train(self, **kwargs):
        """
        Compute the cost and gradients of the current layer with respect to
        its parameters.
        ! Only works for output layers
        """
        if not hasattr(self, 'get_grads'):
            raise TypeError('Non-output layer does not support this method')
        new_obj = utils.copy(self)
        try:
            o_args, o_kwargs = new_obj.prev_args
        except:
            o_args, o_kwargs = ([], {})
        kwargs = dict([(k, new_obj.tensor_from_layer(v)) for k,v in kwargs.items()])
        for (k,v) in kwargs.items():
            o_kwargs[k] = v
        new_obj.prev_args = (o_args, o_kwargs)
        new_obj.get_grads(*o_args, **o_kwargs)
        return new_obj

    def get_sample(self, **kwargs):
        """
        Get a sample from the curren model.
        ! Only works for output layers
        """
        if not hasattr(self, 'get_cost'):
            raise TypeError('Non-output layer does not support this method')
        new_obj = utils.copy(self)
        try:
            o_args, o_kwargs = new_obj.prev_args
        except:
            o_args, o_kwargs = ([], {})
        kwargs = dict([(k, new_obj.tensor_from_layer(v)) for k,v in kwargs.items()])
        for (k,v) in kwargs.items():
            o_kwargs[k] = v
        new_obj.prev_args = (o_args, o_kwargs)
        sample = new_obj.compute_sample(*o_args, **o_kwargs)
        return sample


    def __call__(self, *args, **kwargs):
        """
        Compose this layer with the inputs provided
        """
        if 'one_step' in kwargs and kwargs['one_step']:
            del kwargs['one_step']
            args = [self.tensor_from_layer(arg, False) for arg in args]
            kwargs = dict([(k, self.tensor_from_layer(v, False))
                           for k,v in kwargs.items()])
            if hasattr(self, 'step_fprop'):
                return self.step_fprop(*args, **kwargs)
            else:
                return self.fprop(*args, **kwargs)
        new_obj = utils.copy(self)

        args = [new_obj.tensor_from_layer(arg) for arg in args]
        kwargs = dict([(k, new_obj.tensor_from_layer(v)) for k,v in kwargs.items()])
        if 'do' in kwargs:
            kind = kwargs['do']
            del kwargs['do']
        else:
            kind = 'fprop'
        if 'one_step' in kwargs:
            del kwargs['one_step']
        new_obj.prev_args = (args, kwargs)
        if kind == 'fprop':
            new_obj.fprop(*args, **kwargs)
        elif kind == 'eval':
            new_obj.get_cost(*args, **kwargs)
        elif kind == 'train':
            new_obj.get_grads(*args, **kwargs)
        elif kind == 'run':
            return new_obj.run(*args, **kwargs)
        return new_obj


    def _init_params(self):
        raise NotImplementedError

    def fprop(self, state_below, state_before=None, state_after=None):
        raise NotImplementedError

class Model(Container):
    """
    Model class. It respects the interface expected by the trainer.
    """
    def __init__(self, output_layer,
                 sample_fn,
                 indx_word="/data/lisa/data/PennTreebankCorpus/dictionaries.npz",
                 indx_word_src=None,
                 rng =None):
        super(Model, self).__init__()
        if rng == None:
            rng = numpy.random.RandomState(123)
        assert hasattr(output_layer,'grads'), \
                'The model needs to have gradients defined'
        self.rng = rng
        self.trng = RandomStreams(rng.randint(1000)+1)
        self.sample_fn = sample_fn
        self.indx_word = indx_word
        self.indx_word_src = indx_word_src
        self.param_grads = output_layer.grads
        self.params = output_layer.params
        self.updates = output_layer.updates
        self.noise_params = output_layer.noise_params
        self.noise_params_shape_fn = output_layer.noise_params_shape_fn
        self.inputs = output_layer.inputs
        self.params_grad_scale = output_layer.params_grad_scale
        self.train_cost = output_layer.cost
        self.out = output_layer.out
        self.schedules = output_layer.schedules
        self.output_layer = output_layer
        self.properties = output_layer.properties
        self._get_samples = output_layer._get_samples

    def get_schedules(self):
        return self.schedules

    def validate(self, data_iterator):
        raise NotImplemented

    def clone(**new_inputs):
        new_obj = utils.copy(self)
        # Reorder inputs
        assert len(new_obj.inputs) == len(new_inputs.items())
        pairs=[(x, new_inputs[x.name]) for x in inputs]
        new_obj.inputs = new_inputs.values()
        new_obj.out = theano.clone(new_obj.out, replace=pairs)
        if hasattr(new_obj, 'cost'):
            new_obj.cost = theano.clone(new_obj.cost, replace=pairs)
        if hasattr(new_obj, 'grads'):
            new_obj.grads = theano.clone(new_obj.grads, replace=pairs)
        if hasattr(new_obj, 'sample'):
            new_obj.sample = theano.clone(new_obj.sample, replace=pairs)
        return new_obj

    def __call__(self, *args, **kwargs):
        return self.run(*args, **kwargs)

class Operator(Layer):
    def __init__(self,
                 apply_operator=None,
                 n_in=0,
                 n_out = 0):
        super(Operator, self).__init__(n_in, n_out, rng=None)
        self.apply_operator = apply_operator

    def __call__(self, *args, **kwargs):
        rval = self.apply_operator(*args, **kwargs)
        if 'one_step' in kwargs and kwargs['one_step']:
            return rval
        self.params = rval.params
        self.noise_params = rval.noise_params
        self.noise_params_shape_fn = rval.noise_params_shape_fn
        self.params_grad_scale = rval.params_grad_scale

        self.inputs = rval.inputs
        self.schedules = rval.schedules
        return rval

    def on(self, *args, **kwargs):
        # Experimental
        if not hasattr(self, 'run_fn'):
            self.run_fn = theano.function(self.inputs, self.out)
        return self.run_fn(*args, **kwargs)