python source code of model

### ModelBuilder class and associated helper methods

from mpi_learn.utils import load_model, get_device_name
from .optimizer import OptimizerBuilder
import numpy as np
import copy
import sys
import six
import logging

def session(f):
    def wrapper(*args, **kwargs):
        self_obj = args[0]
        if hasattr(self_obj, 'session'):
            with self_obj.graph.as_default():
                with self_obj.session.as_default():
                    return f(*args, **kwargs)
        else:
            return f(*args, **kwargs)
    return wrapper

class MPIModel(object):
    """Class that abstract all details of the model
    """
    def __init__(self, model=None, models=None):
        self.model = model
        self.models = models
        self.histories = {}
        if model and models:
            raise Exception("Cannot specify single and multiple models")

    @session
    def print_metrics(self, metrics):
        if self.model:
            names = self.model.metrics_names
            for name, metric in zip( names, metrics ):
                logging.info("{0}: {1:.3f}".format(name,metric))
        else:
            for im,m in enumerate(self.models):
                names = m.metrics_names
                ametric = metrics[im,...]
                logging.info('model {0} {1}'.format( im ,m.name))
                for name, metric in zip( names,ametric):
                    logging.info("{0}: {1:.3f}".format(name,metric))
                
    @session
    def get_logs(self, metrics, val=False):
        if self.model:
            if val:
                return { 'val_'+name:np.asscalar(metric) for name, metric in
                         zip( self.model.metrics_names, metrics ) }
            else:
                return { name:np.asscalar(metric) for name, metric in
                         zip( self.model.metrics_names, metrics ) }
        else:
            logs = []
            for im,m in enumerate(self.models):
                ametrics = metrics[im,...]
                if val:
                    logs.append({ 'val_'+name:np.asscalar(metric) for name, metric in
                             zip(m.metrics_names, ametrics ) })
                else:
                    logs.append({ name:np.asscalar(metric) for name, metric in
                                  zip(m.metrics_names, ametrics ) })
            return logs
        
    @session
    def update_history(self, items, arg_hist):
        if self.model:
            for m,v in items.items():
                arg_hist.setdefault(m,[]).append(v)
        else:
            for im,(m,it) in enumerate(zip(self.models, items)):
                m_name = "model%s"%im
                try:
                    m_name = m.name
                except:
                    logging.warning("no name attr")
                for m,v in it.items():
                    arg_hist.setdefault(m_name,{}).setdefault(m,[]).append(v)
        self.histories = arg_hist
                       
    @session
    def format_update(self):
        if self.model:
            return [ np.zeros( w.shape, dtype=np.float32 ) for w in self.model.get_weights() ]
        else:
            up = []
            for m in self.models:
                up.append( [ np.zeros( w.shape, dtype=np.float32 ) for w in m.get_weights() ] )
            return up
    
    @session
    def get_weights(self):
        if self.model:
            return self.model.get_weights()
        else:
            l_weights = []
            for m in self.models:
                l_weights.append( m.get_weights() )
            return l_weights
    
    @session
    def set_weights(self, w ):
        if self.model:
            self.model.set_weights( w )
        else:
            for m,mw in zip(self.models, w ):
                m.set_weights( mw )
            
    #def history(self):
    #    if self.model:
    #        return self.model.history.history
    #    else:
    #        return [m.history.history for m in self.models]
            
    #def set_history(self, h):
    #    if self.model:
    #        self.model.history = h()
    #    else:
    #        for m in self.models:
    #            m.history = h()

    @session
    def compile(self, **args):
        if 'optimizer' in args and isinstance(args['optimizer'], OptimizerBuilder):
            opt_builder = args['optimizer']
        else:
            opt_builder = None
        if self.model:
            if opt_builder:
                args['optimizer'] = opt_builder.build()
            self.model.compile( **args )
        else:
            for m in self.models:
                if opt_builder:
                    args['optimizer'] = opt_builder.build()
                m.compile( **args )

    @session
    def train_on_batch(self, **args):
        if self.model:
            return np.asarray(self.model.train_on_batch( **args ))
        else:
            h = []
            for m in self.models:
                h.append(m.train_on_batch( **args ))
            return np.asarray(h)
                
    @session
    def test_on_batch(self, **args):
        if self.model:
            return np.asarray(self.model.test_on_batch( **args ))
        else:
            h= []
            for m in self.models:
                h.append(m.test_on_batch( **args ))
            return np.asarray(h)

    @session
    def figure_of_merit(self, **args):
        ## runs like predict trace, and provides a non differentiable figure of merit for hyper-opt
        ## can of course be the validation loss
        if self.model:
            ## return a default value from the validation history
            return (1.-self.histories['val_acc'][-1])
            #return self.histories['val_loss'][-1]
        else: 
            return 0.


    @session
    def save(self, *args,**kwargs):
        if self.model:
            self.model.save( *args, **kwargs )
        else:
            for im,m in enumerate(self.models):
                fn = 'm%d_%s'%( im, args[0])
                m.save( fn, **kwargs )
            
    def close(self):
        if hasattr(self, 'session'):
            self.session.close()
                
class MPITModel(MPIModel):
    """
    adapter of a torch model to fit in the mpi_learn interface
    """
    def __init__(self, model=None, gpus=0):
        MPIModel.__init__(self,model = model)
        self.gpus = gpus
        self.metrics_names = ["loss"]
        self.loss = None
        self.optimizer = None
        self.metrics = []
        self.loss_functions = None

        if self.gpus>0:
            self.model = self.model.cuda()
        if self.gpus >1:
            import torch.nn as nn
            self.model = nn.DataParallel(self.model)
        setattr(self.model, 'metrics_names', self.metrics_names)
        
    def format_update(self):
        ws = self.get_weights()
        return [ np.zeros( w.shape, dtype=np.float32 ) for w in ws]
    
    def get_weights(self):
        if self.gpus > 0:
            return copy.deepcopy([i.data.cpu().numpy() for i in list(self.model.parameters())])
        else:
            return copy.deepcopy([i.data.numpy() for i in list(self.model.parameters())])

    def set_weights(self, weights=[]):
        import torch # Don't put it outside because it will break Tensorflow
        for i,weight in enumerate(weights):
            list(self.model.parameters())[i].data.copy_(torch.from_numpy(weight))

    def compile(self, **kwargs):
        import torch.nn
        import torch.optim

        ### need to map the loss string into the relevant torch object
        self.loss = self._build_loss(kwargs['loss'])
        for metric in kwargs['metrics']:
            if metric.lower() == 'acc' or metric.lower() == 'accuracy':
                self.metrics_names.append('acc')
        ## we need a mapping of the kwargs into what is the optimizer that is getting used
        opt_builder = kwargs.get('optimizer')
        if opt_builder is None:
            self.optimizer = torch.optim.SGD(self.model.parameters(), 1.)
        else:
            self.optimizer = opt_builder.build_torch(self.model)

    def _build_loss(self, loss):
        import torch.nn
        if loss and loss.endswith('Loss'):
            loss = loss[:-4]
        lookup = {
            # Keras mapped losses
            'categorical_crossentropy': torch.nn.CrossEntropyLoss,
            'mean_squared_error' : torch.nn.MSELoss,
            'mean_absolute_error' : torch.nn.L1Loss,
            # PyTorch losses 
            'L1': torch.nn.L1Loss,
            'MSE': torch.nn.MSELoss,
            'CrossEntropy': torch.nn.CrossEntropyLoss,
            #'CTC': torch.nn.CTCLoss,
            'NLL': torch.nn.NLLLoss,
            'PoissonNLL': torch.nn.PoissonNLLLoss,
            'KLDiv': torch.nn.KLDivLoss,
            'BCE': torch.nn.BCELoss,
            'BCEWithLogits': torch.nn.BCEWithLogitsLoss,
            'MarginRanking': torch.nn.MarginRankingLoss,
            'HingeEmbedding': torch.nn.HingeEmbeddingLoss,
            'MultiLabelMargin': torch.nn.MultiLabelMarginLoss,
            'SmoothL1': torch.nn.SmoothL1Loss,
            'SoftMargin': torch.nn.SoftMarginLoss,
            'MultiLabelSoftMargin': torch.nn.MultiLabelSoftMarginLoss,
            'CosineEmbedding': torch.nn.CosineEmbeddingLoss,
            'MultiMargin': torch.nn.MultiMarginLoss,
            'TripletMargin': torch.nn.TripletMarginLoss
        }
        if loss in lookup:
            return lookup[loss]()
        else:
            logger.warning("No loss mapping found, using CrossEntropyLoss")
            return torch.nn.CrossEntropyLoss()

    def _accuracy(self, output, target, topk=(1,)):
        """Computes the precision@k for the specified values of k"""
        maxk = max(topk)
        batch_size = target.size(0)
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))
        
        res = []
        for k in topk:
            correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(1. / batch_size))
        return res
    def _convert_to_tensor(self, data ):
        import torch
        return torch.from_numpy(data)
        #if hasattr(data, 'keys'):
        #    out = [torch.from_numpy(data[key]) for key in sorted(data.keys())]
        #else:
        #    out = torch.from_numpy(data)
        #return out
        
    def train_on_batch(self, x=None, y=None, *args, **kwargs):
        '''Perform a single gradient update on a single batch of data.
        Attributes:
        x: Pytorch tensor of training data
        y: Pytorch tensor of target data

        Return:
        A list of scalar training loss and a metric specified in the compile method.
        '''
        from torch.autograd import Variable
        x = self._convert_to_tensor(x)
        y = self._convert_to_tensor(y)
        self.model.train()
        self.optimizer.zero_grad()
        target = y.long().max(1)[1] # Pytorch doesn't need 1-hot encoded label. Only the indices of classes.
        #target = y.long()
        if self.gpus>0:
            x = x.cuda()
            target = target.cuda()
        x = Variable(x)
        pred = self.model.forward(x)
        loss = self.loss(pred, Variable(target))
        loss.backward()
        self.optimizer.step()
        l_data = loss.data.numpy() if self.gpus == 0 else loss.data.cpu().numpy()
        self.metrics = [l_data] if l_data.shape==() else [l_data[0]]        
        if 'acc' in self.metrics_names: # compute the accuracy
            acc = self._accuracy(pred.data, target, topk=(1,))[0]
            if self.gpus > 0: acc = acc.cpu()
            self.metrics.append(acc.numpy()[0])
        return np.asarray(self.metrics)


    def test_on_batch(self, x=None, y=None, *args, **kwargs):
        '''Test the model on a single batch of samples. No gradient update is performed.
        Attributes:
        x: Pytorch tensor of test data
        y: Pytorch tensor of target data

        Return:
        A list of scalar training loss and a metric specified in the compile method.
        '''
        from torch.autograd import Variable
        x = self._convert_to_tensor(x)
        y = self._convert_to_tensor(y)        
        self.model.eval()
        target = y.long().max(1)[1] # Pytorch doesn't need 1-hot encoded label. Only the indices of classes.
        #target =y.long()
        if self.gpus > 0:
            x = x.cuda()
            target = target.cuda()
        pred = self.model.forward(Variable(x, volatile=True))
        loss = self.loss(pred, Variable(target, volatile=True))
        l_data = loss.data.numpy() if self.gpus == 0 else loss.data.cpu().numpy()
        self.metrics = [l_data] if l_data.shape==() else [l_data[0]]        
        if 'acc' in self.metrics_names: # compute the accuracy
            acc = self._accuracy(pred.data, target, topk=(1,))[0]
            if self.gpus > 0: acc = acc.cpu()
            self.metrics.append(acc.numpy()[0])
        return np.asarray(self.metrics)

    def save(self, *args,**kwargs):
        import torch
        weights_filename = args[0]+'_w'
        arch_filename = args[0]
        torch.save( self.model.state_dict(), weights_filename)
        torch.save( self.model, arch_filename)
            
class ModelBuilder(object):
    """Class containing instructions for building neural net models.
        Derived classes should implement the build_model and get_backend_name
        functions.

        Attributes:
            comm: MPI communicator containing all running MPI processes
    """

    def __init__(self, comm):
        """Arguments:
            comm: MPI communicator 
        """
        self.comm = comm

    def get_device_name(self, device):
        """Should return a device name under a desired convention"""
        return device

    def build_model(self):
        """Should return an uncompiled Keras model."""
        raise NotImplementedError

    def get_backend_name(self):
        """Should return the name of backend framework."""
        raise NotImplementedError

class ModelFromJson(ModelBuilder):
    """ModelBuilder class that builds from model architecture specified
        in a JSON file.
        Attributes:
            filename: path to JSON file specifying model architecture
    """

    def __init__(self, comm, filename=None, custom_objects={}, weights=None):
        self.filename = filename
        self.weights = weights
        self.custom_objects = custom_objects
        super(ModelFromJson, self).__init__(comm)

    def build_model(self, local_session=True):
        if type(self.filename) == list:
            models = []
            for fn in self.filename:
                models.append(load_model(filename=fn))
            return MPIModel(models = models)
        else:        
            return MPIModel(model=load_model(filename=self.filename, custom_objects=self.custom_objects, weights_file=self.weights))

    def get_backend_name(self):
        return 'keras'

class ModelTensorFlow(ModelBuilder):
    """ModelBuilder class that builds from model architecture specified
        in a JSON file. Uses Tensorflow and builds the model on the 
        specified GPU.
        Attributes:
            source: path to JSON file specifying model architecture
                    or Keras model to be cloned
            device: name of the device to use (ex: "/gpu:2")
    """

    def __init__(self, comm, source, device_name='cpu', 
            custom_objects={}, weights=None):
        if isinstance(source, six.string_types):
            if source.endswith('.py'):
                module = __import__(source.replace('.py',''))
                self.model = module.get_model()
                self.filename = None
            else:
                self.filename = source
                self.model = None
        else:
            self.filename = None
            self.model = source
        self.weights = weights
        self.custom_objects = custom_objects
        self.device = self.get_device_name(device_name)
        super(ModelTensorFlow, self).__init__(comm)

    def get_device_name(self, device):
        """Returns a TF-style device identifier for the specified device.
            input: 'cpu' for CPU and 'gpuN' for the Nth GPU on the host"""
        if device == 'cpu':
            dev_num = 0
            dev_type = 'cpu'
        elif device.startswith('gpu'):
            try:
                dev_num = int(device[3:])
                dev_type = 'gpu'
            except ValueError:
                logging.warning("GPU number could not be parsed from {}; using CPU".format(device))
                dev_num = 0
                dev_type = 'cpu'
        else:
            logging.warning("Please specify 'cpu' or 'gpuN' for device name; using CPU")
            dev_num = 0
            dev_type = 'cpu'
        return get_device_name(dev_type, dev_num, backend='tensorflow')

    def build_model_aux(self):
        import keras.backend as K

        with K.tf.device(self.device):
            if type(self.filename) == list:
                models = []
                self.weights = self.weights.split(',') if self.weights else [None]*len(self.filename)
                for fn,w in zip(self.filename, self.weights):
                    models.append(load_model(filename=fn, weights_file=w))
                return MPIModel(models = models)
            else:
                model = load_model(filename=self.filename, model=self.model,
                                custom_objects=self.custom_objects, weights_file=self.weights)
                return MPIModel(model = model)


    def build_model(self, local_session = True):
        import keras.backend as K

        if local_session:
            graph = K.tf.Graph()
            session = K.tf.Session(graph=graph, config=K.tf.ConfigProto(
                allow_soft_placement=True, log_device_placement=False,
                gpu_options=K.tf.GPUOptions(
                        per_process_gpu_memory_fraction=1./self.comm.Get_size()) ) )

            with graph.as_default():
                with session.as_default():
                    import keras.backend as K
                    ret_model = self.build_model_aux()
                    ret_model.session = session
                    ret_model.graph = graph
                    return ret_model
        else:
            K.set_session( K.tf.Session( config=K.tf.ConfigProto(
                allow_soft_placement=True, log_device_placement=False,
                gpu_options=K.tf.GPUOptions(
                    per_process_gpu_memory_fraction=1./self.comm.Get_size()) ) ) )
            return self.build_model_aux()

    def get_backend_name(self):
        return 'tensorflow'

class ModelPytorch(ModelBuilder):
    def __init__(self, comm, source,
                 weights = None,
                 gpus=0):
        super(ModelPytorch,self).__init__(comm)
        if isinstance(source, six.string_types):
            if source.endswith('.py'):
                module = __import__(source.replace('.py',''))
                self.model = module.get_model()
                self.filename = None
            else:
                self.filename = source
                self.model = None
        else:
            self.filename = None
            self.model = source
        self.weights = weights
        self.gpus=gpus

    def build_model(self, local_session=True):
        import torch
        if self.filename is not None:
            model = torch.load(self.filename)
        elif self.model is not None:
            model = copy.deepcopy(self.model)
        if self.weights:
            wd = torch.load(self.weights)
            model.load_state_dict(wd)
        return MPITModel(model=model, gpus=self.gpus)

    def get_backend_name(self):
        return 'pytorch'