import os
import numpy as np
import sonnet as snt
import tensorflow as tf
import matplotlib.pyplot as plt

from utils.data_utils import wrap_angle, compute_staticstics, split_data, make_batch_iterator, make_repeating_batch_iterator
from utils.method_utils import atan2, compute_sq_distance
from utils.plotting_utils import plot_maze, show_pause

if tf.__version__ == '1.1.0-rc1' or tf.__version__ == '1.3.0':
    from tensorflow.python.framework import ops
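    # These TF versions have no registered gradient for FloorMod (tf.mod), which
    # breaks backprop through mod-based ops such as angle wrapping. Register a
    # gradient that passes through to x and ignores y.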
    @ops.RegisterGradient("FloorMod")
    def _mod_grad(op, grad):
        x, y = op.inputs
        gz = grad
        x_grad = gz
        y_grad = None  # tf.reduce_mean(-(x // y) * gz, axis=[0], keep_dims=True)[0]
        return x_grad, y_grad


class DPF():

    def __init__(self, init_with_true_state, learn_odom, use_proposer, propose_ratio, proposer_keep_ratio, min_obs_likelihood):
        """
        :param init_with_true_state:
        :param learn_odom:
        :param use_proposer:
        :param propose_ratio:
        :param particle_std:
        :param proposer_keep_ratio:
        :param min_obs_likelihood:
        """

        # store hyperparameters which are needed later
        self.init_with_true_state = init_with_true_state
        self.learn_odom = learn_odom
        self.use_proposer = use_proposer and not init_with_true_state  # only use the proposer if we do not initialize with the true state
        self.propose_ratio = propose_ratio if not self.init_with_true_state else 0.0

        # define some more parameters and placeholders
        self.state_dim = 3
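        # the state is a pose (x, y, theta); observations are 24x24 RGB images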
        self.placeholders = {'o': tf.placeholder('float32', [None, None, 24, 24, 3], 'observations'),
                             'a': tf.placeholder('float32', [None, None, 3], 'actions'),
                             's': tf.placeholder('float32', [None, None, 3], 'states'),
                             'num_particles': tf.placeholder('float32'),
                             'keep_prob': tf.placeholder_with_default(tf.constant(1.0), []),
                             }
        self.num_particles_float = self.placeholders['num_particles']
        self.num_particles = tf.to_int32(self.num_particles_float)

        # build learnable modules
        self.build_modules(min_obs_likelihood, proposer_keep_ratio)


    def build_modules(self, min_obs_likelihood, proposer_keep_ratio):
        """
        :param min_obs_likelihood:
        :param proposer_keep_ratio:
        :return: None
        """

        # MEASUREMENT MODEL

        # conv net for encoding the image
        self.encoder = snt.Sequential([
            snt.nets.ConvNet2D([16, 32, 64], [[3, 3]], [2], [snt.SAME], activate_final=True, name='encoder/convnet'),
            snt.BatchFlatten(),
            lambda x: tf.nn.dropout(x,  self.placeholders['keep_prob']),
            snt.Linear(128, name='encoder/linear'),
            tf.nn.relu
        ])

        # observation likelihood estimator that maps states and image encodings to probabilities
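        # (the final sigmoid output is rescaled to [min_obs_likelihood, 1] so that
        # no particle ever receives exactly zero weight)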
        self.obs_like_estimator = snt.Sequential([
            snt.Linear(128, name='obs_like_estimator/linear'),
            tf.nn.relu,
            snt.Linear(128, name='obs_like_estimator/linear'),
            tf.nn.relu,
            snt.Linear(1, name='obs_like_estimator/linear'),
            tf.nn.sigmoid,
            lambda x: x * (1 - min_obs_likelihood) + min_obs_likelihood
        ], name='obs_like_estimator')

        # motion noise generator used for motion sampling
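        # (it maps the normalized action concatenated with standard-normal input noise
        # to a per-particle noise sample, which motion_update zero-centers and adds to the action)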
        self.mo_noise_generator = snt.nets.MLP([32, 32, self.state_dim], activate_final=False, name='mo_noise_generator')

        # odometry model (if we want to learn it)
        if self.learn_odom:
            self.mo_transition_model = snt.nets.MLP([128, 128, 128, self.state_dim], activate_final=False, name='mo_transition_model')

        # particle proposer that maps encodings to particles (if we want to use it)
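        # (it outputs 4 tanh-bounded values per particle: x and y, rescaled to the state
        # bounds, plus a pair that propose_particles converts to an orientation via atan2)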
        if self.use_proposer:
            self.particle_proposer = snt.Sequential([
                snt.Linear(128, name='particle_proposer/linear'),
                tf.nn.relu,
                lambda x: tf.nn.dropout(x,  proposer_keep_ratio),
                snt.Linear(128, name='particle_proposer/linear'),
                tf.nn.relu,
                snt.Linear(128, name='particle_proposer/linear'),
                tf.nn.relu,
                snt.Linear(128, name='particle_proposer/linear'),
                tf.nn.relu,
                snt.Linear(4, name='particle_proposer/linear'),
                tf.nn.tanh,
            ])


    def measurement_update(self, encoding, particles, means, stds):
        """
        Compute the likelihood of the encoded observation for each particle.

        :param encoding: encoding of the observation
        :param particles:
        :param means:
        :param stds:
        :return: observation likelihood
        """

        # prepare input (normalize particle poses and repeat the encoding per particle)
        particle_input = self.transform_particles_as_input(particles, means, stds)
        encoding_input = tf.tile(encoding[:, tf.newaxis, :], [1,  tf.shape(particles)[1], 1])
        input = tf.concat([encoding_input, particle_input], axis=-1)

        # estimate the likelihood of the encoded observation for each particle, remove last dimension
        obs_likelihood = snt.BatchApply(self.obs_like_estimator)(input)[:, :, 0]

        return obs_likelihood


    def transform_particles_as_input(self, particles, means, stds):
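        # normalize the positions with the training statistics and encode the angle as
        # (cos, sin) to avoid the discontinuity at +/- pi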
        return tf.concat([
                   (particles[:, :, :2] - means['s'][:, :, :2]) / stds['s'][:, :, :2],  # normalized pos
                   tf.cos(particles[:, :, 2:3]),  # cos
                   tf.sin(particles[:, :, 2:3])], # sin
                  axis=-1)


    def propose_particles(self, encoding, num_particles, state_mins, state_maxs):
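        # duplicate the encoding for every particle, run the proposer on each copy, and
        # rescale its tanh outputs from [-1, 1] to the state bounds (the orientation is
        # recovered from the cos/sin pair via atan2)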
        duplicated_encoding = tf.tile(encoding[:, tf.newaxis, :], [1, num_particles, 1])
        proposed_particles = snt.BatchApply(self.particle_proposer)(duplicated_encoding)
        proposed_particles = tf.concat([
            proposed_particles[:,:,:1] * (state_maxs[0] - state_mins[0]) / 2.0 + (state_maxs[0] + state_mins[0]) / 2.0,
            proposed_particles[:,:,1:2] * (state_maxs[1] - state_mins[1]) / 2.0 + (state_maxs[1] + state_mins[1]) / 2.0,
            atan2(proposed_particles[:,:,2:3], proposed_particles[:,:,3:4])], axis=2)
        return proposed_particles


    def motion_update(self, actions, particles, means, stds, state_step_sizes, stop_sampling_gradient=False):
        """
        Move particles according to odometry info in actions. Add learned noise.

        :param actions:
        :param particles:
        :param means:
        :param stds:
        :param state_step_sizes:
        :param stop_sampling_gradient:
        :return: moved particles
        """

        # 1. SAMPLE NOISY ACTIONS

        # add dimension for particles
        actions = actions[:, tf.newaxis, :]

        # prepare input (normalize actions and repeat per particle)
        action_input = tf.tile(actions / stds['a'], [1, tf.shape(particles)[1], 1])
        random_input = tf.random_normal(tf.shape(action_input))
        input = tf.concat([action_input, random_input], axis=-1)

        # estimate action noise
        delta = snt.BatchApply(self.mo_noise_generator)(input)
        if stop_sampling_gradient:
            delta = tf.stop_gradient(delta)

        # zero-mean the action noise and add to actions
        delta -= tf.reduce_mean(delta, axis=1, keep_dims=True)
        noisy_actions = actions + delta

        # 2. APPLY NOISY ACTIONS
        if self.learn_odom:

            # prepare input (normalize states and actions)
            state_input = self.transform_particles_as_input(particles, means, stds)
            action_input = noisy_actions / stds['a']
            input = tf.concat([state_input, action_input], axis=-1)
            # estimate state delta, scale it, and apply it
            state_delta = snt.BatchApply(self.mo_transition_model)(input)
            new_states = [particles[:, :, i:i+1] + state_delta[:, :, i:i+1] * state_step_sizes[i] for i in range(3)]
            moved_particles = tf.concat(new_states[:2] + [wrap_angle(new_states[2])], axis=-1)

        else:

            # compute sin and cos of the particles
            theta = particles[:, :, 2:3]
            sin_theta = tf.sin(theta)
            cos_theta = tf.cos(theta)
            # move the particles using the noisy actions
            new_x = particles[:, :, 0:1] + (noisy_actions[:, :, 0:1] * cos_theta + noisy_actions[:, :, 1:2] * sin_theta)
            new_y = particles[:, :, 1:2] + (noisy_actions[:, :, 0:1] * sin_theta - noisy_actions[:, :, 1:2] * cos_theta)
            new_theta = wrap_angle(particles[:, :, 2:3] + noisy_actions[:, :, 2:3])
            moved_particles = tf.concat([new_x, new_y, new_theta], axis=-1)

        return moved_particles


    def compile_training_stages(self, sess, batch_iterators, particle_list, particle_probs_list, encodings, means, stds, state_step_sizes, state_mins, state_maxs, learning_rate, plot_task):

        # TRAINING!
        losses = dict()
        train_stages = dict()

        # TRAIN ODOMETRY

        if self.learn_odom:

            # apply model
            motion_samples = self.motion_update(self.placeholders['a'][:,1],
                                                self.placeholders['s'][:, :1],
                                                means, stds, state_step_sizes,
                                                stop_sampling_gradient=True)

            # define loss and optimizer
            sq_distance = compute_sq_distance(motion_samples, self.placeholders['s'][:, 1:2], state_step_sizes)
            losses['motion_mse'] = tf.reduce_mean(sq_distance, name='loss')
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            var_list = [v for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) if 'mo_transition_model' in v.name]

            # put everything together
            train_stages['train_odom'] = {
                         'train_op': optimizer.minimize(losses['motion_mse'], var_list=var_list),
                         'batch_iterator_names': {'train': 'train1', 'val': 'val1'},
                         'monitor_losses': ['motion_mse'],
                         'validation_loss': 'motion_mse',
                         'plot': lambda e: self.plot_motion_model(sess, next(batch_iterators['val1']), motion_samples, plot_task) if e % 10 == 0 else None
                         }

        # TRAIN MOTION MODEL

        # apply model
        motion_samples = self.motion_update(self.placeholders['a'][:,1],
                                            tf.tile(self.placeholders['s'][:, :1], [1, self.num_particles, 1]),
                                            means, stds, state_step_sizes)

        # define loss and optimizer
        std = 0.01
        sq_distance = compute_sq_distance(motion_samples, self.placeholders['s'][:, 1:2], state_step_sizes)
        activations_sample = (1 / self.num_particles_float) / tf.sqrt(2 * np.pi * std ** 2) * tf.exp(
            -sq_distance / (2.0 * std ** 2))
        losses['motion_mle'] = tf.reduce_mean(-tf.log(1e-16 + tf.reduce_sum(activations_sample, axis=-1, name='loss')))
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        var_list = [v for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) if 'mo_noise_generator' in v.name]

        # put everything together
        train_stages['train_motion_sampling'] = {
                     'train_op': optimizer.minimize(losses['motion_mle'], var_list=var_list),
                     'batch_iterator_names': {'train': 'train1', 'val': 'val1'},
                     'monitor_losses': ['motion_mle'],
                     'validation_loss': 'motion_mle',
                     'plot': lambda e: self.plot_motion_model(sess, next(batch_iterators['val1']), motion_samples, plot_task) if e % 10 == 0 else None
                     }

        # TRAIN MEASUREMENT MODEL

        # apply model for all pairs of observations and states in that batch
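        # (the result is a batch_size x batch_size matrix: entry (i, j) is the likelihood
        # of observation i evaluated at state j, so the diagonal holds the matching pairs)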
        test_particles = tf.tile(self.placeholders['s'][tf.newaxis, :, 0], [self.batch_size, 1, 1])
        measurement_model_out = self.measurement_update(encodings[:, 0], test_particles, means, stds)

        # define loss (correct -> 1, incorrect -> 0) and optimizer
        correct_samples = tf.diag_part(measurement_model_out)
        incorrect_samples = measurement_model_out - tf.diag(tf.diag_part(measurement_model_out))
        losses['measurement_heuristic'] = tf.reduce_sum(-tf.log(correct_samples)) / tf.cast(self.batch_size, tf.float32) \
                                          + tf.reduce_sum(-tf.log(1.0 - incorrect_samples)) / tf.cast(self.batch_size * (self.batch_size - 1), tf.float32)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        var_list = [v for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) if 'encoder' in v.name or 'obs_like_estimator' in v.name]

        # put everything together
        train_stages['train_measurement_model'] = {
                     'train_op': optimizer.minimize(losses['measurement_heuristic'], var_list=var_list),
                     'batch_iterator_names': {'train': 'train1', 'val': 'val1'},
                     'monitor_losses': ['measurement_heuristic'],
                     'validation_loss': 'measurement_heuristic',
                     'plot': lambda e: self.plot_measurement_model(sess, batch_iterators['val1'], measurement_model_out) if e % 10 == 0 else None
                     }

        # TRAIN PARTICLE PROPOSER

        if self.use_proposer:

            # apply the model, but stop the gradient at the encoding: otherwise training the
            # proposer would also change the encoder and degrade the observation likelihood model
            proposed_particles = self.propose_particles(tf.stop_gradient(encodings[:, 0]), self.num_particles, state_mins, state_maxs)

            # define loss and optimizer
            std = 0.2
            sq_distance = compute_sq_distance(proposed_particles, self.placeholders['s'][:, :1], state_step_sizes)
            activations = (1 / self.num_particles_float) / tf.sqrt(2 * np.pi * std ** 2) * tf.exp(
                -sq_distance / (2.0 * std ** 2))
            losses['proposed_mle'] = tf.reduce_mean(-tf.log(1e-16 + tf.reduce_sum(activations, axis=-1)))
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            var_list = [v for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) if 'particle_proposer' in v.name]

            # put everything together
            train_stages['train_particle_proposer'] = {
                         'train_op': optimizer.minimize(losses['proposed_mle'], var_list=var_list),
                         'batch_iterator_names': {'train': 'train1', 'val': 'val1'},
                         'monitor_losses': ['proposed_mle'],
                         'validation_loss': 'proposed_mle',
                         'plot': lambda e: self.plot_particle_proposer(sess, next(batch_iterators['val1']), proposed_particles, plot_task) if e % 10 == 0 else None
                         }

        # END-TO-END TRAINING

        # the model has already been applied further up -> particle_list, particle_probs_list

        # define losses and optimizer
        # first loss (which is being optimized)
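        # interpret the weighted particles as a Gaussian mixture with bandwidth particle_std
        # and maximize the likelihood of the true state under that mixture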
        sq_distance = compute_sq_distance(particle_list, self.placeholders['s'][:, :, tf.newaxis, :], state_step_sizes)
        activations = particle_probs_list[:, :] / tf.sqrt(2 * np.pi * self.particle_std ** 2) * tf.exp(
            -sq_distance / (2.0 * self.particle_std ** 2))
        losses['mle'] = tf.reduce_mean(-tf.log(1e-16 + tf.reduce_sum(activations, axis=2, name='loss')))
        # second loss (which we will monitor during execution)
        pred = self.particles_to_state(particle_list, particle_probs_list)

        sq_distance = compute_sq_distance(pred[:, -1, :], self.placeholders['s'][:, -1, :], state_step_sizes)
        losses['mse_last'] = tf.reduce_mean(sq_distance)
        # optimizer
        optimizer = tf.train.AdamOptimizer(learning_rate)

        # put everything together
        train_stages['train_e2e'] = {
                     'train_op': optimizer.minimize(losses['mle']),
                     'batch_iterator_names': {'train': 'train', 'val': 'val'},
                     'monitor_losses': ['mse_last', 'mle'],
                     'validation_loss': 'mse_last',
                     'plot': lambda e: self.plot_particle_filter(sess, next(batch_iterators['val_ex']), particle_list,
                                                                 particle_probs_list, self.num_particles, state_step_sizes, plot_task) if e % 1 == 0 else None
                     }

        return losses, train_stages


    def load(self, sess, model_path, model_file='best_validation', statistics_file='statistics.npz', connect_and_initialize=True, modules=('encoder', 'mo_noise_generator', 'mo_transition_model', 'obs_like_estimator', 'particle_proposer')):

        if not isinstance(modules, (list, tuple)):
            raise Exception('modules must be a list or tuple, not a ' + str(type(modules)))

        # build the tensorflow graph
        if connect_and_initialize:
            # load training data statistics (which are needed to build the tf graph)
            statistics = dict(np.load(os.path.join(model_path, statistics_file)))
            for key in statistics.keys():
                if statistics[key].shape == ():
                    statistics[key] = statistics[key].item()  # convert 0-d arrays that wrap dicts back into plain dicts

            # connect all modules into the particle filter
            self.connect_modules(**statistics)
            init = tf.global_variables_initializer()
            sess.run(init)
        else:
            statistics = None

        # load variables
        all_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        vars_to_load = []
        loaded_modules = set()
        for v in all_vars:
            for m in modules:
                if m in v.name:
                    vars_to_load.append(v)
                    loaded_modules.add(m)

        print('Loading these modules:', loaded_modules)

        print('Restoring variables from %s' % os.path.join(model_path, model_file))

        # restore variable values
        saver = tf.train.Saver(vars_to_load)  # restore only the variables of the selected modules
        saver.restore(sess, os.path.join(model_path, model_file))

        print('Loaded the following variables:')
        for v in vars_to_load:
            print(v.name)

        return statistics


    def fit(self, sess, data, model_path, train_individually, train_e2e, split_ratio, seq_len, batch_size, epoch_length, num_epochs, patience, learning_rate, dropout_keep_ratio, num_particles, particle_std, plot_task=None, plot=False):

        self.particle_std = particle_std

        # preprocess data
        data = split_data(data, ratio=split_ratio)
        epoch_lengths = {'train': epoch_length, 'val': epoch_length*2}
        batch_iterators = {'train': make_batch_iterator(data['train'], seq_len=seq_len, batch_size=batch_size),
                           'val': make_repeating_batch_iterator(data['val'], epoch_lengths['val'], batch_size=batch_size, seq_len=seq_len),
                           'train_ex': make_batch_iterator(data['train'], batch_size=batch_size, seq_len=seq_len),
                           'val_ex': make_batch_iterator(data['val'], batch_size=batch_size, seq_len=seq_len),
                           'train1': make_batch_iterator(data['train'], batch_size=batch_size, seq_len=2),
                           'val1': make_repeating_batch_iterator(data['val'], epoch_lengths['val'], batch_size=batch_size, seq_len=2),
                           }

        # compute some statistics of the training data
        means, stds, state_step_sizes, state_mins, state_maxs = compute_staticstics(data['train'])

        # build the tensorflow graph by connecting all modules of the particle filter
        particles, particle_probs, encodings, particle_list, particle_probs_list = self.connect_modules(means, stds, state_mins, state_maxs, state_step_sizes)

        # define losses and train stages for different ways of training (e.g. training individual models and e2e training)
        losses, train_stages = self.compile_training_stages(sess, batch_iterators, particle_list, particle_probs_list,
                                                            encodings, means, stds, state_step_sizes, state_mins,
                                                            state_maxs, learning_rate, plot_task)

        # initialize variables
        init = tf.global_variables_initializer()
        sess.run(init)

        # save statistics and prepare saving variables
        if not os.path.exists(model_path):
            os.makedirs(model_path)
        np.savez(os.path.join(model_path, 'statistics'), means=means, stds=stds, state_step_sizes=state_step_sizes,
                 state_mins=state_mins, state_maxs=state_maxs)
        saver = tf.train.Saver()
        save_path = os.path.join(model_path, 'best_validation')

        # define the training curriculum
        curriculum = []
        if train_individually:
            if self.learn_odom:
                curriculum += ['train_odom']
            curriculum += ['train_motion_sampling']
            curriculum += ['train_measurement_model']
            if self.use_proposer:
                curriculum += ['train_particle_proposer']
        if train_e2e:
            curriculum += ['train_e2e']

        # evaluate on the validation split (if any data was held out) for early stopping
        data_keys = ['train']
        if split_ratio < 1.0:
            data_keys.append('val')

        # define log dict
        log = {c: {dk: {lk: {'mean': [], 'se': []} for lk in train_stages[c]['monitor_losses']} for dk in data_keys} for c in curriculum}

        # go through curriculum
        for c in curriculum:

            stage = train_stages[c]
            best_val_loss = np.inf
            best_epoch = 0
            epoch = 0

            while epoch < num_epochs and epoch - best_epoch < patience:
                # training
                for dk in data_keys:
                    # don't train in the first epoch, just evaluate the initial parameters
                    if dk == 'train' and epoch == 0:
                        continue
                    # set up loss lists which will be filled during the epoch
                    loss_lists = {lk: [] for lk in stage['monitor_losses']}
                    for e in range(epoch_lengths[dk]):
                        # t0 = time.time()
                        # pick a batch from the right iterator
                        batch = next(batch_iterators[stage['batch_iterator_names'][dk]])

                        # define the inputs and train/run the model
                        input_dict = {**{self.placeholders[key]: batch[key] for key in 'osa'},
                                      **{self.placeholders['num_particles']: num_particles},
                                      }
                        if dk == 'train':
                            input_dict[self.placeholders['keep_prob']] = dropout_keep_ratio
                        monitor_losses = {l: losses[l] for l in stage['monitor_losses']}
                        if dk == 'train':
                            s_losses, _ = sess.run([monitor_losses, stage['train_op']], input_dict)
                        else:
                            s_losses = sess.run(monitor_losses, input_dict)

                        for lk in stage['monitor_losses']:
                            loss_lists[lk].append(s_losses[lk])

                    # after each epoch, compute and log statistics
                    for lk in stage['monitor_losses']:
                        log[c][dk][lk]['mean'].append(np.mean(loss_lists[lk]))
                        log[c][dk][lk]['se'].append(np.std(loss_lists[lk], ddof=1) / np.sqrt(len(loss_lists[lk])))

                # check whether the current model is better than all previous models
                if 'val' in data_keys:
                    current_val_loss = log[c]['val'][stage['validation_loss']]['mean'][-1]
                    if current_val_loss < best_val_loss:
                        best_val_loss = current_val_loss
                        best_epoch = epoch
                        # save current model
                        saver.save(sess, save_path)
                        txt = 'epoch {:>3} >> '.format(epoch)
                    else:
                        txt = 'epoch {:>3} == '.format(epoch)
                else:
                    best_epoch = epoch
                    saver.save(sess, save_path)
                    txt = 'epoch {:>3} >> '.format(epoch)

                # after going through all data sets, do a print out of the current result
                for lk in stage['monitor_losses']:
                    txt += '{}: '.format(lk)
                    for dk in data_keys:
                        if len(log[c][dk][lk]['mean']) > 0:
                            txt += '{:.2f}+-{:.2f}/'.format(log[c][dk][lk]['mean'][-1], log[c][dk][lk]['se'][-1])
                    txt = txt[:-1] + ' -- '
                print(txt)

                # t1 = time.time()
                # time_deltas.append(t1 - t0)

                if plot:
                    stage['plot'](epoch)

                epoch += 1

            # after running out of patience, restore the model with lowest validation loss
            saver.restore(sess, save_path)

        return log


    def predict(self, sess, batch, num_particles, return_particles=False, **kwargs):
        # define input dict, use the first state only if we do tracking
        input_dict = {self.placeholders['o']: batch['o'],
                      self.placeholders['a']: batch['a'],
                      self.placeholders['num_particles']: num_particles}
        if self.init_with_true_state:
            input_dict[self.placeholders['s']] = batch['s'][:, :1]

        if return_particles:
            return sess.run([self.pred_states, self.particle_list, self.particle_probs_list], input_dict)
        else:
            return sess.run(self.pred_states, input_dict)


    def connect_modules(self, means, stds, state_mins, state_maxs, state_step_sizes):

        # get shapes
        self.batch_size = tf.shape(self.placeholders['o'])[0]
        self.seq_len = tf.shape(self.placeholders['o'])[1]
        # we use the static shape here because we need it to build the graph
        self.action_dim = self.placeholders['a'].get_shape()[-1].value

        encodings = snt.BatchApply(self.encoder)((self.placeholders['o'] - means['o']) / stds['o'])
        self.encodings = encodings

        # initialize particles
        if self.init_with_true_state:
            # tracking with known initial state
            initial_particles = tf.tile(self.placeholders['s'][:, 0, tf.newaxis, :], [1, self.num_particles, 1])
        else:
            # global localization
            if self.use_proposer:
                # propose particles from observations
                initial_particles = self.propose_particles(encodings[:, 0], self.num_particles, state_mins, state_maxs)
            else:
                # sample particles randomly
                initial_particles = tf.concat(
                    [tf.random_uniform([self.batch_size, self.num_particles, 1], state_mins[d], state_maxs[d]) for d in
                     range(self.state_dim)], axis=-1, name='particles')

        initial_particle_probs = tf.ones([self.batch_size, self.num_particles],
                                         name='particle_probs') / self.num_particles_float

        # assumes that samples has the correct size
        def permute_batch(x, samples):
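            # gather along the particle dimension per batch element by flattening the
            # (batch, particle) dimensions into one axis and offsetting the sample indices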
            # get shapes
            batch_size = tf.shape(x)[0]
            num_particles = tf.shape(x)[1]
            sample_size = tf.shape(samples)[1]
            # compute 1D indices into the 2D array
            idx = samples + num_particles * tf.tile(
                tf.reshape(tf.range(batch_size), [batch_size, 1]),
                [1, sample_size])
            # index using the 1D indices and reshape again
            result = tf.gather(tf.reshape(x, [batch_size * num_particles, -1]), idx)
            result = tf.reshape(result, tf.shape(x[:,:sample_size]))
            return result


        def loop(particles, particle_probs, particle_list, particle_probs_list, additional_probs_list, i):
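            # one filter step: resample, apply the motion and measurement updates, and
            # optionally mix in freshly proposed particles; the proposed fraction decays
            # geometrically with the time step i via propose_ratio ** i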

            num_proposed_float = tf.round((self.propose_ratio ** tf.cast(i, tf.float32)) * self.num_particles_float)
            num_proposed = tf.cast(num_proposed_float, tf.int32)
            num_resampled_float = self.num_particles_float - num_proposed_float
            num_resampled = tf.cast(num_resampled_float, tf.int32)

            if self.propose_ratio < 1.0:

                # resampling
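                # systematic (low-variance) resampling: evenly spaced markers with a shared
                # random offset per batch element are matched against the cumulative particle
                # weights; stop_gradient is applied because resampling is not differentiable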
                basic_markers = tf.linspace(0.0, (num_resampled_float - 1.0) / num_resampled_float, num_resampled)
                random_offset = tf.random_uniform([self.batch_size], 0.0, 1.0 / num_resampled_float)
                markers = random_offset[:, None] + basic_markers[None, :]  # shape: batch_size x num_resampled
                cum_probs = tf.cumsum(particle_probs, axis=1)
                marker_matching = markers[:, :, None] < cum_probs[:, None, :]  # shape: batch_size x num_resampled x num_particles
                samples = tf.cast(tf.argmax(tf.cast(marker_matching, 'int32'), dimension=2), 'int32')
                standard_particles = permute_batch(particles, samples)
                standard_particle_probs = tf.ones([self.batch_size, num_resampled])
                standard_particles = tf.stop_gradient(standard_particles)
                standard_particle_probs = tf.stop_gradient(standard_particle_probs)

                # motion update
                standard_particles = self.motion_update(self.placeholders['a'][:, i], standard_particles, means, stds, state_step_sizes)

                # measurement update
                standard_particle_probs *= self.measurement_update(encodings[:, i], standard_particles, means, stds)

            if self.propose_ratio > 0.0:

                # proposed particles
                proposed_particles = self.propose_particles(encodings[:, i], num_proposed, state_mins, state_maxs)
                proposed_particle_probs = tf.ones([self.batch_size, num_proposed])


            # NORMALIZE AND COMBINE PARTICLES
            if self.propose_ratio == 1.0:
                particles = proposed_particles
                particle_probs = proposed_particle_probs

            elif self.propose_ratio == 0.0:
                particles = standard_particles
                particle_probs = standard_particle_probs

            else:
                standard_particle_probs *= (num_resampled_float / self.num_particles_float) / tf.reduce_sum(standard_particle_probs, axis=1, keep_dims=True)
                proposed_particle_probs *= (num_proposed_float / self.num_particles_float) / tf.reduce_sum(proposed_particle_probs, axis=1, keep_dims=True)
                particles = tf.concat([standard_particles, proposed_particles], axis=1)
                particle_probs = tf.concat([standard_particle_probs, proposed_particle_probs], axis=1)

            # NORMALIZE PROBABILITIES
            particle_probs /= tf.reduce_sum(particle_probs, axis=1, keep_dims=True)

            particle_list = tf.concat([particle_list, particles[:, tf.newaxis]], axis=1)
            particle_probs_list = tf.concat([particle_probs_list, particle_probs[:, tf.newaxis]], axis=1)

            return particles, particle_probs, particle_list, particle_probs_list, additional_probs_list, i + 1

        # reshape so that the time dimension is dynamic (None), which keeps the shapes
        # consistent across iterations of the while loop below
        particle_list = tf.reshape(initial_particles,
                                   shape=[self.batch_size, -1, self.num_particles, self.state_dim])
        particle_probs_list = tf.reshape(initial_particle_probs, shape=[self.batch_size, -1, self.num_particles])
        additional_probs_list = tf.reshape(tf.ones([self.batch_size, self.num_particles, 4]), shape=[self.batch_size, -1, self.num_particles, 4])

        # run the filtering process
        particles, particle_probs, particle_list, particle_probs_list, additional_probs_list, i = tf.while_loop(
            lambda *x: x[-1] < self.seq_len, loop,
            [initial_particles, initial_particle_probs, particle_list, particle_probs_list, additional_probs_list,
             tf.constant(1, dtype='int32')], name='loop')

        # compute mean of particles
        self.pred_states = self.particles_to_state(particle_list, particle_probs_list)
        self.particle_list = particle_list
        self.particle_probs_list = particle_probs_list

        return particles, particle_probs, encodings, particle_list, particle_probs_list

    def particles_to_state(self, particle_list, particle_probs_list):
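        # weighted mean for the position and a circular mean for the orientation
        # (atan2 of the weighted sums of cos and sin)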
        mean_position = tf.reduce_sum(particle_probs_list[:, :, :, tf.newaxis] * particle_list[:, :, :, :2], axis=2)
        mean_orientation = atan2(
            tf.reduce_sum(particle_probs_list[:, :, :, tf.newaxis] * tf.cos(particle_list[:, :, :, 2:]), axis=2),
            tf.reduce_sum(particle_probs_list[:, :, :, tf.newaxis] * tf.sin(particle_list[:, :, :, 2:]), axis=2))
        return tf.concat([mean_position, mean_orientation], axis=2)


    def plot_motion_model(self, sess, batch, motion_samples, task):

        # define the inputs and run the model
        input_dict = {**{self.placeholders[key]: batch[key] for key in 'osa'},
                      **{self.placeholders['num_particles']: 100},
                      }

        s_motion_samples = sess.run(motion_samples, input_dict)

        plt.figure('Motion Model')
        plt.gca().clear()
        plot_maze(task)
        for i in range(min(len(s_motion_samples), 10)):
            plt.quiver(s_motion_samples[i, :, 0], s_motion_samples[i, :, 1], np.cos(s_motion_samples[i, :, 2]), np.sin(s_motion_samples[i, :, 2]), color='blue', width=0.001, scale=100)
            plt.quiver(batch['s'][i, 0, 0], batch['s'][i, 0, 1], np.cos(batch['s'][i, 0, 2]), np.sin(batch['s'][i, 0, 2]), color='black', scale=50, width=0.003)
            plt.quiver(batch['s'][i, 1, 0], batch['s'][i, 1, 1], np.cos(batch['s'][i, 1, 2]), np.sin(batch['s'][i, 1, 2]), color='red', scale=50, width=0.003)

        plt.gca().set_aspect('equal')
        plt.pause(0.01)


    def plot_measurement_model(self, sess, batch_iterator, measurement_model_out):

        batch = next(batch_iterator)

        # define the inputs and run the model
        input_dict = {**{self.placeholders[key]: batch[key] for key in 'osa'},
                      **{self.placeholders['num_particles']: 100},
                      }

        s_measurement_model_out = sess.run(measurement_model_out, input_dict)

        plt.figure('Measurement Model Output')
        plt.gca().clear()
        plt.imshow(s_measurement_model_out, interpolation="nearest", cmap="coolwarm")
        plt.pause(0.01)



    def plot_particle_proposer(self, sess, batch, proposed_particles, task):

        # define the inputs and run the model
        input_dict = {**{self.placeholders[key]: batch[key] for key in 'osa'},
                      **{self.placeholders['num_particles']: 100},
                      }

        s_samples = sess.run(proposed_particles, input_dict)

        plt.figure('Particle Proposer')
        plt.gca().clear()
        plot_maze(task)

        for i in range(min(len(s_samples), 10)):
            color = np.random.uniform(0.0, 1.0, 3)
            plt.quiver(s_samples[i, :, 0], s_samples[i, :, 1], np.cos(s_samples[i, :, 2]), np.sin(s_samples[i, :, 2]), color=color, width=0.001, scale=100)
            plt.quiver(batch['s'][i, 0, 0], batch['s'][i, 0, 1], np.cos(batch['s'][i, 0, 2]), np.sin(batch['s'][i, 0, 2]), color=color, scale=50, width=0.003)

        plt.pause(0.01)


    def plot_particle_filter(self, sess, batch, particle_list,
                        particle_probs_list, num_particles, state_step_sizes, task):

        num_particles = 1000  # note: overrides the num_particles argument for plotting
        head_scale = 1.5
        quiv_kwargs = {'scale_units': 'xy', 'scale': 1. / 40., 'width': 0.003, 'headlength': 5 * head_scale,
                       'headwidth': 3 * head_scale, 'headaxislength': 4.5 * head_scale}
        marker_kwargs = {'markersize': 4.5, 'markerfacecolor': 'None', 'markeredgewidth': 0.5}

        color_list = plt.cm.tab10(np.linspace(0, 1, 10))
        colors = {'lstm': color_list[0], 'pf_e2e': color_list[1], 'pf_ind_e2e': color_list[2], 'pf_ind': color_list[3],
                  'ff': color_list[4], 'odom': color_list[4]}

        pred, s_particle_list, s_particle_probs_list = self.predict(sess, batch, num_particles,
                                                                      return_particles=True)

        num_steps = 20  # s_particle_list.shape[1]

        for s in range(1):

            plt.figure("example {}".format(s), figsize=[12, 5.15])
            plt.gca().clear()

            for i in range(num_steps):
                ax = plt.subplot(4, 5, i + 1, frameon=False)
                plt.gca().clear()

                plot_maze(task, margin=5, linewidth=0.5)

                if i < num_steps - 1:
                    ax.quiver(s_particle_list[s, i, :, 0], s_particle_list[s, i, :, 1],
                              np.cos(s_particle_list[s, i, :, 2]), np.sin(s_particle_list[s, i, :, 2]),
                              s_particle_probs_list[s, i, :], cmap='viridis_r', clim=[.0, 2.0 / num_particles],
                              alpha=1.0,
                              **quiv_kwargs
                              )

                    current_state = batch['s'][s, i, :]
                    plt.quiver(current_state[0], current_state[1], np.cos(current_state[2]),
                               np.sin(current_state[2]), color="red", **quiv_kwargs)

                    plt.plot(current_state[0], current_state[1], 'or', **marker_kwargs)
                else:

                    ax.plot(batch['s'][s, :num_steps, 0], batch['s'][s, :num_steps, 1], '-', linewidth=0.6, color='red')
                    ax.plot(pred[s, :num_steps, 0], pred[s, :num_steps, 1], '-', linewidth=0.6,
                            color=colors['pf_ind_e2e'])

                    ax.plot(batch['s'][s, :1, 0], batch['s'][s, :1, 1], '.', linewidth=0.6, color='red', markersize=3)
                    ax.plot(pred[s, :1, 0], pred[s, :1, 1], '.', linewidth=0.6, markersize=3,
                            color=colors['pf_ind_e2e'])

                plt.subplots_adjust(left=0.0, bottom=0.0, right=1.0, top=1.0, wspace=0.001, hspace=0.1)
                plt.gca().set_aspect('equal')
                plt.xticks([])
                plt.yticks([])

        show_pause(pause=0.01)
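

# Example usage (illustrative sketch only): the hyperparameter values below are
# placeholders and `load_data` stands in for whatever routine returns the dict of
# observations 'o', actions 'a', and states 's' that fit() expects.
#
#   method = DPF(init_with_true_state=False, learn_odom=False, use_proposer=True,
#                propose_ratio=0.7, proposer_keep_ratio=0.15, min_obs_likelihood=0.004)
#   with tf.Session() as sess:
#       data = load_data(...)  # hypothetical data loader
#       log = method.fit(sess, data, model_path='models/tmp',
#                        train_individually=True, train_e2e=True, split_ratio=0.9,
#                        seq_len=20, batch_size=32, epoch_length=50, num_epochs=1000,
#                        patience=100, learning_rate=0.001, dropout_keep_ratio=0.3,
#                        num_particles=100, particle_std=0.2)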