# Python source code of batching

"""Functions to generate batches for the reinforcement learning part.
Mainly intended for training, though during the playing phase, the same
functions are used."""
from __future__ import print_function, division

import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))

import models as models_reinforced

import cv2
from config import Config
import imgaug as ia
from lib.util import to_variable, to_cuda, to_numpy
from lib import util
from lib import actions as actionslib
from lib import replay_memory
import numpy as np
from scipy import misc
import multiprocessing
import threading
import random
import time

# Python 2/3 compatibility: bind `pickle` and the "queue is full" exception
# under the same names on both major versions.
if sys.version_info[0] == 2:
    import cPickle as pickle
    from Queue import Full as QueueFull
elif sys.version_info[0] == 3:
    import pickle
    from queue import Full as QueueFull
    # Python 3 has no xrange; alias it so the rest of the module can use it.
    xrange = range

# GPU setting read from the project config; used as the default value of the
# `gpu` argument of the BatchData methods that call to_cuda().
GPU = Config.GPU
# Number of bins into which rewards are discretized
# (see BatchData.reward_bin_idx).
NB_REWARD_BINS = 101

class BatchData(object):
    """Class encapsulating the data of a single batch.

    The time-indexed attributes (multiactions, rewards, speeds, is_reverse,
    steering_wheel, steering_wheel_raw) are indexed by absolute timestep;
    `curr_idx` is the absolute index of the "current" state. Methods that
    take a `timestep` argument expect it relative to the current state
    (0 = current, 1 = first future state, -1 = state before the current one).
    `images_by_timestep` and `images_prev_by_timestep` are indexed relative
    to the current state, i.e. their index 0 is the current state.

    TODO some of the functions are named like properties, rename
    """

    def __init__(self, curr_idx, images_by_timestep, images_prev_by_timestep, multiactions, rewards, speeds, is_reverse, steering_wheel, steering_wheel_raw, previous_states_distances):
        """Store the arrays that make up the batch.

        The shapes below are the ones produced by states_to_batch(), with
        T=total number of timesteps, F=number of future states, B=batch size
        and P=number of previous states per image.

        Parameters
        ----------
        curr_idx : int
            Absolute index of the current state within the time-indexed
            arrays.
        images_by_timestep : ndarray
            Images of the current and future states, (1+F, B, 3, H, W).
        images_prev_by_timestep : ndarray
            Grayscale images of the previous states for the current and
            each future state, (1+F, B, P, Hp, Wp).
        multiactions : list of list
            Per absolute timestep the multiactions of all B chains.
        rewards : ndarray
            Rewards, (T, B).
        speeds : ndarray
            Speeds, (T, B).
        is_reverse : ndarray
            Reverse-gear flags, (T, B).
        steering_wheel : ndarray
            Steering wheel positions, (T, B).
        steering_wheel_raw : ndarray
            Raw steering wheel positions, (T, B).
        previous_states_distances : list of int
            Distances (in timesteps) of the previous states that are fed
            into the model together with each state.
        """
        self.curr_idx = curr_idx
        self.images_by_timestep = images_by_timestep
        self.images_prev_by_timestep = images_prev_by_timestep
        self.multiactions = multiactions
        self.rewards = rewards
        self.speeds = speeds
        self.is_reverse = is_reverse
        self.steering_wheel = steering_wheel
        self.steering_wheel_raw = steering_wheel_raw
        self.previous_states_distances = previous_states_distances

    @property
    def batch_size(self):
        """Number of state chains in the batch (axis 1 of the images)."""
        return self.images_by_timestep.shape[1]

    @property
    def nb_future(self):
        """Number of future states, i.e. image timesteps minus the current one."""
        return self.images_prev_by_timestep.shape[0] - 1

    @property
    def nb_prev_per_image(self):
        """Number of previous-state images stored per state (axis 2)."""
        return self.images_prev_by_timestep.shape[2]

    def reward_bin_idx(self, timestep, inbatch_idx):
        """Return the index of the reward bin of one state.

        The reward is normalized with Config.MIN_REWARD/Config.MAX_REWARD
        and inverted, so that the highest reward falls into bin 0.

        Parameters
        ----------
        timestep : int
            Timestep relative to the current state.
        inbatch_idx : int
            Index of the chain within the batch.

        Returns
        ----------
        Bin index in the range [0, NB_REWARD_BINS-1].
        """
        timestep = self.curr_idx + timestep
        reward = self.rewards[timestep, inbatch_idx]
        reward_norm = (reward - Config.MIN_REWARD) / (Config.MAX_REWARD - Config.MIN_REWARD)
        reward_norm = 1 - reward_norm # top to bottom
        # clip here, because a normalized value of exactly 1.0 would end up in
        # bin NB_REWARD_BINS, which is 1 too high (and out-of-range rewards
        # would fall outside of the bins altogether)
        rewbin = np.clip(int(reward_norm * NB_REWARD_BINS), 0, NB_REWARD_BINS-1)
        return rewbin

    def rewards_bins(self, timestep):
        """Return the one-hot encoded reward bins of all chains at a timestep.

        Parameters
        ----------
        timestep : int
            Timestep relative to the current state.

        Returns
        ----------
        float32 array of shape (B, NB_REWARD_BINS).
        """
        nb_chains = self.rewards.shape[1]
        result = np.zeros((nb_chains, NB_REWARD_BINS), dtype=np.float32)
        for b in xrange(nb_chains):
            result[b, self.reward_bin_idx(timestep, b)] = 1
        return result

    def rewards_bins_all(self):
        """Return the one-hot reward bins of all timesteps.

        Returns
        ----------
        float32 array of shape (T, B, NB_REWARD_BINS), indexed by absolute
        timestep.
        """
        T, B = self.rewards.shape
        # rewards_bins() expects timesteps relative to the current state
        bins_over_time = [self.rewards_bins(t) for t in xrange(-self.curr_idx, T-self.curr_idx)]
        return np.array(bins_over_time, dtype=np.float32)

    def inputs_supervised(self, volatile=False, requires_grad=True, gpu=GPU):
        """Return the image and previous images of the current state, each
        converted via to_variable() and to_cuda()."""
        images = to_cuda(to_variable(self.images_by_timestep[0], volatile=volatile, requires_grad=requires_grad), gpu)
        images_prev = to_cuda(to_variable(self.images_prev_by_timestep[0], volatile=volatile, requires_grad=requires_grad), gpu)
        return images, images_prev

    def inputs_reinforced_add_numpy(self, timestep=0):
        """Collect the non-image inputs of one state as numpy arrays.

        Parameters
        ----------
        timestep : int
            Timestep relative to the current state.

        Returns
        ----------
        dict with the keys "speeds", "is_reverse", "steering_wheel" and
        "steering_wheel_raw", each of shape (B, P+1) (the state itself
        followed by its P previous states), and "multiactions_vecs" of
        shape (B, P*9) (previous states only).
        """
        timestep = self.curr_idx + timestep
        B = self.batch_size

        prev_indices_exclusive = [timestep - d for d in self.previous_states_distances]
        prev_indices_inclusive = [timestep] + prev_indices_exclusive
        nb_states = len(prev_indices_inclusive)

        # one vector with 9 components per previous multiaction
        ma_vecs = np.zeros((B, len(prev_indices_exclusive), 9), dtype=np.float32)
        for i, idx in enumerate(prev_indices_exclusive):
            mas = self.multiactions[idx]
            for b, ma in enumerate(mas):
                ma_vecs[b, i, :] = actionslib.ACTIONS_TO_MULTIVEC[ma]
        ma_vecs = ma_vecs.reshape(B, -1) # (B, P*9) with P=number of previous images

        def _per_chain(values):
            # (P+1, B) => (B, P+1). The explicit reshape replaces a bare
            # np.squeeze(), which also collapsed the state axis when only a
            # single state index was selected and needed a special case for
            # B == 1.
            return values.reshape((nb_states, B)).transpose((1, 0))

        speeds = self.speeds[prev_indices_inclusive, :]
        steering_wheel = (self.steering_wheel[prev_indices_inclusive, :] - Config.STEERING_WHEEL_CNN_MIN) / (Config.STEERING_WHEEL_CNN_MAX - Config.STEERING_WHEEL_CNN_MIN)
        steering_wheel_raw = (self.steering_wheel_raw[prev_indices_inclusive, :] - Config.STEERING_WHEEL_RAW_CNN_MIN) / (Config.STEERING_WHEEL_RAW_CNN_MAX - Config.STEERING_WHEEL_RAW_CNN_MIN)

        return {
            # speeds are scaled to [0, 1], steering wheel values are
            # min-max normalized and then mapped via x*2 - 1
            "speeds": _per_chain(np.clip(speeds / Config.MAX_SPEED, 0, 1)),
            "is_reverse": _per_chain(self.is_reverse[prev_indices_inclusive, :]),
            "steering_wheel": _per_chain(steering_wheel*2 - 1),
            "steering_wheel_raw": _per_chain(steering_wheel_raw*2 - 1),
            "multiactions_vecs": ma_vecs
        }

    def inputs_reinforced_add(self, volatile=False, requires_grad=True, gpu=GPU):
        """Return the non-image inputs of the current state (see
        inputs_reinforced_add_numpy), converted via to_variable() and
        to_cuda()."""
        return to_cuda(to_variable(self.inputs_reinforced_add_numpy(), volatile=volatile, requires_grad=requires_grad), gpu)

    def future_inputs_supervised(self, volatile=False, requires_grad=True, gpu=GPU):
        """Return the images and previous images of all future states
        (everything after index 0), converted via to_variable() and
        to_cuda()."""
        images = to_cuda(to_variable(self.images_by_timestep[1:], volatile=volatile, requires_grad=requires_grad), gpu)
        images_prev = to_cuda(to_variable(self.images_prev_by_timestep[1:], volatile=volatile, requires_grad=requires_grad), gpu)
        return images, images_prev

    def future_reinforced_add(self, volatile=False, requires_grad=True, gpu=GPU):
        """Return the non-image inputs of all future states.

        The values of the future timesteps 1..nb_future are stacked and then
        flattened over time and batch, so that every entry of the returned
        dict has shape (nb_future*B, -1) before conversion via to_variable()
        and to_cuda().
        """
        keys = ("speeds", "is_reverse", "steering_wheel", "steering_wheel_raw", "multiactions_vecs")
        per_timestep = [self.inputs_reinforced_add_numpy(timestep=timestep) for timestep in xrange(1, self.nb_future+1)]
        vals = dict(
            (key, np.array([inputs_ts[key] for inputs_ts in per_timestep], dtype=np.float32))
            for key in keys
        )

        T, B, _ = vals["speeds"].shape
        vals_flat = dict((key, vals[key].reshape((T*B, -1))) for key in keys)

        return to_cuda(to_variable(vals_flat, volatile=volatile, requires_grad=requires_grad), gpu)

    def inputs_successor_multiactions_vecs(self, volatile=False, requires_grad=True, gpu=GPU):
        """Return the multiaction vectors fed into the successor predictor,
        shape (nb_future, B, 9) before conversion."""
        # the successor gets in actions a and has to predict the next
        # state, i.e. for tuples (s, a, r, s') it gets a and predicts s',
        # hence the future actions here start at curr_idx (current state index)
        # and end at -1
        arr = models_reinforced.SuccessorPredictor.multiactions_to_vecs(self.multiactions[self.curr_idx:-1])
        assert arr.shape == (self.nb_future, self.batch_size, 9)
        return to_cuda(to_variable(arr, volatile=volatile, requires_grad=requires_grad), gpu)

    def direct_rewards_values(self, volatile=False, requires_grad=True, gpu=GPU):
        """Return the rewards of the current state, repeated 9 times per
        chain, i.e. with shape (B, 9) before conversion."""
        rews = self.rewards[self.curr_idx, :][:,np.newaxis]
        rews = np.tile(rews, (1, 9))
        return to_cuda(to_variable(rews, volatile=volatile, requires_grad=requires_grad), gpu)

    def future_direct_rewards_values(self, volatile=False, requires_grad=True, gpu=GPU):
        """Return the rewards of all states after the current one, repeated
        9 times per chain, i.e. with shape (nb_future, B, 9) before
        conversion."""
        rews = self.rewards[self.curr_idx+1:, :][:, :, np.newaxis]
        rews = np.tile(rews, (1, 1, 9))
        return to_cuda(to_variable(rews, volatile=volatile, requires_grad=requires_grad), gpu)

    def outputs_dr_gt(self, volatile=False, requires_grad=True, gpu=GPU):
        """Return the ground truth reward bins for the direct reward
        prediction at the current state, shape (B, NB_REWARD_BINS) before
        conversion."""
        # for a tuple (s, a, r, s'), the reward r is ought to be predicted
        # that is, the reward for the previous action, which is dependent
        # on the new state s' that it created
        # it is saved at the previous timestep, i.e. at state s, hence
        # here -1
        bins = self.rewards_bins(-1)
        return to_cuda(to_variable(bins, volatile=volatile, requires_grad=requires_grad), gpu)

    def outputs_dr_future_gt(self, volatile=False, requires_grad=True, gpu=GPU):
        """Return the ground truth reward bins for the direct reward
        prediction at the future states."""
        # starting at curr_idx and ending at -1 here for the same reason
        # as above
        bins = self.rewards_bins_all()
        bins = bins[self.curr_idx:-1]
        return to_cuda(to_variable(bins, volatile=volatile, requires_grad=requires_grad), gpu)

    def outputs_ae_gt(self, volatile=False, requires_grad=True, gpu=GPU):
        """Return the current images downscaled to 45x80 (height x width) as
        ground truth for the autoencoder, shape (B, C, 45, 80) before
        conversion."""
        imgs = self.images_by_timestep[0, ...]
        # float (B, C, H, W) => uint8 (B, H, W, C), as required for resizing
        imgs = np.clip(imgs*255, 0, 255).astype(np.uint8).transpose((0, 2, 3, 1))
        imgs_rs = ia.imresize_many_images(imgs, (45, 80))
        imgs_rs = (imgs_rs / 255.0).astype(np.float32).transpose((0, 3, 1, 2))
        return to_cuda(to_variable(imgs_rs, volatile=volatile, requires_grad=requires_grad), gpu)

    def chosen_action_indices(self):
        """Return per chain the argmax index of the multiaction vector of the
        action stored at the current state."""
        mas_timestep = self.multiactions[self.curr_idx]
        indices = [np.argmax(actionslib.ACTIONS_TO_MULTIVEC[ma]) for ma in mas_timestep]
        return indices

    def chosen_action_indices_future(self):
        """Return the chosen action indices of the future states.

        Returns
        ----------
        List with one entry per future state (nb_future entries), each a
        list with one action index per chain.
        """
        indices_by_timestep = []
        for t_idx in xrange(self.nb_future):
            # Future states start right after the current state, as in the
            # other future_* accessors. Indexing from 0 (as this method used
            # to do) reads the actions of the *previous* states instead.
            mas_timestep = self.multiactions[self.curr_idx + 1 + t_idx]
            indices = [np.argmax(actionslib.ACTIONS_TO_MULTIVEC[ma]) for ma in mas_timestep]
            indices_by_timestep.append(indices)
        return indices_by_timestep

    def draw(self, timestep=0, inbatch_idx=0):
        """Render a debug image of one state of one chain.

        The image shows the state's image next to its previous images and,
        below them, a text block with rewards, reward bins, speeds and
        multiactions.

        Parameters
        ----------
        timestep : int
            Timestep relative to the current state.
        inbatch_idx : int
            Index of the chain within the batch.

        Returns
        ----------
        The rendered image, as returned by util.draw_text().
        """
        rel_timestep = timestep
        timestep = self.curr_idx + rel_timestep
        # images are indexed relative to the current state,
        # everything else by absolute timestep
        img = self.images_by_timestep[rel_timestep, inbatch_idx, :, :, :]
        img = (img.transpose((1, 2, 0))*255).astype(np.uint8)
        imgs_prev = self.images_prev_by_timestep[rel_timestep, inbatch_idx, :, :, :]
        imgs_prev = (imgs_prev.transpose((1, 2, 0))*255).astype(np.uint8)

        h, w = img.shape[0:2]
        # grayscale previous images are repeated to 3 channels and resized
        # to the size of the main image, so that everything can be stacked
        imgs_viz = [img] + [np.tile(imgs_prev[..., i][:, :, np.newaxis], (1, 1, 3)) for i in xrange(imgs_prev.shape[2])]
        imgs_viz = [ia.imresize_single_image(im, (h, w), interpolation="cubic") for im in imgs_viz]
        imgs_viz = np.hstack(imgs_viz)

        rewards_bins = self.rewards_bins_all()
        mas = [self.multiactions[i][inbatch_idx] for i in xrange(timestep-self.nb_prev_per_image, timestep)]
        pos = [timestep] + [timestep-d for d in self.previous_states_distances]
        reinforced_add = self.inputs_reinforced_add_numpy(timestep=rel_timestep)
        outputs_dr_gt = self.outputs_dr_gt()[inbatch_idx]
        texts = [
            "pos: " + " ".join([str(i) for i in pos]),
            "Rewards:      " + " ".join(["%.2f" % (self.rewards[i, inbatch_idx],) for i in pos]),
            "Rewards bins: " + " ".join(["%d" % (np.argmax(rewards_bins[i, inbatch_idx]),) for i in pos]),
            "Speeds:       " + " ".join(["%.2f" % (self.speeds[i, inbatch_idx],) for i in pos]),
            "Multiactions: " + " ".join(["%s%s" % (ma[0], ma[1]) for ma in mas]),
            "Speeds RA:    " + " ".join(["%.3f" % (reinforced_add["speeds"][inbatch_idx, i],) for i in xrange(reinforced_add["speeds"].shape[1])]),
            "outputs_dr_gt[t=-1]: " + "%d" % (np.argmax(to_numpy(outputs_dr_gt)),)
        ]
        texts = "\n".join(texts)

        # canvas three times as high as the image row, to leave room for the text
        result = np.zeros((imgs_viz.shape[0]*3, imgs_viz.shape[1], 3), dtype=np.uint8)
        util.draw_image(result, x=0, y=0, other_img=imgs_viz, copy=False)
        result = util.draw_text(result, x=0, y=imgs_viz.shape[0]+4, text=texts, size=9)
        return result

def states_to_batch(previous_states_list, states_list, augseq, previous_states_distances, model_height, model_width, model_prev_height, model_prev_width):
    """Convert multiple chains of states into a batch.

    Parameters
    ----------
    previous_states_list : list of list of State
        Per chain of states a list of the previous states.
        First index of the list is the batch index,
        second index is the timestep. The oldest states come first.
    states_list : list of list of State
        Per chain of states a list of states that contain the "current"
        state at the start, followed by future states.
        First index is batch index, second timestep.
    augseq : Augmenter
        Sequence of augmenters to apply to the images of the current and
        future states. Use Noop() to make no changes.
    previous_states_distances : list of int
        List of distances relative to the current state. Each distance
        refers to one previous state to add to the model input.
        E.g. [2, 1] adds the state 200ms and 100ms before the current "state".
    model_height : int
        Height of the model input images (current state).
    model_width : int
        Width of the model input images (current state).
    model_prev_height : int
        Height of the model input images (previous states).
    model_prev_width : int
        Width of the model input images (previous states).

    Returns
    ----------
    BatchData
        A single batch containing all chains.
    """
    assert isinstance(previous_states_list, list)
    assert isinstance(states_list, list)
    assert isinstance(previous_states_list[0], list)
    assert isinstance(states_list[0], list)
    assert len(previous_states_list) == len(states_list)

    B = len(states_list)
    H, W = model_height, model_width
    Hp, Wp = model_prev_height, model_prev_width

    # the furthest distance decides how many previous states must be loaded
    nb_prev_load = max(previous_states_distances)
    nb_future_states = len(states_list[0]) - 1
    nb_timesteps = nb_prev_load + 1 + nb_future_states
    images_by_timestep = np.zeros((1+nb_future_states, B, H, W, 3), dtype=np.float32)
    images_gray = np.zeros((nb_timesteps, B, Hp, Wp), dtype=np.float32)
    multiactions = [[] for i in xrange(nb_timesteps)]
    rewards = np.zeros((nb_timesteps, B), dtype=np.float32)
    speeds = np.zeros((nb_timesteps, B), dtype=np.float32)
    is_reverse = np.zeros((nb_timesteps, B), dtype=np.float32)
    steering_wheel = np.zeros((nb_timesteps, B), dtype=np.float32)
    steering_wheel_raw = np.zeros((nb_timesteps, B), dtype=np.float32)

    # one deterministic augmenter per chain, so that all images of a chain
    # are augmented in the same way
    augseqs_det = [augseq.to_deterministic() for _ in xrange(B)]

    # grayscale images and scalar values of all timesteps (previous, current, future)
    for b, (previous_states, states) in enumerate(zip(previous_states_list, states_list)):
        all_states = previous_states + states
        for t, state in enumerate(all_states):
            imgy = cv2.cvtColor(state.screenshot_rs, cv2.COLOR_RGB2GRAY)
            # NOTE(review): the grayscale images are stored without
            # augmentation. An augmented version used to be computed here
            # and then thrown away; that dead computation was removed, the
            # stored data is unchanged. Confirm whether the previous images
            # were actually meant to be augmented too.
            images_gray[t, b, ...] = downscale(imgy, Hp, Wp)

            multiactions[t].append(state.multiaction)
            rewards[t, b] = state.reward
            # missing (None) measurements keep their default of 0
            if state.speed is not None:
                speeds[t, b] = state.speed
            if state.is_reverse is not None:
                is_reverse[t, b] = int(state.is_reverse)
            if state.steering_wheel_cnn is not None:
                steering_wheel[t, b] = state.steering_wheel_cnn
            if state.steering_wheel_raw_cnn is not None:
                steering_wheel_raw[t, b] = state.steering_wheel_raw_cnn
    images_gray = images_gray[..., np.newaxis]

    # augmented color images of the current and future states
    for b, states in enumerate(states_list):
        augseq_det = augseqs_det[b]
        for i, state in enumerate(states):
            images_by_timestep[i, b, ...] = augseq_det.augment_image(downscale(state.screenshot_rs, H, W))

    # per current/future state, gather the grayscale images of its previous
    # states and place them along the last axis
    nb_prev_per_img = len(previous_states_distances)
    images_prev_by_timestep = np.zeros((1+nb_future_states, B, Hp, Wp, nb_prev_per_img), dtype=np.float32)
    for t in xrange(1 + nb_future_states):
        indices = [nb_prev_load+t-d for d in previous_states_distances]
        prev = images_gray[indices] # (P, B, Hp, Wp, 1)
        prev = prev.transpose((1, 2, 3, 4, 0)).reshape((B, Hp, Wp, nb_prev_per_img))
        images_prev_by_timestep[t] = prev
    # scale to [0, 1] and move the channel axis in front of height and width
    images_by_timestep = (images_by_timestep.astype(np.float32) / 255.0).transpose((0, 1, 4, 2, 3))
    images_prev_by_timestep = (images_prev_by_timestep.astype(np.float32) / 255.0).transpose((0, 1, 4, 2, 3))

    return BatchData(nb_prev_load, images_by_timestep, images_prev_by_timestep, multiactions, rewards, speeds, is_reverse, steering_wheel, steering_wheel_raw, previous_states_distances)

def downscale(im, h, w):
    """Resize an image to height `h` and width `w` with cubic interpolation.

    Parameters
    ----------
    im : ndarray
        Image with two axes (grayscale) or three axes (with channel axis).
    h : int
        Height of the resized image.
    w : int
        Width of the resized image.

    Returns
    ----------
    The resized image. An input with two axes is returned with two axes.
    """
    if im.ndim == 2:
        resized = ia.imresize_single_image(im[:, :, np.newaxis], (h, w), interpolation="cubic")
        # Drop only the channel axis that was added above. A bare
        # np.squeeze() would also remove the height or width axis whenever
        # h == 1 or w == 1.
        return resized.reshape(resized.shape[0:2])
    return ia.imresize_single_image(im, (h, w), interpolation="cubic")

class BatchLoader(object):
    """Loads randomly sampled batches from the replay memory."""

    def __init__(self, val, batch_size, augseq, previous_states_distances, nb_future_states, model_height, model_width, model_prev_height, model_prev_width):
        """Remember the batch settings; the replay memory is only connected
        on the first call of load_random_batch().

        `val` selects the validation (True) or training (False) memory,
        `batch_size` is the number of state chains per batch. All other
        arguments are handed over to states_to_batch().
        """
        # work on an own, freshly seeded copy of the augmenters, so that the
        # caller's instance stays untouched
        own_augseq = augseq.deepcopy()
        own_augseq.reseed(random.randint(0, 10**6))

        self.val = val
        self.batch_size = batch_size
        self.augseq = own_augseq
        self.previous_states_distances = previous_states_distances
        self.nb_future_states = nb_future_states
        self.model_height = model_height
        self.model_width = model_width
        self.model_prev_height = model_prev_height
        self.model_prev_width = model_prev_width
        self._memory = None

    def load_random_batch(self):
        """Sample `batch_size` random state chains from the replay memory
        and convert them into a batch via states_to_batch()."""
        if self._memory is None:
            # first call: connect to the replay memory
            self._memory = replay_memory.ReplayMemory.create_instance_reinforced(val=self.val)
            self._memory.update_caches()
            split_name = "val" if self.val else "train"
            print("Connected memory to %s, idmin=%d, idmax=%d" % (split_name, self._memory.id_min, self._memory.id_max))

        # each chain consists of the previous states, the current state and
        # the future states
        nb_prev = max(self.previous_states_distances)
        chain_length = nb_prev + 1 + self.nb_future_states

        chains = [self._memory.get_random_state_chain(chain_length) for _ in xrange(self.batch_size)]
        previous_states_list = [chain[:nb_prev] for chain in chains]
        states_list = [chain[nb_prev:] for chain in chains]

        return states_to_batch(
            previous_states_list,
            states_list,
            self.augseq,
            self.previous_states_distances,
            self.model_height,
            self.model_width,
            self.model_prev_height,
            self.model_prev_width
        )

class BackgroundBatchLoader(object):
    """Class that takes a BatchLoader and executes it many times in background
    processes (or, optionally, background threads)."""

    def __init__(self, batch_loader, queue_size, nb_workers, threaded=False):
        """Start the background workers.

        Parameters
        ----------
        batch_loader : BatchLoader
            Loader whose load_random_batch() is called by every worker.
        queue_size : int
            Maximum number of pickled batches held in the queue.
        nb_workers : int
            Number of background workers to start.
        threaded : bool
            If True, the workers are threads, otherwise processes.
        """
        self.queue = multiprocessing.Queue(queue_size)
        self.workers = []
        self.exit_signal = multiprocessing.Event()
        for _ in range(nb_workers):
            seed = random.randint(1, 10**6)
            if threaded:
                # threads are started without a seed, i.e. they do not
                # reseed the random number generators
                worker = threading.Thread(target=self._load_batches, args=(batch_loader, self.queue, self.exit_signal, None))
            else:
                worker = multiprocessing.Process(target=self._load_batches, args=(batch_loader, self.queue, self.exit_signal, seed))
            worker.daemon = True
            worker.start()
            self.workers.append(worker)

    def get_batch(self):
        """Wait for the next batch in the queue and return it unpickled."""
        # the queue only contains data pickled by this class' own workers
        return pickle.loads(self.queue.get())

    def _load_batches(self, batch_loader, queue, exit_signal, seed=None):
        """Worker loop: load batches and put them pickled into the queue
        until the exit signal is set.

        Parameters
        ----------
        batch_loader : BatchLoader
            Loader used to create the batches.
        queue : Queue
            Queue to put the pickled batches into.
        exit_signal : Event
            Event that tells the worker to stop.
        seed : None or int
            If not None, the random number generators of random, numpy and
            imgaug as well as the loader's augmenters are seeded with it.
        """
        if seed is not None:
            random.seed(seed)
            np.random.seed(seed)
            batch_loader.augseq.reseed(seed)
            ia.seed(seed)

        try:
            while not exit_signal.is_set():
                batch = batch_loader.load_random_batch()
                batch_str = pickle.dumps(batch, protocol=-1)

                added_to_queue = False # without this, it will add the batch countless times to the queue
                while not added_to_queue and not exit_signal.is_set():
                    try:
                        # short timeout, so that the exit signal is checked
                        # regularly while the queue is full
                        queue.put(batch_str, timeout=1)
                        added_to_queue = True
                    except QueueFull:
                        pass
        finally:
            # The loader connects to the replay memory lazily, so there is
            # nothing to close if the worker stops before its first batch.
            memory = getattr(batch_loader, "_memory", None)
            if memory is not None:
                memory.close()

    def join(self):
        """Signal all workers to stop, empty the queue and shut the workers
        down."""
        self.exit_signal.set()
        # give the workers time to notice the signal
        time.sleep(5)

        while not self.queue.empty():
            _ = self.queue.get()

        for worker in self.workers:
            if isinstance(worker, threading.Thread):
                # threads have no terminate(); they are daemons and stop on
                # their own once they see the exit signal
                worker.join(1)
            else:
                worker.terminate()

if __name__ == "__main__":
    # Manual visual check: repeatedly load random training batches and show
    # the debug images of a few timesteps and chains.
    from scipy import misc
    from imgaug import augmenters as iaa

    MODEL_HEIGHT = 90
    MODEL_WIDTH = 160
    MODEL_PREV_HEIGHT = 45
    MODEL_PREV_WIDTH = 80

    loader = BatchLoader(
        val=False, batch_size=8, augseq=iaa.Noop(),
        previous_states_distances=[2, 4, 6, 8, 10],
        nb_future_states=10,
        model_height=MODEL_HEIGHT, model_width=MODEL_WIDTH,
        # the width used to be set to MODEL_PREV_HEIGHT by mistake
        model_prev_height=MODEL_PREV_HEIGHT, model_prev_width=MODEL_PREV_WIDTH
    )
    for _ in xrange(1000):
        for t in xrange(3):
            imgs = []
            for b in xrange(3):
                print(t, b)
                # a new random batch is loaded for every drawn image
                batch = loader.load_random_batch()
                imgs.append(batch.draw(timestep=t, inbatch_idx=b))
            misc.imshow(np.vstack(imgs))