python source code of simulator

# -*- coding: utf-8 -*-
# SPDX-License-Identifier: Apache-2.0

"""
:Author: FMR LLC
:Email: mabwiser@fmr.com
:Version: 1.10.0 of June 22, 2020

This module provides a simulation utility for comparing algorithms and hyper-parameter tuning.
"""

import logging
from copy import deepcopy
from itertools import chain
from typing import Union, List, Optional, NoReturn

import math
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from joblib import Parallel, delayed
from scipy.spatial.distance import cdist
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

from mabwiser.base_mab import BaseMAB
from mabwiser.greedy import _EpsilonGreedy
from mabwiser.linear import _Linear
from mabwiser.mab import MAB
from mabwiser.neighbors import _Neighbors, _Radius, _KNearest
from mabwiser.popularity import _Popularity
from mabwiser.rand import _Random
from mabwiser.softmax import _Softmax
from mabwiser.thompson import _ThompsonSampling
from mabwiser.ucb import _UCB1
from mabwiser.utils import Arm, Num, check_true, Constants, _BaseRNG, create_rng


def default_evaluator(arms: List[Arm], decisions: np.ndarray, rewards: np.ndarray, predictions: List[Arm],
                      arm_to_stats: dict, stat: str, start_index: int, nn: bool = False) -> dict:
    """Default evaluation function.

    Calculates predicted rewards for the test batch based on predicted arms.
    When the predicted arm is the same as the historic decision, the historic reward is used.
    When the predicted arm is different, the mean, min or max reward from the training data is used.
    If using Radius or KNearest neighborhood policy, the statistics from the neighborhood are used
    instead of the entire training set.

    The simulator supports custom evaluation functions,
    but they must have this signature to work with the simulation pipeline.

    Parameters
    ----------
    arms: list
        The list of arms.
    decisions: np.ndarray
        The historic decisions for the batch being evaluated.
    rewards: np.ndarray
        The historic rewards for the batch being evaluated.
    predictions: list
        The predictions for the batch being evaluated.
    arm_to_stats: dict
        The dictionary of descriptive statistics for each arm to use in evaluation.
    stat: str
        Which metric from arm_to_stats to use. Takes the values 'min', 'max', 'mean'.
    start_index: int
        The index of the first row in the batch.
        For offline simulations it is 0.
        For _online simulations it is batch size * batch number.
        Used to select the correct index from arm_to_stats if there are separate entries for each row in the test set.
    nn: bool
        Whether the results are from one of the simulator custom nearest neighbors implementations.

    Returns
    -------
    An arm_to_stats dictionary for the predictions in the batch.
    Dictionary has the format {arm {'count', 'sum', 'min', 'max', 'mean', 'std'}}
    """
    # If decision and prediction matches each other, use the observed reward
    # If decision and prediction are different, use the given stat (e.g., mean) for the arm as the reward

    arm_to_rewards = dict((arm, []) for arm in arms)
    if nn:
        arm_to_stats, neighborhood_stats = arm_to_stats
    for index, predicted_arm in enumerate(predictions):

        if predicted_arm == decisions[index]:
            arm_to_rewards[predicted_arm].append(rewards[index])
        elif nn:
            nn_index = index + start_index
            row_neighborhood_stats = neighborhood_stats[nn_index]
            if row_neighborhood_stats and row_neighborhood_stats[predicted_arm]:
                arm_to_rewards[predicted_arm].append(row_neighborhood_stats[predicted_arm][stat])
            else:
                arm_to_rewards[predicted_arm].append(arm_to_stats[predicted_arm][stat])

        else:
            arm_to_rewards[predicted_arm].append(arm_to_stats[predicted_arm][stat])

    # Calculate stats based on the rewards from predicted arms
    arm_to_stats_prediction = {}
    for arm in arms:
        arm_to_rewards[arm] = np.array(arm_to_rewards[arm])
        if len(arm_to_rewards[arm]) > 0:
            arm_to_stats_prediction[arm] = {'count': arm_to_rewards[arm].size, 'sum': arm_to_rewards[arm].sum(),
                                            'min': arm_to_rewards[arm].min(), 'max': arm_to_rewards[arm].max(),
                                            'mean': arm_to_rewards[arm].mean(), 'std': arm_to_rewards[arm].std()}
        else:
            arm_to_stats_prediction[arm] = {'count': 0, 'sum': math.nan,
                                            'min': math.nan, 'max': math.nan,
                                            'mean': math.nan, 'std': math.nan}

    return arm_to_stats_prediction


class _NeighborsSimulator(_Neighbors):

    def __init__(self, rng: _BaseRNG, arms: List[Arm], n_jobs: int, backend: Optional[str],
                 lp: Union[_EpsilonGreedy, _Linear, _Popularity, _Random, _Softmax, _ThompsonSampling, _UCB1],
                 metric: str, is_quick: bool):
        super().__init__(rng, arms, n_jobs, backend, lp, metric)
        self.is_quick = is_quick
        self.neighborhood_arm_to_stat = []
        self.raw_rewards = None
        self.row_arm_to_expectation = []
        self.distances = None
        self.is_contextual = True
        self.neighborhood_sizes = []

    def fit(self, decisions: np.ndarray, rewards: np.ndarray, contexts: np.ndarray = None):
        if isinstance(self.lp, _ThompsonSampling) and self.lp.binarizer:
            self.raw_rewards = rewards.copy()

        super().fit(decisions, rewards, contexts)

    def partial_fit(self, decisions: np.ndarray, rewards: np.ndarray, contexts: np.ndarray = None):
        if isinstance(self.lp, _ThompsonSampling) and self.lp.binarizer:
            self.raw_rewards = np.concatenate((self.raw_rewards, rewards.copy()))

        super().partial_fit(decisions, rewards, contexts)

    def predict(self, contexts: Optional[np.ndarray] = None):
        return self._predict_operation(contexts, is_predict=True)

    def predict_expectations(self, contexts: np.ndarray = None):
        return self._predict_operation(contexts, is_predict=False)

    def calculate_distances(self, contexts: np.ndarray):

        # Partition contexts by job
        n_jobs, n_contexts, starts = self._partition_contexts(len(contexts))

        # Calculate distances in parallel
        distances = Parallel(n_jobs=n_jobs, backend=self.backend)(
                             delayed(self._calculate_distances_of_batch)(
                                     contexts[starts[i]:starts[i + 1]])
                             for i in range(n_jobs))

        # Reduce
        self.distances = list(chain.from_iterable(t for t in distances))

        return self.distances

    def set_distances(self, distances):
        self.distances = distances

    def _calculate_distances_of_batch(self, contexts: np.ndarray):
        distances = [None] * len(contexts)
        for index, row in enumerate(contexts):
            # Calculate the distances from the historical contexts
            # Row is 1D so convert it to 2D array for cdist using newaxis
            # Finally, reshape to flatten the output distances list
            row_2d = row[np.newaxis, :]
            distances[index] = cdist(self.contexts, row_2d, metric=self.metric).reshape(-1)
        return distances

    def _predict_operation(self, contexts, is_predict):
        # Return predict within the neighborhood
        out = self._parallel_predict(contexts, is_predict=is_predict)

        if isinstance(out[0], list):
            df = pd.DataFrame(out, columns=['prediction', 'expectations', 'size', 'stats'])

            if is_predict:
                self.row_arm_to_expectation = self.row_arm_to_expectation + df['expectations'].tolist()
            else:
                self.row_arm_to_expectation = self.row_arm_to_expectation + df['prediction'].tolist()
            if not self.is_quick:
                self.neighborhood_sizes = self.neighborhood_sizes + df['size'].tolist()
                self.neighborhood_arm_to_stat = self.neighborhood_arm_to_stat + df['stats'].tolist()

            return df['prediction'].tolist()

        # Single row prediction
        else:
            prediction, expectation, size, stats = out
            if is_predict:
                self.row_arm_to_expectation = self.row_arm_to_expectation + [expectation]
            else:
                self.row_arm_to_expectation = self.row_arm_to_expectation + [prediction]
            if not self.is_quick:
                self.neighborhood_sizes = self.neighborhood_sizes + [size]
                self.neighborhood_arm_to_stat = self.neighborhood_arm_to_stat + [stats]
            return prediction

    def _get_nhood_predictions(self, lp, row_2d, indices, is_predict):

        nn_decisions = self.decisions[indices]
        nn_rewards = self.rewards[indices]

        if isinstance(lp, _ThompsonSampling) and self.lp.binarizer:
            nn_raw_rewards = self.raw_rewards[indices]

        arm_to_stat = {}
        if not self.is_quick:
            for arm in self.arms:
                if isinstance(lp, _ThompsonSampling) and self.lp.binarizer:
                    arm_rewards = nn_raw_rewards[nn_decisions == arm]
                else:
                    arm_rewards = nn_rewards[nn_decisions == arm]

                if len(arm_rewards > 0):
                    arm_to_stat[arm] = Simulator.get_stats(arm_rewards)
                else:
                    arm_to_stat[arm] = {}

        # Fit the decisions and rewards of the neighbors
        lp.fit(nn_decisions, nn_rewards, self.contexts[indices])

        # Predict based on the neighbors
        if is_predict:
            prediction = lp.predict(row_2d)
            if isinstance(lp, _ThompsonSampling):
                arm_to_expectation = lp.arm_to_expectation.copy()
            else:
                arm_to_expectation = lp.predict_expectations(row_2d)

            return prediction, arm_to_expectation, arm_to_stat
        else:
            prediction = lp.predict_expectations(row_2d)

            return prediction, {}, arm_to_stat


class _RadiusSimulator(_NeighborsSimulator):

    def __init__(self, rng: _BaseRNG, arms: List[Arm], n_jobs: int, backend: Optional[str],
                 lp: Union[_EpsilonGreedy, _Linear, _Popularity, _Random, _Softmax, _ThompsonSampling, _UCB1],
                 radius: Num, metric: str, is_quick: bool, no_nhood_prob_of_arm=Optional[List]):
        super().__init__(rng, arms, n_jobs, backend, lp, metric, is_quick)
        self.radius = radius
        self.no_nhood_prob_of_arm = no_nhood_prob_of_arm

    def _predict_contexts(self, contexts: np.ndarray, is_predict: bool,
                          seeds: Optional[np.ndarray] = None, start_index: Optional[int] = None) -> List:

        # Copy learning policy object
        lp = deepcopy(self.lp)

        # Create an empty list of predictions
        predictions = [None] * len(contexts)

        # For each row in the given contexts
        for index, row in enumerate(contexts):

            # Get random generator
            lp.rng = create_rng(seeds[index])

            # Calculate the distances from the historical contexts
            # Row is 1D so convert it to 2D array for cdist using newaxis
            # Finally, reshape to flatten the output distances list
            row_2d = row[np.newaxis, :]
            distances_to_row = self.distances[start_index + index]

            # Find the neighbor indices within the radius
            # np.where with a condition returns a tuple where the first element is an array of indices
            indices = np.where(distances_to_row <= self.radius)

            # If neighbors exist
            if indices[0].size > 0:

                prediction, exp, stats = self._get_nhood_predictions(lp, row_2d, indices, is_predict)
                predictions[index] = [prediction, exp, len(indices[0]), stats]

            else:  # When there are no neighbors

                # Random arm (or nan expectations)
                prediction = self._get_no_nhood_predictions(lp, is_predict)
                predictions[index] = [prediction, {}, 0, {}]

        # Return the list of predictions
        return predictions

    def _get_no_nhood_predictions(self, lp, is_predict):
        if is_predict:
            # if no_nhood_prob_of_arm is None, select a random int
            # else, select a non-uniform random arm
            # choice returns an array, hence get zero index
            rand_int = lp.rng.choice(len(self.arms), 1, p=self.no_nhood_prob_of_arm)[0]
            return self.arms[rand_int]
        else:
            # Expectations will be nan when there are no neighbors
            return self.arm_to_expectation.copy()


class _KNearestSimulator(_NeighborsSimulator):

    def __init__(self, rng: _BaseRNG, arms: List[Arm], n_jobs: int, backend: Optional[str],
                 lp: Union[_EpsilonGreedy, _Linear, _Popularity, _Random, _Softmax, _ThompsonSampling, _UCB1],
                 k: int, metric: str, is_quick: bool):
        super().__init__(rng, arms, n_jobs, backend, lp, metric, is_quick)
        self.k = k

    def _predict_contexts(self, contexts: np.ndarray, is_predict: bool,
                          seeds: Optional[np.ndarray] = None, start_index: Optional[int] = None) -> List:

        # Copy Learning Policy object and set random state
        lp = deepcopy(self.lp)

        # Create an empty list of predictions
        predictions = [None] * len(contexts)

        # For each row in the given contexts
        for index, row in enumerate(contexts):

            # Get random generator
            lp.rng = create_rng(seed=seeds[index])

            # Calculate the distances from the historical contexts
            # Row is 1D so convert it to 2D array for cdist using newaxis
            # Finally, reshape to flatten the output distances list
            row_2d = row[np.newaxis, :]
            distances_to_row = self.distances[start_index + index]

            # Find the k nearest neighbor indices
            indices = np.argpartition(distances_to_row, self.k - 1)[:self.k]

            prediction, exp, stats = self._get_nhood_predictions(lp, row_2d, indices, is_predict)
            predictions[index] = [prediction, exp, self.k, stats]

        # Return the list of predictions
        return predictions


class Simulator:
    """ Multi-Armed Bandit Simulator.

    This utility runs a simulation using historic data and a collection of multi-armed bandits from the MABWiser
    library or that extends the BaseMAB class in MABWiser.

    It can be used to run a simple simulation with a single bandit or to compare multiple bandits for policy selection,
    hyper-parameter tuning, etc.

    Nearest Neighbor bandits that use the default Radius and KNearest implementations from MABWiser are converted to
    custom versions that share distance calculations to speed up the simulation. These custom versions also track
    statistics about the neighborhoods that can be used in evaluation.

    The results can be accessed as the arms_to_stats, model_to_predictions, model_to_confusion_matrices, and
    models_to_evaluations properties.

    When using partial fitting, an additional confusion matrix is calculated for all predictions after all of the
    batches are processed.

    A log of the simulation tracks the experiment progress.

    Attributes
    ----------
    bandits: list[(str, bandit)]
        A list of tuples of the name of each bandit and the bandit object.
    decisions: array
        The complete decision history to be used in train and test.
    rewards: array
        The complete array history to be used in train and test.
    contexts: array
        The complete context history to be used in train and test.
    scaler: scaler
        A scaler object from sklearn.preprocessing.
    test_size: float
        The size of the test set
    is_ordered: bool
        Whether to use a chronological division for the train-test split.
        If false, uses sklearn's train_test_split.
    batch_size: int
        The size of each batch for online learning.
    evaluator: callable
        The function for evaluating the bandits. Values are stored in bandit_to_arm_to_stats_avg.
        Must have the function signature function(arms_to_stats_train: dictionary, predictions: list,
        decisions: np.ndarray, rewards: np.ndarray, metric: str).
    is_quick: bool
        Flag to skip neighborhood statistics.
    logger: Logger
        The logger object.
    arms: list
        The list of arms used by the bandits.
    arm_to_stats_total: dict
        Descriptive statistics for the complete data set.
    arm_to_stats_train: dict
        Descriptive statistics for the training data.
    arm_to_stats_test: dict
        Descriptive statistics for the test data.
    bandit_to_arm_to_stats_avg: dict
        Descriptive statistics for the predictions made by each bandit based on means from training data.
    bandit_to_arm_to_stats_min: dict
        Descriptive statistics for the predictions made by each bandit based on minimums from training data.
    bandit_to_arm_to_stats_max: dict
        Descriptive statistics for the predictions made by each bandit based on maximums from training data.
    bandit_to_confusion_matrices: dict
        The confusion matrices for each bandit.
    bandit_to_predictions: dict
        The prediction for each item in the test set for each bandit.
    bandit_to_expectations: dict
        The arm_to_expectations for each item in the test set for each bandit.
        For context-free bandits, there is a single dictionary for each batch.
    bandit_to_neighborhood_size: dict
        The number of neighbors in each neighborhood for each row in the test set.
        Calculated when using a Radius neighborhood policy, or a custom class that inherits from it.
        Not calculated when is_quick is True.
    bandit_to_arm_to_stats_neighborhoods: dict
        The arm_to_stats for each neighborhood for each row in the test set.
        Calculated when using Radius or KNearest, or a custom class that inherits from one of them.
        Not calculated when is_quick is True.
    test_indices: list
        The indices of the rows in the test set.
        If input was not zero-indexed, these will reflect their position in the input rather than actual index.

    Example
    -------
        >>> from mabwiser.mab import MAB, LearningPolicy
        >>> arms = ['Arm1', 'Arm2']
        >>> decisions = ['Arm1', 'Arm1', 'Arm2', 'Arm1']
        >>> rewards = [20, 17, 25, 9]
        >>> mab1 = MAB(arms, LearningPolicy.EpsilonGreedy(epsilon=0.25), seed=123456)
        >>> mab2 = MAB(arms, LearningPolicy.EpsilonGreedy(epsilon=0.30), seed=123456)
        >>> bandits = [('EG 25%', mab1), ('EG 30%', mab2)]
        >>> offline_sim = Simulator(bandits, decisions, rewards, test_size=0.5, batch_size=0)
        >>> offline_sim.run()
        >>> offline_sim.bandit_to_arm_to_stats_avg['EG 30%']['Arm1']
        {'count': 1, 'sum': 9, 'min': 9, 'max': 9, 'mean': 9.0, 'std': 0.0}

    """

    def __init__(self, bandits: List[tuple],                                    # List of tuples of names and bandits
                 decisions: Union[List[Arm], np.ndarray, pd.Series],            # Decisions that are made
                 rewards: Union[List[Num], np.ndarray, pd.Series],              # Rewards that are received
                 contexts: Union[None, List[List[Num]],
                                 np.ndarray, pd.Series, pd.DataFrame] = None,   # Contexts, optional
                 scaler: callable = None,                                       # Scaler for contexts
                 test_size: float = 0.3,                                        # Fraction to use for test batch
                 is_ordered: bool = False,                                      # Whether to use chronological order
                 batch_size: int = 0,                                           # Batch size for online learning
                 evaluator: callable = default_evaluator,                       # Evaluator function
                 seed: int = Constants.default_seed,                            # Random seed
                 is_quick: bool = False,                                        # Quick run flag
                 log_file: str = None,                                          # Log file name
                 log_format: str = '%(asctime)s %(levelname)s %(message)s'):    # Log file format
        """Simulator

        Creates a simulator object with a collection of bandits, the history of decisions, rewards, and contexts, and
        the parameters for the simulation.

        Parameters
        ----------
        bandits: list[tuple(str, MAB)]
            The set of bandits to run the simulation with. Must be a list of tuples of an identifier for the bandit and
            the bandit object, of type mabwiser.mab.MAB or that inherits from mabwiser.base_mab.BaseMAB
        decisions : Union[List[Arm], np.ndarray, pd.Series]
            The decisions that are made.
        rewards : Union[List[Num], np.ndarray, pd.Series]
            The rewards that are received corresponding to the decisions.
        contexts : Union[None, List[List[Num]], np.ndarray, pd.Series, pd.DataFrame]
            The context under which each decision is made. Default value is None.
        scaler: scaler
            One of the scalers from sklearn.preprocessing. Optional.
        test_size: float
            The fraction of data to use in the test set. Must be in the range (0, 1).
        is_ordered: bool
            Whether to divide the data randomly or to use the order given.
            When set to True, the test data will be the final n rows of the data set where n is determined by the split.
            When set to False, sklearn's train_test_split will be used.
        batch_size: int
            The batch size to test before partial fitting during _online learning.
            Cannot exceed the size of the test set.
            When batch size is 0, the simulation will be offline.
        evaluator: callable
            Function for scoring the predictions.
            Must have the function signature function(arm_to_stats_train: dictionary, predictions: list,
            decisions: np.ndarray, rewards: np.ndarray, stat: str, start_index: int, nn: bool).
        seed: num
            The seed for simulation
        is_quick: bool
            Flag to omit neighborhood statistics.
            Default value is False.
        log_file: str
            The logfile to store debug output. Optional.
        log_format: str
            The logger format used

        Raises
        ------
        TypeError   The bandit objects must be given in a list.
        TypeError   Each bandit object must be identified by a string label.
        TypeError   Each bandit must be of type MAB or inherit from BaseMAB.
        TypeError   The decisions must be given in a list, numpy array, or pandas Series.
        TypeError   The rewards must be given in a list, numpy array, or pandas series.
        TypeError   The contexts must be given in a 2D list, numpy array, pandas dataframe or pandas series.
        TypeError   The test_size size must be a float.
        TypeError   The batch size must be an integer.
        TypeError   The is_ordered flag must be a boolean.
        TypeError   The evaluation function must be callable.
        ValueError  The length of decisions and rewards must match.
        ValueError  The test_size size must be greater than 0 and less than 1.
        ValueError  The batch size cannot exceed the size of the test set.
        """

        self._validate_args(bandits=bandits, decisions=decisions, rewards=rewards, contexts=contexts,
                            test_size=test_size, ordered=is_ordered, batch_size=batch_size,
                            evaluation=evaluator, is_quick=is_quick)

        # Convert decisions, rewards and contexts to numpy arrays
        decisions = MAB._convert_array(decisions)
        rewards = MAB._convert_array(rewards)
        contexts = MAB._convert_matrix(contexts)

        # Save the simulation parameters
        self.bandits = bandits
        self.decisions = decisions
        self.rewards = rewards
        self.contexts = contexts
        self.scaler = scaler
        self.test_size = test_size
        self.is_ordered = is_ordered
        self.batch_size = batch_size
        self.evaluator = evaluator
        self.seed = seed
        self.is_quick = is_quick
        self.log_file = log_file
        self.log_format = log_format

        self._online = batch_size > 0
        self._chunk_size = 100

        # logger object
        self.logger = logging.getLogger()
        self.logger.setLevel(logging.DEBUG)

        # create console handler and set level to info
        console_handler = logging.StreamHandler()
        console_handler.setLevel(logging.INFO)
        formatter = logging.Formatter(self.log_format)
        console_handler.setFormatter(formatter)
        self.logger.addHandler(console_handler)

        # create error file handler and set level to debug
        if self.log_file is not None:
            handler = logging.FileHandler(self.log_file, "w", encoding=None, delay="true")
            handler.setLevel(logging.DEBUG)
            formatter = logging.Formatter(self.log_format)
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)

        # set arms
        iter_name, iter_mab = self.bandits[0]
        self.arms = iter_mab.arms

        # Get the number of effective jobs for each bandit
        n_jobs_list = [BaseMAB._effective_jobs(math.ceil((len(decisions) * test_size)), mab.n_jobs)
                       for mab_name, mab in self.bandits]
        # set max n_jobs
        self.max_n_jobs = max(n_jobs_list)

        # Initialize statistic objects
        self.arm_to_stats_total = {}
        self.arm_to_stats_train = {}
        self.arm_to_stats_test = {}
        self.bandit_to_arm_to_stats_min = {}
        self.bandit_to_arm_to_stats_avg = {}
        self.bandit_to_arm_to_stats_max = {}
        self.bandit_to_confusion_matrices = {}

        # Test row metrics
        self.bandit_to_predictions = {}
        self.bandit_to_expectations = {}
        self.bandit_to_neighborhood_size = {}
        self.bandit_to_arm_to_stats_neighborhoods = {}
        self.test_indices = []

        # Log parameters
        self.logger.info('Simulation Parameters')
        self.logger.info("\t bandits: " + str(self.bandits))
        self.logger.info("\t scaler: " + str(self.scaler))
        self.logger.info("\t test_size: " + str(self.test_size))
        self.logger.info("\t is_ordered: " + str(self.is_ordered))
        self.logger.info("\t batch_size: " + str(self.batch_size))
        self.logger.info("\t evaluator: " + str(self.evaluator))
        self.logger.info("\t seed: " + str(self.seed))
        self.logger.info("\t is_quick: " + str(self.is_quick))
        self.logger.info("\t log_file: " + str(self.log_file))
        self.logger.info("\t format: " + self.log_format)

    # Public Methods
    def get_arm_stats(self, decisions: np.ndarray, rewards: np.ndarray) -> dict:
        """
        Calculates descriptive statistics for each arm in the provided data set.

        Parameters
        ----------
        decisions: np.ndarray
            The decisions to filter the rewards.
        rewards: np.ndarray
            The rewards to get statistics about.

        Returns
        -------
        Arm_to_stats dictionary.
        Dictionary has the format {arm {'count', 'sum', 'min', 'max', 'mean', 'std'}}
        """
        stats = dict((arm, {}) for arm in self.arms)
        for arm in self.arms:
            indices = np.where(decisions == arm)
            if indices[0].shape[0] > 0:
                arm_rewards = rewards[indices]
                stats[arm] = self.get_stats(arm_rewards)
            else:
                stats[arm] = {'count': 0, 'sum': 0, 'min': 0,
                              'max': 0, 'mean': 0, 'std': 0}
                self.logger.info('No historic data for ' + str(arm))
        return stats

    def plot(self, metric: str = 'avg', is_per_arm: bool = False) -> NoReturn:
        """
        Generates a plot of the cumulative sum of the rewards for each bandit.
        Simulation must be run before calling this method.

        Arguments
        ---------
        metric: str
            The bandit_to_arm_to_stats to use to generate the plot. Must be 'avg', 'min', or 'max
        is_per_arm: bool
            Whether to plot each arm separately or use an aggregate statistic.

        Raises
        ------
        AssertionError  Descriptive statics for predictions are missing.
        TypeError       Metric must be a string.
        TypeError       The per_arm flag must be a boolean.
        ValueError      The metric must be one of avg, min or max.

        Returns
        -------
        None
        """
        # Validate args
        check_true(isinstance(metric, str), TypeError('Metric must be a string.'))
        check_true(metric in ['avg', 'min', 'max'], ValueError('Metric must be one of avg, min or max.'))
        check_true(isinstance(is_per_arm, bool), TypeError('is_per_arm must be True or False.'))

        # Validate that simulation has been run
        complete = 'Complete simulation must be run before calling this method.'
        check_true(bool(self.bandit_to_arm_to_stats_min),
                   AssertionError('Descriptive statistics for predictions missing. ' + complete))

        if metric == 'avg':
            stats = self.bandit_to_arm_to_stats_avg
        elif metric == 'min':
            stats = self.bandit_to_arm_to_stats_min
        else:
            stats = self.bandit_to_arm_to_stats_max

        if self.batch_size > 0:
            cu_sums = {}
            labels = {}
            mabs = []

            if is_per_arm:
                for mab_name, mab in self.bandits:
                    self.logger.info('Plotting ' + str(mab_name))
                    for arm in self.arms:
                        mab_arm_name = str(mab_name) + '_' + str(arm)
                        mabs.append(mab_arm_name)
                        labels[mab_arm_name] = []
                        sums = []
                        cu_sums[mab_arm_name] = []
                        for key in stats[mab_name].keys():
                            if key != 'total':
                                labels[mab_arm_name].append(key)
                                if np.isnan(stats[mab_name][key][arm]['sum']):
                                    sums.append(0)
                                else:
                                    sums.append(stats[mab_name][key][arm]['sum'])
                        cs = 0
                        for item in sums:
                            cs += item
                            cu_sums[mab_arm_name].append(cs)
            else:
                for mab_name, mab in self.bandits:
                    self.logger.info('Plotting ' + str(mab_name))

                    mabs.append(mab_name)
                    labels[mab_name] = []
                    sums = []
                    cu_sums[mab_name] = []

                    for key in stats[mab_name].keys():
                        if key != 'total':

                            labels[mab_name].append(key)

                            net = 0
                            for arm in self.arms:
                                if np.isnan(stats[mab_name][key][arm]['sum']):
                                    continue

                                net += stats[mab_name][key][arm]['sum']
                            sums.append(net)
                    cs = 0

                    for item in sums:
                        cs += item
                        cu_sums[mab_name].append(cs)

            x = [i * self.batch_size for i in labels[mabs[0]]]
            for mab in mabs:
                sns.lineplot(x=x, y=cu_sums[mab], label=mab)
            plt.xlabel('Test Rows Predicted')
            plt.ylabel('Cumulative Reward')
            plt.show()

        else:
            x_labels = []
            y_values = []

            if is_per_arm:
                for mab_name, mab in self.bandits:
                    for arm in self.arms:
                        x_labels.append(str(mab_name) + '_' + str(arm))
                        if not np.isnan(stats[mab_name][arm]['sum']):
                            y_values.append(stats[mab_name][arm]['sum'])
                        else:
                            y_values.append(0)

            else:
                for mab_name, mab in self.bandits:
                    x_labels.append(mab_name)
                    cumulative = 0
                    for arm in self.arms:
                        if not np.isnan(stats[mab_name][arm]['sum']):
                            cumulative += stats[mab_name][arm]['sum']
                    y_values.append(cumulative)

            plt.bar(x_labels, y_values)
            plt.xlabel('Bandit')
            plt.ylabel('Cumulative Reward')
            plt.xticks(rotation=45)
            plt.show()

        plt.close('all')

    def run(self) -> NoReturn:
        """ Run simulator

        Runs a simulation concurrently for all bandits in the bandits list.

        Returns
        -------
        None
        """

        #####################################
        # Total Stats
        #####################################
        self.logger.info("\n")
        self._set_stats("total", self.decisions, self.rewards)

        #####################################
        # Train-Test Split
        #####################################
        self.logger.info("\n")
        self.logger.info("Train/Test Split")
        train_decisions, train_rewards, train_contexts, test_decisions, test_rewards, test_contexts = \
            self._run_train_test_split()

        self.logger.info('Train size: ' + str(len(train_decisions)))
        self.logger.info('Test size: ' + str(len(test_decisions)))

        #####################################
        # Scale the Data
        #####################################
        if self.scaler is not None:
            self.logger.info("\n")
            train_contexts, test_contexts = self._run_scaler(train_contexts, test_contexts)

        #####################################
        # Train/Test Stats
        #####################################
        self.logger.info("\n")
        self._set_stats("train", train_decisions, train_rewards)

        self.logger.info("\n")
        self._set_stats("test", test_decisions, test_rewards)

        #####################################
        # Fit the Training Data
        #####################################
        self.logger.info("\n")
        self._train_bandits(train_decisions, train_rewards, train_contexts)

        #####################################
        # Test the bandit simulation
        #####################################
        self.logger.info("\n")
        self.logger.info("Testing Bandits")
        if self._online:
            self._online_test_bandits(test_decisions, test_rewards, test_contexts)

        # If not running an _online simulation, evaluate the entire test set
        else:
            self._offline_test_bandits(test_decisions, test_rewards, test_contexts)

        self.logger.info('Simulation complete')

    # Private Methods
    def _get_partial_evaluation(self, name, i, decisions, predictions, rewards, start_index, nn=False):
        cfm = confusion_matrix(decisions, predictions)
        self.bandit_to_confusion_matrices[name].append(cfm)
        self.logger.info(str(name) + ' batch ' + str(i) + ' confusion matrix: ' + str(cfm))
        if nn and not self.is_quick:
            self.bandit_to_arm_to_stats_min[name][i] = self.evaluator(self.arms,
                                                                      decisions, rewards,
                                                                      predictions,
                                                                      (self.arm_to_stats_train,
                                                                       self.bandit_to_arm_to_stats_neighborhoods[
                                                                           name]),
                                                                      "min", start_index, nn)

            self.bandit_to_arm_to_stats_avg[name][i] = self.evaluator(self.arms,
                                                                      decisions, rewards,
                                                                      predictions,
                                                                      (self.arm_to_stats_train,
                                                                       self.bandit_to_arm_to_stats_neighborhoods[
                                                                           name]),
                                                                      "mean", start_index, nn)

            self.bandit_to_arm_to_stats_max[name][i] = self.evaluator(self.arms,
                                                                      decisions, rewards,
                                                                      predictions,
                                                                      (self.arm_to_stats_train,
                                                                       self.bandit_to_arm_to_stats_neighborhoods[
                                                                           name]),
                                                                      "max", start_index, nn)
        else:
            self.bandit_to_arm_to_stats_min[name][i] = self.evaluator(self.arms,
                                                                      decisions, rewards,
                                                                      predictions,
                                                                      self.arm_to_stats_train, "min",
                                                                      start_index, False)

            self.bandit_to_arm_to_stats_avg[name][i] = self.evaluator(self.arms,
                                                                      decisions, rewards,
                                                                      predictions,
                                                                      self.arm_to_stats_train, "mean",
                                                                      start_index, False)

            self.bandit_to_arm_to_stats_max[name][i] = self.evaluator(self.arms,
                                                                      decisions, rewards,
                                                                      predictions,
                                                                      self.arm_to_stats_train, "max",
                                                                      start_index, False)
        self.logger.info(name + ' ' + str(self.bandit_to_arm_to_stats_min[name][i]))
        self.logger.info(name + ' ' + str(self.bandit_to_arm_to_stats_avg[name][i]))
        self.logger.info(name + ' ' + str(self.bandit_to_arm_to_stats_max[name][i]))

    def _offline_test_bandits(self, test_decisions, test_rewards, test_contexts):
        """
        Performs offline prediction.

        Arguments
        ---------
        test_decisions: np.ndarray
            The test set decisions.
        test_rewards: np.ndarray
            The test set rewards.
        test_contexts: np.ndarray
            The test set contexts.
        """

        chunk_start_index = [idx for idx in range(int(math.ceil(len(test_decisions) / self._chunk_size)))]
        for idx in chunk_start_index:

            # Set distances to None for new chunk
            distances = None

            # Progress update
            self.logger.info("Chunk " + str(idx + 1) + " out of " + str(len(chunk_start_index)))

            start = idx * self._chunk_size
            stop = min((idx+1)*self._chunk_size, len(test_decisions))
            chunk_decision = test_decisions[start:stop]
            chunk_contexts = test_contexts[start:stop] if test_contexts is not None else None

            for name, mab in self.bandits:

                if mab.is_contextual:
                    if isinstance(mab, _NeighborsSimulator):
                        if distances is None:
                            distances = mab.calculate_distances(chunk_contexts)
                        else:
                            mab.set_distances(distances)
                        predictions = mab.predict(chunk_contexts)
                        expectations = mab.row_arm_to_expectation[start:stop].copy()

                    else:
                        predictions = mab.predict(chunk_contexts)
                        if isinstance(mab._imp, _Neighbors):
                            expectations = mab._imp.arm_to_expectation.copy()
                        else:
                            expectations = mab.predict_expectations(chunk_contexts)

                    if not isinstance(expectations, list):
                        expectations = [expectations]
                    self.bandit_to_expectations[name] = self.bandit_to_expectations[name] + expectations

                else:
                    predictions = [mab.predict() for _ in range(len(chunk_decision))]

                if not isinstance(predictions, list):
                    predictions = [predictions]

                self.bandit_to_predictions[name] = self.bandit_to_predictions[name] + predictions

                if isinstance(mab, _NeighborsSimulator) and not self.is_quick:
                    self.bandit_to_arm_to_stats_neighborhoods[name] = mab.neighborhood_arm_to_stat.copy()

        for name, mab in self.bandits:
            nn = isinstance(mab, _NeighborsSimulator)

            if not mab.is_contextual:
                self.bandit_to_expectations[name] = mab._imp.arm_to_expectation.copy()
            if isinstance(mab, _NeighborsSimulator) and not self.is_quick:
                self.bandit_to_neighborhood_size[name] = mab.neighborhood_sizes.copy()

            # Evaluate the predictions
            self.bandit_to_confusion_matrices[name].append(confusion_matrix(test_decisions,
                                                                            self.bandit_to_predictions[name]))

            self.logger.info(name + " confusion matrix: " + str(self.bandit_to_confusion_matrices[name]))

            if nn and not self.is_quick:
                self.bandit_to_arm_to_stats_min[name] = self.evaluator(self.arms,
                                                                       test_decisions, test_rewards,
                                                                       self.bandit_to_predictions[name],
                                                                       (self.arm_to_stats_train,
                                                                        self.bandit_to_arm_to_stats_neighborhoods[
                                                                            name]),
                                                                       stat="min", start_index=0, nn=nn)

                self.bandit_to_arm_to_stats_avg[name] = self.evaluator(self.arms,
                                                                       test_decisions, test_rewards,
                                                                       self.bandit_to_predictions[name],
                                                                       (self.arm_to_stats_train,
                                                                        self.bandit_to_arm_to_stats_neighborhoods[
                                                                            name]),
                                                                       stat="mean", start_index=0, nn=nn)

                self.bandit_to_arm_to_stats_max[name] = self.evaluator(self.arms,
                                                                       test_decisions, test_rewards,
                                                                       self.bandit_to_predictions[name],
                                                                       (self.arm_to_stats_train,
                                                                        self.bandit_to_arm_to_stats_neighborhoods[
                                                                            name]),
                                                                       stat="max", start_index=0, nn=nn)
            else:
                self.bandit_to_arm_to_stats_min[name] = self.evaluator(self.arms,
                                                                       test_decisions, test_rewards,
                                                                       self.bandit_to_predictions[name],
                                                                       self.arm_to_stats_train, stat="min",
                                                                       start_index=0, nn=False)

                self.bandit_to_arm_to_stats_avg[name] = self.evaluator(self.arms,
                                                                       test_decisions, test_rewards,
                                                                       self.bandit_to_predictions[name],
                                                                       self.arm_to_stats_train, stat="mean",
                                                                       start_index=0, nn=False)

                self.bandit_to_arm_to_stats_max[name] = self.evaluator(self.arms,
                                                                       test_decisions, test_rewards,
                                                                       self.bandit_to_predictions[name],
                                                                       self.arm_to_stats_train, stat="max",
                                                                       start_index=0, nn=False)

            self.logger.info(name + " minimum analysis " + str(self.bandit_to_arm_to_stats_min[name]))
            self.logger.info(name + " average analysis " + str(self.bandit_to_arm_to_stats_avg[name]))
            self.logger.info(name + " maximum analysis " + str(self.bandit_to_arm_to_stats_max[name]))

    def _online_test_bandits(self, test_decisions, test_rewards, test_contexts):
        """
        Performs _online prediction and partial fitting for each model.

        Arguments
        ---------
        test_decisions: np.ndarray
            The test set decisions.
        test_rewards: np.ndarray
            The test set rewards.
        test_contexts: np.ndarray
            The test set contexts.
        """

        # Divide the test data into batches and chunk the batches based on size
        self._online_test_bandits_chunks(test_decisions, test_rewards, test_contexts)

        # Final scores for all predictions
        for name, mab in self.bandits:
            nn = isinstance(mab, _NeighborsSimulator)

            self._get_partial_evaluation(name, 'total', test_decisions, self.bandit_to_predictions[name],
                                         test_rewards, 0, nn)

            if isinstance(mab, _NeighborsSimulator) and not self.is_quick:
                self.bandit_to_neighborhood_size[name] = mab.neighborhood_sizes.copy()
                self.bandit_to_arm_to_stats_neighborhoods[name] = mab.neighborhood_arm_to_stat.copy()

    def _online_test_bandits_chunks(self, test_decisions, test_rewards, test_contexts):
        """
        Performs _online prediction and partial fitting for each model.

        Arguments
        ---------
        test_decisions: np.ndarray
            The test set decisions.
        test_rewards: np.ndarray
            The test set rewards.
        test_contexts: np.ndarray
            The test set contexts.
        """

        # Divide the test data into batches
        start = 0
        for i in range(0, int(math.ceil(len(test_decisions) / self.batch_size))):
            self.logger.info('Starting batch ' + str(i))

            # Stop at the next batch_size interval or the end of the test data
            stop = min(start + self.batch_size, len(test_decisions) + 1)

            batch_contexts = test_contexts[start:stop] if test_contexts is not None else None
            batch_decisions = test_decisions[start:stop]
            batch_rewards = test_rewards[start:stop]
            batch_predictions = {}
            batch_expectations = {}

            chunk_start = 0

            # Divide the batch into chunks
            for j in range(0, int(math.ceil(self.batch_size / self._chunk_size))):
                distances = None
                chunk_stop = min(chunk_start + self._chunk_size, self.batch_size)
                chunk_contexts = batch_contexts[chunk_start:chunk_stop] if batch_contexts is not None else None
                chunk_decisions = batch_decisions[chunk_start:chunk_stop]

                for name, mab in self.bandits:

                    if name not in batch_predictions.keys():
                        batch_predictions[name] = []
                        batch_expectations[name] = []

                    # Predict for the batch
                    if mab.is_contextual:
                        if isinstance(mab, _NeighborsSimulator):
                            if distances is None:
                                distances = mab.calculate_distances(chunk_contexts)
                                self.logger.info('Distances calculated')
                            else:
                                mab.set_distances(distances)
                                self.logger.info('Distances set')
                            predictions = mab.predict(chunk_contexts)
                            expectations = mab.row_arm_to_expectation[start+chunk_start:start+chunk_stop].copy()
                        else:
                            predictions = mab.predict(chunk_contexts)
                            expectations = mab.predict_expectations(chunk_contexts)

                        if self.batch_size == 1:
                            predictions = [predictions]

                    else:
                        predictions = [mab.predict() for _ in range(len(chunk_decisions))]
                        expectations = mab._imp.arm_to_expectation.copy()

                    # If a single prediction was returned, put it into a list
                    if not isinstance(predictions, list):
                        predictions = [predictions]
                    if not isinstance(expectations, list):
                        expectations = [expectations]

                    batch_predictions[name] = batch_predictions[name] + predictions
                    batch_expectations[name] = batch_expectations[name] + expectations

            for name, mab in self.bandits:
                if not mab.is_contextual:
                    batch_expectations[name] = [mab._imp.arm_to_expectation.copy()]

                nn = isinstance(mab, _NeighborsSimulator)

                # Add predictions from this batch
                self.bandit_to_predictions[name] = self.bandit_to_predictions[name] + batch_predictions[name]
                self.bandit_to_expectations[name] = self.bandit_to_expectations[name] + batch_expectations[name]

                if isinstance(mab, _RadiusSimulator) and not self.is_quick:
                    self.bandit_to_neighborhood_size[name] = mab.neighborhood_sizes.copy()
                if isinstance(mab, _NeighborsSimulator) and not self.is_quick:
                    self.bandit_to_arm_to_stats_neighborhoods[name] = mab.neighborhood_arm_to_stat.copy()

                # Evaluate the predictions
                self._get_partial_evaluation(name, i, batch_decisions, batch_predictions[name],
                                             batch_rewards, start, nn)

                # Update the model
                if mab.is_contextual:
                    mab.partial_fit(batch_decisions, batch_rewards, batch_contexts)
                else:
                    mab.partial_fit(batch_decisions, batch_rewards)
                self.logger.info(name + ' updated')

            # Update start value for next batch
            start += self.batch_size

    def _run_scaler(self, train_contexts, test_contexts):
        """
        Scales the train and test contexts with the scaler provided to the simulator constructor.

        Arguments
        ---------
        train_contexts: np.ndarray
            The training set contexts.
        test_contexts: np.ndarray
            The test set contexts.

        Returns
        -------
            The scaled train_contexts and test_contexts
        """

        self.logger.info("Train/Test Scale")

        train_contexts = self.scaler.fit_transform(train_contexts)
        test_contexts = self.scaler.transform(test_contexts)
        return train_contexts, test_contexts

    def _run_train_test_split(self):
        """
        Performs a train-test split with the test set containing a percentage of the data determined by test_size.

        If is_ordered is true, performs a chronological split.
        Otherwise uses sklearn's train_test_split

        Returns
        -------
            The train and test decisions, rewards and contexts
        """

        if self.is_ordered:
            train_size = int(len(self.decisions) * (1 - self.test_size))
            train_decisions = self.decisions[:train_size]
            train_rewards = self.rewards[:train_size]
            train_contexts = self.contexts[:train_size] if self.contexts is not None else None
            # The test arrays are re-indexed to 0 automatically
            test_decisions = self.decisions[train_size:]
            test_rewards = self.rewards[train_size:]
            test_contexts = self.contexts[train_size:] if self.contexts is not None else None
            self.test_indices = [x for x in range(train_size, len(self.decisions))]

        else:
            indices = [x for x in range(len(self.decisions))]
            if self.contexts is None:

                train_contexts, test_contexts = None, None

                train_indices, test_indices, train_decisions, test_decisions, train_rewards, test_rewards = \
                    train_test_split(indices, self.decisions, self.rewards, test_size=self.test_size,
                                     random_state=self.seed)
            else:

                train_indices, test_indices, train_decisions, test_decisions, train_rewards, test_rewards, \
                    train_contexts, test_contexts = \
                    train_test_split(indices, self.decisions, self.rewards, self.contexts,
                                     test_size=self.test_size, random_state=self.seed)
            self.test_indices = test_indices

        # Use memory limits for the nearest neighbors shared distance list to determine chunk size.
        # The list without chunking contains len(test_decisions) elements
        # each of which is an np.ndarray with len(train_decisions) distances.
        # Approximate as 8 bytes per element in each numpy array to give the size of the list in GB.
        distance_list_size = len(test_decisions) * (8 * len(train_decisions)) / 1e9

        # If there is more than one test row and contexts have been provided:
        if distance_list_size > 1.0 and train_contexts is not None:

            # Set the chunk size to contain 1GB per job
            gb_chunk_size = int(len(test_decisions) / distance_list_size) * self.max_n_jobs

            # If the length of the test set is less than the chunk size, chunking is unnecessary
            self._chunk_size = min(gb_chunk_size, len(test_decisions))

        # If there is only one test row or all MABs are context-free chunking is unnecessary:
        else:
            self._chunk_size = len(test_decisions)

        return train_decisions, train_rewards, train_contexts, test_decisions, test_rewards, test_contexts

    def _set_stats(self, scope, decisions, rewards):
        """
        Calculates descriptive statistics for each arm for the specified data set
        and stores them to the corresponding arm_to_stats dictionary.

        Arguments
        ---------
        scope: str
            The label for which set is being evaluated.
            Accepted values: 'total', 'train', 'test'
        decisions: np.ndarray
            The decisions to filter the rewards.
        rewards: np.ndarray
            The rewards to get statistics about.

        Returns
        -------
        None
        """

        if scope == 'total':
            self.arm_to_stats_total = self.get_arm_stats(decisions, rewards)
            self.logger.info("Total Stats")
            self.logger.info(self.arm_to_stats_total)
        elif scope == 'train':
            self.arm_to_stats_train = self.get_arm_stats(decisions, rewards)
            self.logger.info("Train Stats")
            self.logger.info(self.arm_to_stats_train)
        elif scope == 'test':
            self.arm_to_stats_test = self.get_arm_stats(decisions, rewards)
            self.logger.info("Test Stats")
            self.logger.info(self.arm_to_stats_test)
        else:
            raise ValueError("Unsupported scope name")

    def _train_bandits(self, train_decisions, train_rewards, train_contexts=None):
        """
        Trains each of the bandit models.

        Arguments
        ---------
        train_decisions: np.ndarray
            The training set decisions.
        train_rewards: np.ndarray
            The training set rewards.
        train_contexts: np.ndarray
            The training set contexts.
        """

        self.logger.info("Training Bandits")

        new_bandits = []
        for name, mab in self.bandits:
            # Add the current bandit
            self.bandit_to_predictions[name] = []
            self.bandit_to_expectations[name] = []
            self.bandit_to_neighborhood_size[name] = []
            self.bandit_to_arm_to_stats_neighborhoods[name] = []
            self.bandit_to_confusion_matrices[name] = []
            self.bandit_to_arm_to_stats_min[name] = {}
            self.bandit_to_arm_to_stats_avg[name] = {}
            self.bandit_to_arm_to_stats_max[name] = {}

            if isinstance(mab, MAB):
                imp = mab._imp
            else:
                imp = mab
            if isinstance(imp, _Radius):
                mab = _RadiusSimulator(imp.rng, imp.arms, imp.n_jobs, imp.backend, imp.lp, imp.radius,
                                       imp.metric, is_quick=self.is_quick,
                                       no_nhood_prob_of_arm=imp.no_nhood_prob_of_arm)

            elif isinstance(imp, _KNearest):
                mab = _KNearestSimulator(imp.rng, imp.arms, imp.n_jobs, imp.backend, imp.lp, imp.k,
                                         imp.metric, is_quick=self.is_quick)

            new_bandits.append((name, mab))
            if mab.is_contextual:
                mab.fit(train_decisions, train_rewards, train_contexts)
            else:
                mab.fit(train_decisions, train_rewards)
            self.logger.info(name + ' trained')

        self.bandits = new_bandits

    # Static Methods
    @staticmethod
    def get_stats(rewards: np.ndarray) -> dict:
        """Calculates descriptive statistics for the given array of rewards.

        Parameters
        ----------
        rewards: nd.nparray
            Array of rewards for a single arm.

        Returns
        -------
        A dictionary of descriptive statistics.
        Dictionary has the format {'count', 'sum', 'min', 'max', 'mean', 'std'}
        """
        return {'count': rewards.size, 'sum': rewards.sum(), 'min': rewards.min(),
                'max': rewards.max(), 'mean': rewards.mean(), 'std': rewards.std()}

    @staticmethod
    def _validate_args(bandits, decisions, rewards, contexts, test_size, ordered, batch_size,
                       evaluation, is_quick):
        """
        Validates the simulation parameters.
        """
        check_true(isinstance(bandits, list), TypeError('Bandits must be provided in a list.'))
        for pair in bandits:
            name, mab = pair
            check_true(isinstance(name, str), TypeError('All bandits must be identified by strings.'))
            check_true(isinstance(mab, (MAB, BaseMAB)),
                       TypeError('All bandits must be MAB objects or inherit from BaseMab.'))

        # Type check for decisions
        check_true(isinstance(decisions, (list, np.ndarray, pd.Series)),
                   TypeError("The decisions should be given as list, numpy array, or pandas series."))

        # Type check for rewards
        check_true(isinstance(rewards, (list, np.ndarray, pd.Series)),
                   TypeError("The rewards should be given as list, numpy array, or pandas series."))

        # Type check for contexts --don't use "if contexts" since it's n-dim array
        if contexts is not None:
            if isinstance(contexts, np.ndarray):
                check_true(contexts.ndim == 2,
                           TypeError("The contexts should be given as 2D list, numpy array, or pandas series or "
                                     "data frames."))
            elif isinstance(contexts, list):
                check_true(np.array(contexts).ndim == 2,
                           TypeError("The contexts should be given as 2D list, numpy array, or pandas series or "
                                     "data frames."))
            else:
                check_true(isinstance(contexts, (pd.Series, pd.DataFrame)),
                           TypeError("The contexts should be given as 2D list, numpy array, or pandas series or "
                                     "data frames."))

        # Length check for decisions and rewards
        check_true(len(decisions) == len(rewards), ValueError("Decisions and rewards should be same length."))

        check_true(isinstance(test_size, float), TypeError("Test size must be a float."))
        check_true(0.0 < test_size < 1.0, ValueError("Test size must be greater than zero and less than one."))
        check_true(isinstance(ordered, bool), TypeError("Ordered must be a boolean."))
        check_true(isinstance(batch_size, int), TypeError("Batch size must be an integer."))
        if batch_size > 0:
            check_true(batch_size <= (math.ceil(len(decisions) * test_size)),
                       ValueError("Batch size cannot be larger than " "the test set."))

        check_true(callable(evaluation), TypeError("Evaluation method must be a function."))

        check_true(isinstance(is_quick, bool), TypeError('Quick run flag must be a boolean.'))