# -*- coding: utf-8 -*-

import random
import logging
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tests.test_base import BaseTest

from mabwiser.mab import MAB, LearningPolicy, NeighborhoodPolicy
from mabwiser.simulator import Simulator


# Suppress every log record of severity CRITICAL and below for the whole
# process once this module is imported, so the examples run without log output.
logging.disable(logging.CRITICAL)


class ExampleTest(BaseTest):
    """Usage examples for ``MAB`` and ``Simulator`` written as regression tests.

    The expected arms asserted below are pinned values for the fixed seed that is
    handed to ``self.predict``. Shared fixtures (the two-arm layout history, the
    five-ad contextual campaign and the simulator input) are built by the private
    helpers at the top of the class so that each test only states what differs.
    """

    # Seed used for every ``self.predict`` / ``Simulator`` call and for the
    # generated simulator input.
    _EXAMPLE_SEED = 123456

    # ------------------------------------------------------------------
    # Fixtures and shared checks
    # ------------------------------------------------------------------

    @staticmethod
    def _layout_history():
        """Return fresh ``(decisions, rewards)`` lists for the two-arm layout examples."""
        decisions = [1, 1, 1, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1, 2, 1]
        rewards = [10, 17, 22, 9, 4, 0, 7, 8, 20, 9, 50, 5, 7, 12, 10]
        return decisions, rewards

    def _predict_layout(self, learning_policy, decisions=None, rewards=None):
        """Run ``self.predict`` on the two-arm layout problem.

        :param learning_policy: The learning policy under test.
        :param decisions: Optional replacement for the default decision history.
        :param rewards: Optional replacement for the default reward history.
        :return: Whatever ``self.predict`` returns; the tests unpack it as ``(arm, mab)``.
        """
        default_decisions, default_rewards = self._layout_history()
        return self.predict(arms=[1, 2],
                            decisions=default_decisions if decisions is None else decisions,
                            rewards=default_rewards if rewards is None else rewards,
                            learning_policy=learning_policy,
                            seed=self._EXAMPLE_SEED,
                            num_run=1,
                            is_predict=True)

    @staticmethod
    def _partial_fit_layouts(mab, rewards=None):
        """Update ``mab`` online with four additional layout observations.

        :param mab: The bandit returned by ``_predict_layout``.
        :param rewards: Rewards for decisions ``[1, 2, 1, 2]``; defaults to ``[0, 12, 7, 19]``.
        """
        mab.partial_fit(decisions=[1, 2, 1, 2],
                        rewards=[0, 12, 7, 19] if rewards is None else rewards)

    def _assert_arm_added(self, mab, arm, binarizer=None):
        """Add ``arm`` to ``mab`` and assert that it is registered.

        :param mab: The bandit to extend.
        :param arm: The new arm.
        :param binarizer: Optional second positional argument for ``add_arm``;
                          omitted from the call when ``None``.
        """
        if binarizer is None:
            mab.add_arm(arm)
        else:
            mab.add_arm(arm, binarizer)

        self.assertIn(arm, mab.arms)
        self.assertIn(arm, mab._imp.arm_to_expectation.keys())

    @staticmethod
    def _ad_campaign_data():
        """Build the five-ad contextual example.

        :return: ``(train_df, train, test)`` where ``train_df`` holds the raw history
                 (columns ``ad``, ``revenues`` and the three features), and ``train`` /
                 ``test`` are the feature matrices after ``StandardScaler`` was fitted
                 on the training features.
        """
        train_df = pd.DataFrame({'ad': [1, 1, 1, 2, 4, 5, 3, 3, 2, 1, 4, 5, 3, 2, 5],
                                 'revenues': [10, 17, 22, 9, 4, 20, 7, 8, 20, 9, 50, 5, 7, 12, 10],
                                 'age': [22, 27, 39, 48, 21, 20, 19, 37, 52, 26, 18, 42, 55, 57, 38],
                                 'click_rate': [0.2, 0.6, 0.99, 0.68, 0.15, 0.23, 0.75, 0.17,
                                                0.33, 0.65, 0.56, 0.22, 0.19, 0.11, 0.83],
                                 'subscriber': [1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0]}
                                )

        # Test data for the new predictions
        test_df = pd.DataFrame({'age': [37, 52], 'click_rate': [0.5, 0.6], 'subscriber': [0, 1]})

        # Scale the data: fit on the training features, reuse the same scaling for the test rows
        scaler = StandardScaler()
        train = scaler.fit_transform(np.asarray(train_df[['age', 'click_rate', 'subscriber']], dtype='float64'))
        test = scaler.transform(np.asarray(test_df, dtype='float64'))

        return train_df, train, test

    def _predict_ads(self, learning_policy, neighborhood_policy=None):
        """Run ``self.predict`` on the five-ad contextual problem.

        :param learning_policy: The learning policy under test.
        :param neighborhood_policy: Optional neighborhood policy; only forwarded when given,
                                    so ``self.predict`` keeps its own default otherwise.
        :return: ``(arms, mab, test)`` - the two values unpacked from ``self.predict``
                 and the scaled test contexts.
        """
        train_df, train, test = self._ad_campaign_data()

        kwargs = dict(arms=[1, 2, 3, 4, 5],
                      decisions=train_df['ad'],
                      rewards=train_df['revenues'],
                      learning_policy=learning_policy,
                      context_history=train,
                      contexts=test,
                      seed=self._EXAMPLE_SEED,
                      num_run=1,
                      is_predict=True)
        if neighborhood_policy is not None:
            kwargs['neighborhood_policy'] = neighborhood_policy

        arms, mab = self.predict(**kwargs)
        return arms, mab, test

    def _check_ads_update(self, mab, arms, test):
        """Feed the observed test revenues back into ``mab``, then add ad 6 and verify it.

        :param mab: The bandit returned by ``_predict_ads``.
        :param arms: The arms predicted for the test contexts.
        :param test: The scaled test contexts.
        """
        test_df_revenue = pd.Series([7, 13])
        mab.partial_fit(decisions=arms, rewards=test_df_revenue, contexts=test)
        self._assert_arm_added(mab, 6)

    @staticmethod
    def _simulator_history(size=100, with_contexts=True, seed=_EXAMPLE_SEED):
        """Generate reproducible input for the simulator examples.

        A private ``random.Random`` instance is used so that the data is identical on
        every run and the global ``random`` state is left untouched.

        :param size: Number of historic observations.
        :param with_contexts: Whether to generate a 50-feature context per observation.
        :param seed: Seed for the private generator.
        :return: ``(decisions, rewards, contexts)``; ``contexts`` is ``None`` when
                 ``with_contexts`` is false.
        """
        rng = random.Random(seed)

        # randint is inclusive on both ends, so decision 2 shows up even though the
        # bandits below only declare arms [0, 1]. Kept as in the original example.
        # NOTE(review): confirm that feeding an undeclared arm is intentional.
        decisions = [rng.randint(0, 2) for _ in range(size)]
        rewards = [rng.randint(0, 1000) for _ in range(size)]

        contexts = None
        if with_contexts:
            contexts = [[rng.random() for _ in range(50)] for _ in range(size)]

        return decisions, rewards, contexts

    @staticmethod
    def _binarize_reward(decision, reward):
        """Binarizer for the simulator examples: arm 0 succeeds on rewards <= 50, others on >= 220."""
        if decision == 0:
            return reward <= 50
        return reward >= 220

    def _assert_simulator_ran(self, sim):
        """Run ``sim`` and assert that confusion matrices and predictions were produced."""
        sim.run()

        self.assertTrue(sim.bandit_to_confusion_matrices)
        self.assertTrue(sim.bandit_to_predictions)

    # ------------------------------------------------------------------
    # Context-free examples
    # ------------------------------------------------------------------

    def test_popularity(self):
        """Popularity policy on two named arms: pinned prediction and expectations."""
        list_of_arms = ['Arm1', 'Arm2']
        decisions = ['Arm1', 'Arm1', 'Arm2', 'Arm1']
        rewards = [20, 17, 25, 9]

        mab = MAB(list_of_arms, LearningPolicy.Popularity())
        mab.fit(decisions, rewards)

        # The result of this first call is deliberately discarded; the expected arm
        # below was recorded with this call in place.
        # NOTE(review): presumably it advances the policy's random state - confirm
        # before removing.
        mab.predict()

        self.assertEqual("Arm2", mab.predict())
        self.assertDictEqual({'Arm1': 0.38016528925619836, 'Arm2': 0.6198347107438016},
                             mab.predict_expectations())

    def test_random(self):
        """Random policy: pinned prediction, then online update and a new arm."""
        arm, mab = self._predict_layout(LearningPolicy.Random())

        self.assertEqual(arm, 2)

        self._partial_fit_layouts(mab)
        self._assert_arm_added(mab, 3)

    def test_greedy15(self):
        """Epsilon-greedy with epsilon 0.15: pinned prediction, online update, new arm."""
        arm, mab = self._predict_layout(LearningPolicy.EpsilonGreedy(epsilon=0.15))

        self.assertEqual(arm, 1)

        self._partial_fit_layouts(mab)
        self._assert_arm_added(mab, 3)

    def test_softmax(self):
        """Softmax with tau 1: pinned prediction, online update, new arm."""
        arm, mab = self._predict_layout(LearningPolicy.Softmax(tau=1))

        self.assertEqual(arm, 2)

        self._partial_fit_layouts(mab)
        self._assert_arm_added(mab, 3)

    def test_ts(self):
        """Thompson sampling with a per-arm threshold binarizer; new arm gets its own binarizer."""
        dec_to_threshold = {1: 10, 2: 20}

        def binarize(dec, value):
            return value >= dec_to_threshold[dec]

        arm, mab = self._predict_layout(LearningPolicy.ThompsonSampling(binarizer=binarize))

        self.assertEqual(arm, 1)

        self._partial_fit_layouts(mab)

        # Updating of the model with new arm: the replacement binarizer also covers arm 3
        def binary_func2(decision, reward):
            threshold = 15 if decision == 3 else 10
            return 1 if reward > threshold else 0

        self._assert_arm_added(mab, 3, binary_func2)

    def test_ts_binary(self):
        """Thompson sampling on rewards that are already binary, so no binarizer is given."""
        arm, mab = self._predict_layout(LearningPolicy.ThompsonSampling(),
                                        rewards=[1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1])

        self.assertEqual(arm, 2)

        self._partial_fit_layouts(mab, rewards=[0, 1, 0, 1])
        self._assert_arm_added(mab, 3)

    def test_ucb1(self):
        """UCB1 with alpha 1.25: pinned prediction, online update, new arm."""
        arm, mab = self._predict_layout(LearningPolicy.UCB1(alpha=1.25))

        self.assertEqual(arm, 2)

        self._partial_fit_layouts(mab)
        self._assert_arm_added(mab, 3)

    def test_ts_series(self):
        """History supplied as pandas Series (despite the name, the policy is epsilon-greedy)."""
        layouts, revenues = self._layout_history()
        df = pd.DataFrame({'layouts': layouts, 'revenues': revenues})

        arm, _ = self._predict_layout(LearningPolicy.EpsilonGreedy(epsilon=0.15),
                                      decisions=df['layouts'],
                                      rewards=df['revenues'])

        self.assertEqual(arm, 1)

    def test_ts_numpy(self):
        """History supplied as numpy arrays (despite the name, the policy is epsilon-greedy)."""
        layouts, revenues = self._layout_history()

        arm, _ = self._predict_layout(LearningPolicy.EpsilonGreedy(epsilon=0.15),
                                      decisions=np.array(layouts),
                                      rewards=np.array(revenues))

        self.assertEqual(arm, 1)

    # ------------------------------------------------------------------
    # Contextual examples
    # ------------------------------------------------------------------

    def test_linucb(self):
        """LinUCB with alpha 1.25: pinned predictions, online update with contexts, new arm."""
        arms, mab, test = self._predict_ads(LearningPolicy.LinUCB(alpha=1.25))

        self.assertEqual(arms, [5, 2])

        self._check_ads_update(mab, arms, test)

    def test_lints(self):
        """LinTS with alpha 1.5: pinned predictions, online update with contexts, new arm."""
        arms, mab, test = self._predict_ads(LearningPolicy.LinTS(alpha=1.5))

        self.assertEqual(arms, [5, 2])

        self._check_ads_update(mab, arms, test)

    def test_radius(self):
        """UCB1 inside a radius-5 neighborhood: pinned predictions, online update, new arm."""
        arms, mab, test = self._predict_ads(LearningPolicy.UCB1(alpha=1.25),
                                            NeighborhoodPolicy.Radius(radius=5))

        self.assertEqual(arms, [4, 4])

        self._check_ads_update(mab, arms, test)

    def test_nearest(self):
        """UCB1 over the 5 nearest neighbors: pinned predictions, online update, new arm."""
        arms, mab, test = self._predict_ads(LearningPolicy.UCB1(alpha=1.25),
                                            NeighborhoodPolicy.KNearest(k=5))

        self.assertEqual(arms, [5, 1])

        self._check_ads_update(mab, arms, test)

    def test_linucb_radius(self):
        """LinUCB with alpha 1.25 inside a radius-1 neighborhood: pinned predictions."""
        arms, _, _ = self._predict_ads(LearningPolicy.LinUCB(alpha=1.25),
                                       NeighborhoodPolicy.Radius(radius=1))

        self.assertEqual(arms, [1, 2])

    def test_linucb_knearest(self):
        """LinUCB with alpha 1.25 over the 4 nearest neighbors: pinned predictions."""
        arms, _, _ = self._predict_ads(LearningPolicy.LinUCB(alpha=1.25),
                                       NeighborhoodPolicy.KNearest(k=4))

        self.assertEqual(arms, [1, 2])

    def test_lints_radius(self):
        """LinTS with alpha 0.5 inside a radius-1 neighborhood: pinned predictions."""
        arms, _, _ = self._predict_ads(LearningPolicy.LinTS(alpha=0.5),
                                       NeighborhoodPolicy.Radius(radius=1))

        self.assertEqual(arms, [1, 2])

    def test_lints_knearest(self):
        """LinTS with alpha 1 over the 4 nearest neighbors: pinned predictions."""
        arms, _, _ = self._predict_ads(LearningPolicy.LinTS(alpha=1),
                                       NeighborhoodPolicy.KNearest(k=4))

        self.assertEqual(arms, [1, 2])

    # ------------------------------------------------------------------
    # Simulator examples
    # ------------------------------------------------------------------

    def test_simulator_contextual(self):
        """Simulate five learning policies, each wrapped in a radius-10 neighborhood."""
        decisions, rewards, contexts = self._simulator_history()

        n_jobs = 1
        contextual_mabs = [
            ('Random', MAB([0, 1], LearningPolicy.Random(), NeighborhoodPolicy.Radius(10), n_jobs=n_jobs)),
            ('UCB1', MAB([0, 1], LearningPolicy.UCB1(1), NeighborhoodPolicy.Radius(10), n_jobs=n_jobs)),
            ('ThompsonSampling', MAB([0, 1], LearningPolicy.ThompsonSampling(self._binarize_reward),
                                     NeighborhoodPolicy.Radius(10), n_jobs=n_jobs)),
            ('EpsilonGreedy', MAB([0, 1], LearningPolicy.EpsilonGreedy(epsilon=.15),
                                  NeighborhoodPolicy.Radius(10), n_jobs=n_jobs)),
            ('Softmax', MAB([0, 1], LearningPolicy.Softmax(), NeighborhoodPolicy.Radius(10), n_jobs=n_jobs)),
        ]

        sim = Simulator(contextual_mabs, decisions, rewards, contexts,
                        scaler=StandardScaler(), test_size=0.5, is_ordered=False, batch_size=0,
                        seed=self._EXAMPLE_SEED)

        self._assert_simulator_ran(sim)

    def test_simulator_context_free(self):
        """Simulate five context-free learning policies with a batch size of 1."""
        decisions, rewards, _ = self._simulator_history(with_contexts=False)

        n_jobs = 1
        context_free_mabs = [
            ('Random', MAB([0, 1], LearningPolicy.Random(), n_jobs=n_jobs)),
            ('UCB1', MAB([0, 1], LearningPolicy.UCB1(1), n_jobs=n_jobs)),
            ('ThompsonSampling', MAB([0, 1], LearningPolicy.ThompsonSampling(self._binarize_reward),
                                     n_jobs=n_jobs)),
            ('EpsilonGreedy', MAB([0, 1], LearningPolicy.EpsilonGreedy(epsilon=.15), n_jobs=n_jobs)),
            ('Softmax', MAB([0, 1], LearningPolicy.Softmax(), n_jobs=n_jobs)),
        ]

        sim = Simulator(context_free_mabs, decisions, rewards, contexts=None,
                        scaler=None, test_size=0.5, is_ordered=False, batch_size=1,
                        seed=self._EXAMPLE_SEED)

        self._assert_simulator_ran(sim)

    def test_simulator_mixed(self):
        """Simulate a contextual and a context-free bandit side by side."""
        decisions, rewards, contexts = self._simulator_history()

        n_jobs = 1
        mixed = [
            ('RandomRadius', MAB([0, 1], LearningPolicy.Random(), NeighborhoodPolicy.Radius(10), n_jobs=n_jobs)),
            ('Random', MAB([0, 1], LearningPolicy.Random(), n_jobs=n_jobs)),
        ]

        sim = Simulator(mixed, decisions, rewards, contexts,
                        scaler=StandardScaler(), test_size=0.5, is_ordered=False, batch_size=0,
                        seed=self._EXAMPLE_SEED)

        self._assert_simulator_ran(sim)

    def test_simulator_hyper_parameter(self):
        """Compare UCB1 under neighborhood radii 6 through 9 using the simulator's quick mode."""
        decisions, rewards, contexts = self._simulator_history()

        n_jobs = 1
        hyper_parameter_tuning = [
            ('Radius' + str(radius),
             MAB([0, 1], LearningPolicy.UCB1(1), NeighborhoodPolicy.Radius(radius), n_jobs=n_jobs))
            for radius in range(6, 10)
        ]

        sim = Simulator(hyper_parameter_tuning, decisions, rewards, contexts,
                        scaler=StandardScaler(), test_size=0.5, is_ordered=False, batch_size=0,
                        seed=self._EXAMPLE_SEED, is_quick=True)

        self._assert_simulator_ran(sim)