from time import time, sleep

from ..core import Agent
from ..utils import *

from cryptotrader.models import apriori as models
from cryptotrader.optimizers import gradient as gd
from cryptotrader.optimizers import gt
from cryptotrader.models import risk

import optunity as ot
import pandas as pd
import talib as tl
from decimal import Decimal
from datetime import timedelta
from numpy import diag, sqrt, log, trace
from numpy.linalg import inv

from ..exceptions import *

from scipy.signal import argrelextrema
from scipy.optimize import minimize
from scipy import stats
import cvxopt as opt
import cvxopt.solvers as optsolvers
import warnings
import talib as ta
optsolvers.options['show_progress'] = False


# Base class
class APrioriAgent(Agent):
    """
    Apriori abstract trading agent.
    Subclass this to create trading strategies that can be trained on the Trading environment
    and deployed directly to the market.
    """
    def __init__(self, fiat, name=""):
        """
        :param fiat: str: symbol to use as quote currency
        :param name: str: agent name
        """
        super().__init__(name=name)
        self.epsilon = 1e-16
        self.fiat = fiat
        self.step = 0
        self.name = name
        self.log = {}

    # Model methods
    def predict(self, obs):
        """
        Select an action given the current observation
        :param obs: pandas DataFrame: Environment observation
        :return: numpy array: Action vector
        """
        raise NotImplementedError()

    def rebalance(self, obs):
        raise NotImplementedError()

    # Train methods
    def set_params(self, **kwargs):
        raise NotImplementedError("You must override this method in your implementation.")

    def fit(self, env, nb_steps, batch_size, search_space, constraints=None, action_repetition=1, callbacks=None, verbose=1,
            visualize=False, nb_max_start_steps=0, start_step_policy=None, log_interval=10000, start_step=0,
            nb_max_episode_steps=None, noise_abs=0.0):
        """
        Fit the model parameters on the environment
        :param env: BacktestEnvironment instance
        :param nb_steps: Number of optimization evals
        :param batch_size: Size of the batch for each optimization pass
        :param search_space: Parameter search space
        :param constraints: list: Functions returning False when constraints are violated
        :param action_repetition:
        :param callbacks:
        :param verbose:
        :param visualize:
        :param nb_max_start_steps:
        :param start_step_policy:
        :param log_interval:
        :param nb_max_episode_steps: Number of steps for one episode
        :param noise_abs: Noise radius to use on sample runs
        :return: tuple: Optimal parameters, information about the optimization process
        """
        try:
            # Initialize train
            env.training = True
            i = 0
            t0 = time()
            # Defaults returned if optimization is interrupted before producing a result
            opt_params, info = None, None

            if verbose:
                print("Optimizing model for %d steps with batch size %d..." % (nb_steps, batch_size))

            ### First, optimize benchmark
            env.optimize_benchmark(nb_steps * 100, verbose=True)

            ## Now optimize model w.r.t benchmark
            # First define optimization constraints
            # Ex constraint:
            # @ot.constraints.constrained([lambda mean_type,
            #         ma1,
            #         ma2,
            #         std_span,
            #         alpha_up,
            #         alpha_down: ma1 < ma2])

            if not constraints:
                constraints = [lambda *args, **kwargs: True]

            # Initialize buffer
            optimization_rewards = []

            # Then, define optimization routine
            @ot.constraints.constrained(constraints)
            @ot.constraints.violations_defaulted(-100)
            def find_hp(**kwargs):
                try:
                    # Init variables
                    nonlocal i, nb_steps, t0, env, nb_max_episode_steps, optimization_rewards

                    # Sample params
                    self.set_params(**kwargs)

                    # Try model for a batch
                    # sample environment
                    r, rstd = self.test(env,
                                    nb_episodes=batch_size,
                                    action_repetition=action_repetition,
                                    callbacks=callbacks,
                                    visualize=visualize,
                                    nb_max_episode_steps=nb_max_episode_steps,
                                    nb_max_start_steps=nb_max_start_steps,
                                    start_step_policy=start_step_policy,
                                    start_step=start_step,
                                    noise_abs=noise_abs,
                                    verbose=False)

                    # Log batch reward
                    optimization_rewards.append(r)

                    # Increment step counter
                    i += 1

                    # Update progress
                    if verbose:
                        print("Optimization step {0}/{1}, r: {2:.8f}, r std: {3:.8f}, mean r: {4:.8f} ETC: {5}                     ".format(i,
                                                                            nb_steps,
                                                                            r,
                                                                            rstd,
                                                                            np.mean(optimization_rewards),
                                                                            str(pd.to_timedelta((time() - t0) * (nb_steps - i), unit='s'))),
                              end="\r")
                        t0 = time()

                    # Average rewards and return
                    return r

                except KeyboardInterrupt:
                    raise ot.api.fun.MaximumEvaluationsException(0)

            # Define params search space
            # Ex search space:
            #
            # hp = {
            #     'ma1': [2, env.obs_steps],
            #     'ma2': [2, env.obs_steps],
            #     'std_span': [2, env.obs_steps],
            #     'alpha_up': [1e-8, 1],
            #     'alpha_down': [1e-8, 1]
            #     }
            #
            # search_space = {'mean_type':{'simple': hp,
            #                              'exp': hp,
            #                              'kama': hp
            #                              }
            #                 }

            print("\nOptimizing model...")

            # Call optimizer
            opt_params, info, _ = ot.maximize_structured(find_hp,
                                              num_evals=nb_steps,
                                              search_space=search_space
                                              )

            # Update model params with optimal
            self.set_params(**opt_params)

            # Set flag off
            env.training = False

            # Return optimal params and information
            return opt_params, info

        except KeyboardInterrupt:

            # If interrupted, clean after yourself
            env.training = False
            print("\nOptimization interrupted by user.")
            return opt_params, info
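

# Usage sketch (illustrative, not part of the library API): tuning a concrete agent with
# APrioriAgent.fit, assuming a BacktestEnvironment instance `env` is already built.
# The search space follows the optunity structured format shown inside fit() above.
#
# agent = PAMR(fiat="BTC")
# search_space = {'variant': {'PAMR1': {'eps': [0.0, 0.1], 'C': [100, 5000]},
#                             'PAMR2': {'eps': [0.0, 0.1], 'C': [100, 5000]}}}
# opt_params, info = agent.fit(env, nb_steps=100, batch_size=1, search_space=search_space)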


# Test and benchmark
class TestAgent(APrioriAgent):
    """
    Test agent for debugging
    """
    def __repr__(self):
        return "Test"

    def __init__(self, obs_shape, fiat="BTC"):
        super().__init__(fiat)
        self.obs_shape = obs_shape

    def predict(self, obs):
        # Assert obs is valid
        assert obs.shape == self.obs_shape, "Wrong obs shape."

        for val in obs.applymap(lambda x: isinstance(x, Decimal) and Decimal.is_finite(x)).all():
            assert val, ("Non decimal value found in obs.", obs.applymap(lambda x: isinstance(x, Decimal) and Decimal.is_finite(x)).all())

        if self.step == 0:
            n_pairs = obs.columns.levels[0].shape[0]
            action = np.ones(n_pairs)
            action[-1] = 0
            return array_normalize(action)
        else:
            return self.get_portfolio_vector(obs)

    def rebalance(self, obs):
        return self.predict(obs.apply(convert_to.decimal, raw=True))

    def test(self, env, nb_episodes=1, action_repetition=1, callbacks=None, visualize=False,
             nb_max_episode_steps=None, nb_max_start_steps=0, start_step_policy=None, verbose=False):
        """
        Test agent on environment
        """
        try:
            # Get env params
            self.fiat = env._fiat

            # Reset observations
            env.reset_status()
            env.reset()

            # Get max episode length
            if nb_max_episode_steps is None:
                nb_max_episode_steps = env.data_length

            # Reset counters
            t0 = time()
            self.step = 0
            episode_reward = 1

            while True:
                try:
                    action = self.rebalance(env.get_observation(True))
                    obs, reward, _, status = env.step(action)
                    episode_reward *= np.float64(reward)

                    self.step += 1

                    if visualize:
                        env.render()

                    if verbose:
                        print(">> step {0}/{1}, {2} % done, Cumulative Reward: {3}, ETC: {4}, Samples/s: {5:.04f}                   ".format(
                            self.step,
                            nb_max_episode_steps - env.obs_steps - 2,
                            int(100 * self.step / (nb_max_episode_steps - env.obs_steps - 2)),
                            episode_reward,
                            str(pd.to_timedelta((time() - t0) * ((nb_max_episode_steps - env.obs_steps - 2)
                                                                 - self.step), unit='s')),
                            1 / (time() - t0)
                        ), end="\r", flush=True)
                        t0 = time()

                    if status['OOD'] or self.step == nb_max_episode_steps:
                        return episode_reward

                    if status['Error']:
                        # e = status['Error']
                        # print("Env error:",
                        #       type(e).__name__ + ' in line ' + str(e.__traceback__.tb_lineno) + ': ' + str(e))
                        break

                except Exception as e:
                    print("Model Error:",
                          type(e).__name__ + ' in line ' + str(e.__traceback__.tb_lineno) + ': ' + str(e))
                    raise e

        except KeyboardInterrupt:
            print("\nKeyboard Interrupt: Stopping backtest\nElapsed steps: {0}/{1}, {2} % done.".format(self.step,
                                                                             nb_max_episode_steps,
                                                                             int(100 * self.step / nb_max_episode_steps)))


class TestLookAhead(APrioriAgent):
    """
    Test for look ahead bias
    """
    def __repr__(self):
        return "TestLookAhead"

    def __init__(self, mr=False, fiat="BTC"):
        super().__init__(fiat=fiat)
        self.mr = mr

    def predict(self, obs):
        prices = obs.xs('open', level=1, axis=1).astype(np.float64)
        if self.mr:
            price_relative = np.append(prices.apply(lambda x: safe_div(x[-2], x[-1])).values, [1.0])
        else:
            price_relative = np.append(prices.apply(lambda x: safe_div(x[-1], x[-2])).values, [1.0])

        return price_relative

    def rebalance(self, obs):
        factor = self.predict(obs)
        position = np.zeros_like(factor)
        position[np.argmax(factor)] = 1
        return position


class RandomWalk(APrioriAgent):
    """
    Dummy trader that samples actions from a random process
    """
    def __repr__(self):
        return "RandomWalk"

    def __init__(self, random_process=None, activation='softmax', fiat="BTC"):
        """
        Initialization method
        :param random_process: Random process used to sample actions from
        :param activation: Portfolio activation function
        :param fiat: str: symbol to use as quote currency
        """
        super().__init__(fiat)

        self.random_process = random_process
        self.activation = activation

    def predict(self, obs):
        """
        Samples a random portfolio vector given the environment observation
        """
        if self.random_process:
            if self.activation == 'softmax':
                return array_normalize(self.random_process.sample())
            elif self.activation == 'simplex':
                return self.simplex_proj(self.random_process.sample())
            else:
                return np.array(self.random_process.sample())
        else:
            if self.activation == 'softmax':
                return array_normalize(np.random.random(obs.columns.levels[0].shape[0]))
            elif self.activation == 'simplex':
                return self.simplex_proj(np.random.random(obs.columns.levels[0].shape[0]))
            else:
                return np.random.random(obs.columns.levels[0].shape[0])

    def rebalance(self, obs):
        return self.predict(obs)


class BuyAndHold(APrioriAgent):
    """
    Equally distribute cash at the first step and hold
    """
    def __repr__(self):
        return "BuyAndHold"

    def __init__(self, fiat="BTC"):
        super().__init__(fiat)

    def predict(self, obs):
        if self.step == 0:
            n_pairs = obs.columns.levels[0].shape[0]
            action = np.ones(n_pairs - 1)
            return array_normalize(action)
        else:
            return self.get_portfolio_vector(obs)[:-1]

    def rebalance(self, obs):
        position = self.predict(obs)
        position.resize(obs.columns.levels[0].shape[0])
        position[-1] = self.get_portfolio_vector(obs)[-1]
        return position


class ConstantRebalance(APrioriAgent):
    """
    Equally distribute portfolio every step
    """
    def __repr__(self):
        return "ConstantRebalance"

    def __init__(self, position=None, fiat="BTC"):
        super().__init__(fiat)
        if position:
            self.position = array_normalize(position)
        else:
            self.position = False

    def predict(self, obs):
        if not isinstance(self.position, np.ndarray):
            n_symbols = obs.columns.levels[0].shape[0]
            self.position = array_normalize(np.ones(n_symbols - 1))
            self.position = np.append(self.position, [0.0])

        return self.position

    def rebalance(self, obs):
        factor = self.predict(obs)
        return factor

    def set_params(self, **kwargs):
        self.position = np.append(array_normalize(np.array([kwargs[key]
                                            for key in kwargs]))[:-1], [0.0])


# No regret
class ONS(APrioriAgent):
    """
    Online Newton Step algorithm.
    Reference:
        A.Agarwal, E.Hazan, S.Kale, R.E.Schapire.
        Algorithms for Portfolio Management based on the Newton Method, 2006.
        http://machinelearning.wustl.edu/mlpapers/paper_files/icml2006_AgarwalHKS06.pdf
        http://rob.schapire.net/papers/newton_portfolios.pdf
    """

    def __repr__(self):
        return "ONS"

    def __init__(self, delta=0.125, beta=1, eta=0., clip_grads=1e6, fiat="BTC", name="ONS"):
        """
        :param delta, beta, eta: Model parameters. See paper.
        :param clip_grads: float: Gradient clipping bound
        """
        super().__init__(fiat=fiat, name=name)
        self.delta = delta
        self.beta = beta
        self.eta = eta
        self.clip = clip_grads
        # Lazily-initialized state used by rebalance()
        self.init = False

    def predict(self, obs):
        prices = obs.xs('open', level=1, axis=1).astype(np.float64)
        price_relative = np.append(prices.apply(lambda x: safe_div(x[-1], x[-2])).values, [1.0])
        return price_relative

    def rebalance(self, obs):
        if not self.init:
            self.n_pairs = obs.columns.levels[0].shape[0]
            self.A = np.mat(np.eye(self.n_pairs))
            self.b = np.mat(np.zeros(self.n_pairs)).T
            self.init = True

        if self.step:
            prev_posit = self.get_portfolio_vector(obs, index=-1)
            price_relative = self.predict(obs)
            return self.update(prev_posit, price_relative)

        else:
            n_pairs = obs.columns.levels[0].shape[0]
            action = np.ones(n_pairs)
            action[-1] = 0
            return array_normalize(action)

    def update(self, b, x):
        # calculate gradient
        grad = np.clip(np.mat(safe_div(x, np.dot(b, x))).T, -self.clip, self.clip)
        # update A
        self.A += grad * grad.T
        # update b
        self.b += (1 + safe_div(1., self.beta)) * grad

        # projection of p induced by norm A
        pp = self.projection_in_norm(self.delta * self.A.I * self.b, self.A)

        return pp * (1 - self.eta) + np.ones(len(x)) / float(len(x)) * self.eta

    def projection_in_norm(self, x, M):
        """
        Projection of x to simplex induced by matrix M. Uses quadratic programming.
        """
        m = M.shape[0]
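
        # The QP encodes min_p (p - x)^T M (p - x) over the simplex: expanding gives
        # p^T M p - 2 x^T M p up to a constant (hence P = 2M, q = -2Mx), while G, h
        # enforce p >= 0 and A, b enforce sum(p) == 1.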

        # Constraint matrices
        P = opt.matrix(2 * M)
        q = opt.matrix(-2 * M * x)
        G = opt.matrix(-np.eye(m))
        h = opt.matrix(np.zeros((m, 1)))
        A = opt.matrix(np.ones((1, m)))
        b = opt.matrix(1.)

        # Solve using quadratic programming
        sol = opt.solvers.qp(P, q, G, h, A, b)
        return np.squeeze(sol['x'])

    def set_params(self, **kwargs):
        self.delta = kwargs['delta']
        self.beta = kwargs['beta']
        self.eta = kwargs['eta']
        if 'mr' in kwargs:
            self.mr = bool(kwargs['mr'])


class OGS(APrioriAgent):
    """
    Online gradient step with AdaGrad algorithm optimizer
    """

    def __repr__(self):
        return "OGS"

    def __init__(self, factor=models.price_relative, lr=1, eta=0., clip_grads=1e6, damping=0.99, mr=False, fiat="BTC", name="OGS"):
        """
        :param factor: Factor model used to predict price relatives
        :param lr: float: Learning rate
        :param eta: float: Mixture weight with the uniform constant rebalanced portfolio
        :param clip_grads: float: Gradient clipping bound
        :param damping: float: Decay rate for the accumulated squared gradients
        :param mr: bool: Mean reversion flag
        """
        super().__init__(fiat=fiat, name=name)

        self.lr = lr
        self.damping = damping
        self.factor = factor
        self.eta = eta
        self.clip = clip_grads
        self.mr = mr

        self.init = False

    def predict(self, obs):
        """
        Performs prediction given environment observation
        :param obs: pandas DataFrame: Environment observation
        """
        return np.append(self.factor(obs).iloc[-1].values, [1.0])

    def rebalance(self, obs):
        if not self.init:
            n_pairs = obs.columns.levels[0].shape[0]
            action = np.ones(n_pairs)
            action[-1] = 0
            self.crp = array_normalize(action)

            # AdaGrad square gradient
            self.gti = np.ones_like(self.crp)

            self.init = True

        if self.step:
            prev_posit = self.get_portfolio_vector(obs, index=-1)
            price_relative = self.predict(obs)
            return self.update(prev_posit, price_relative)
        else:
            return self.crp

    def update(self, b, x):
        # calculate gradient
        grad = np.clip(safe_div(x, np.dot(b, x)), -self.clip, self.clip) - 1
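        # The gradient of log(b . x) w.r.t. b is x / (b . x); subtracting 1 centers it so a
        # neutral asset (relative price equal to the portfolio return) contributes no update.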
        self.gti = np.clip(self.gti * self.damping + grad ** 2, 0.0, 1e8)
        adjusted_grad = safe_div(grad, self.gti)

        # update b, we are using relative log return benchmark, so we want to maximize here
        b += self.lr * adjusted_grad

        # projection of p
        pp = simplex_proj(b)

        return pp * (1 - self.eta) + np.ones(len(x)) / float(len(x)) * self.eta

    def set_params(self, **kwargs):
        if 'lr' in kwargs:
            self.lr = kwargs['lr']
        if 'eta' in kwargs:
            self.eta = kwargs['eta']
        if 'mr' in kwargs:
            self.mr = bool(kwargs['mr'])
        if 'damping' in kwargs:
            self.damping = kwargs['damping']


class MW(APrioriAgent):
    """
    Multiplicative-Weights algorithm
    """

    def __repr__(self):
        return "Multiplicative Weights"

    def __init__(self, factor=models.price_relative, lr=1, fiat="BTC", name="MW"):
        """
        :param factor: Factor model used to predict price relatives
        :param lr: float: Learning rate
        """
        super().__init__(fiat=fiat, name=name)

        self.lr = lr
        self.factor = factor

        self.init = False

    def predict(self, obs):
        """
        Performs prediction given environment observation
        :param obs: pandas DataFrame: Environment observation
        """
        return np.append(self.factor(obs).iloc[-1].values, [1.0])


    def rebalance(self, obs):
        if not self.init:
            n_pairs = obs.columns.levels[0].shape[0]
            action = np.ones(n_pairs)
            action[-1] = 0
            self.crp = array_normalize(action)

            # Square-gradient accumulator (kept for parity with OGS; not used by MW's update)
            self.gti = np.ones_like(self.crp)

            self.init = True

        if self.step:
            prev_posit = self.get_portfolio_vector(obs, index=-1)
            price_relative = self.predict(obs)
            return self.update(prev_posit, price_relative)
        else:
            return self.crp

    def update(self, b, x):
        # update b, we are using relative log return benchmark, so we want to maximize here
        b += self.lr * b * x

        return simplex_proj(b)

    def set_params(self, **kwargs):
        if 'lr' in kwargs:
            self.lr = kwargs['lr']


class ORAMW(APrioriAgent):
    """
    Online Risk Averse Multiplicative Weights
    This is an algorithm that uses multiplicative weights with gradient experts
    and Extreme Risk Index for portfolio allocation
    References:
        Extreme Risk Index:
        https://arxiv.org/pdf/1505.04045.pdf
    """
    def __repr__(self):
        return "Online Risk Averse Multiplicative Weights"

    def __init__(self, window=120, k=0.1, lr=0.5, mpc=1, fiat="BTC", name='ORAMW'):
        super().__init__(fiat=fiat, name=name)
        self.window = window - 1
        self.k = k
        self.mpc = mpc
        self.opt = gt.MultiplicativeWeights(lr)

        # Extreme risk index
        self.cons = [
            {'type': 'eq', 'fun': lambda w: w.sum() - 1}, # Simplex region
            {'type': 'ineq', 'fun': lambda w: w}, # Positive bound
            # Maximum position concentration constraint
            {'type': 'ineq', 'fun': lambda w: self.mpc - np.linalg.norm(w[:-1], ord=np.inf)}
        ]

        self.crp = None
        self.b = None
        self.init = False

    def predict(self, obs):
        """
        Performs prediction given environment observation
        :param obs: pandas DataFrame: Environment observation
        """
        prices = obs.xs('open',level=1, axis=1)

        factor = np.hstack([prices.rolling(2).apply(
            lambda x: np.log(safe_div(x[-1], x[-2]))).dropna().values, np.zeros((self.window, 1))])

        factor2 = np.hstack([prices.rolling(2).apply(
            lambda x: np.log(safe_div(x[-2], x[-1]))).dropna().values, np.zeros((self.window, 1))])

        return factor, factor2

    def loss(self, w, R, Z, x):
        # minimize allocation risk
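        # The second term is presumably a cash penalty: the fiat weight w[-1], scaled by the mean
        # price relative (exp of the latest log returns) and their variance across assets, which
        # discourages holding cash when the market is moving.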
        return risk.ERI(R, Z, w) + w[-1] * np.exp(x).mean() * x.var()

    def update(self, b, x, x2):

        # Update portfolio with no regret
        last_x = x[-1, :]
        leader = np.zeros_like(last_x)
        leader[np.argmax(last_x)] = -1

        b = simplex_proj(self.opt.optimize(leader, b))

        # Manage allocation risk
        b = minimize(
            self.loss,
            b,
            args=(*risk.polar_returns(x2, self.k), last_x),
            constraints=self.cons,
            options={'maxiter': 300},
            tol=1e-6,
            bounds=tuple((0,1) for _ in range(b.shape[0]))
        )

        # Log variables
        self.log['lr'] = "%.4f" % self.opt.lr
        self.log['mpc'] = "%.4f" % self.mpc
        self.log['risk'] = "%.6f" % b['fun']

        # Return best portfolio
        return b['x']

    def rebalance(self, obs):
        """
        Performs portfolio rebalance within environment
        :param obs: pandas DataFrame: Environment observation
        :return: numpy array: Portfolio vector
        """
        if not self.init:
            n_pairs = obs.columns.levels[0].shape[0]
            action = np.ones(n_pairs)
            action[-1] = 0
            self.crp = array_normalize(action)
            self.b = self.crp
            self.init = True

        if self.step:
            # b = self.get_portfolio_vector(obs)
            x, x2 = self.predict(obs)
            self.b = self.update(self.b, x, x2)
            return self.b

        else:
            return self.crp

    def set_params(self, **kwargs):
        if 'window' in kwargs:
            self.window = int(kwargs['window'])
        if 'k' in kwargs:
            self.k = kwargs['k']
        if 'lr' in kwargs:
            self.opt.lr = kwargs['lr']
        if 'mpc' in kwargs:
            self.mpc = kwargs['mpc']


class NRS(APrioriAgent):
    """
    Pursuit and Evade No-Regret System
    """
    def __repr__(self):
        return "Pursuit and Evade No-Regret System"

    def __init__(self, window=120, k=0.1, lr=0.5, gradlr=1e-2, beta=0.5,
                 mpc=1, fiat="BTC", name='NRS'):
        super().__init__(fiat=fiat, name=name)
        self.window = window - 1
        self.k = k
        self.mpc = mpc
        self.opt = gt.MultiplicativeWeights(lr)
        self.pe = gt.PursuitAndEvade(gradlr)
        self.beta = beta
        self.lr = lr

        # Extreme risk index
        self.cons = [
            {'type': 'eq', 'fun': lambda w: w.sum() - 1}, # Simplex region
            {'type': 'ineq', 'fun': lambda w: w}, # Positive bound
            {'type': 'ineq', 'fun': lambda w: self.mpc - np.linalg.norm(w, ord=np.inf)}
            # Maximum position concentration constraint
        ]

        self.b = None
        self.w = None
        self.score = None
        self.init = False

    def predict(self, obs):
        """
        Performs prediction given environment observation
        :param obs: pandas DataFrame: Environment observation
        """
        prices = obs.xs('open', level=1, axis=1)
        log_returns = np.hstack([prices.rolling(2).apply(
            lambda x: np.log(safe_div(x[-1], x[-2]))).dropna().values, np.zeros((self.window, 1))])
        return log_returns

    # Pareto Extreme Risk Index
    @staticmethod
    def estimate_alpha(R):
        return safe_div((R.shape[0] - 1), np.log(safe_div(R[:-1], R[-1])).sum())

    @staticmethod
    def estimate_gamma(alpha, Z, w):
        return (1 / (Z.shape[0] - 1)) * np.power(np.clip(w * Z[:-1].T, 0.0, np.inf), alpha).sum()

    def loss_tf(self, w, alpha, Z, x):
        # minimize allocation risk
        return self.estimate_gamma(alpha, Z, w) + w[-1] * np.exp(x).mean() * x.var()

    def loss_eri(self, w, alpha, Z, b):
        # minimize allocation risk
        return self.estimate_gamma(alpha, Z, w) + np.linalg.norm(b - w) ** 2

    def update(self, b, x):
        # Update portfolio with no regret
        last_x = x[-1, :]
        self.r_hat = self.beta * self.r_hat + (1 - self.beta) * last_x

        # ERI params
        R, Z = risk.polar_returns(-x, self.k)
        alpha = self.estimate_alpha(R)

        # Compute experts scores
        for i in range(self.score.shape[0]):
            self.score[i] = self.score[i] * self.beta + (1 - self.beta) * np.dot(last_x, self.w[i])

        cons = self.cons + [{'type': 'eq', 'fun': lambda w:
            np.dot(w, self.r_hat) - np.clip(0.001, 0.0, self.r_hat.max())}]

        # Choose to follow or pursuit
        best_w = self.w[np.argmax(self.score)]
        if np.allclose(b, best_w, 1e-2, 1e-2):
            action = 'follow'
        else:
            action = 'pursuit'

        # Update experts
        leader = np.zeros_like(last_x)
        leader[np.argmax(last_x)] = -1

        # self.opt1.lr = self.lr / np.exp((self.score[1] + self.score[0]))
        self.w[0] = minimize(
            self.loss_tf,
            self.opt.optimize(leader, self.w[0]),
            args=(alpha, Z, last_x),
            constraints=self.cons,
            options={'maxiter': 666},
            tol=1e-7,
            bounds=tuple((0,1) for _ in range(b.shape[0]))
        )['x']

        self.w[1] = minimize(
            self.loss_eri,
            self.w[1],
            args=(alpha, Z, self.w[1]),
            constraints=cons,
            options={'maxiter': 666},
            tol=1e-7,
            bounds=tuple((0,1) for _ in range(b.shape[0]))
        )['x']

        if action == 'follow':
            b = simplex_proj(self.w[np.argmax(self.score)])
        elif action == 'pursuit':
            b = simplex_proj(self.pe.optimize(self.w[np.argmax(self.score)], b))

        # Log variables
        self.log['score'] = "tf: %.4f, mr: %.4f" % (self.score[0], self.score[1])
        self.log['ERI'] = "%.8f" % risk.ERI(*risk.polar_returns(-x, self.k), b)
        self.log['TCVaR'] = "%.2f" % risk.TCVaR(*risk.fit_t(np.dot(x, b)))
        self.log['alpha'] = "%.2f" % alpha
        self.log['CC'] = "%.2f" % np.power(b, 2).sum() ** -1
        self.log['action'] = action
        self.log['lr'] = "%.2f" % self.opt.lr
        self.log['beta'] = "%.2f" % self.beta
        self.log['mpc'] = "%.2f" % self.mpc

        return b

    def rebalance(self, obs):
        """
        Performs portfolio rebalance within environment
        :param obs: pandas DataFrame: Environment observation
        :return: numpy array: Portfolio vector
        """
        if not self.step:
            n_pairs = obs.columns.levels[0].shape[0]
            crp = np.ones(n_pairs)
            crp[-1] = 0

            self.crp = array_normalize(crp)
            self.w = np.vstack([self.crp.reshape([1, -1]) for _ in range(2)])
            self.b = self.crp
            self.r_hat = np.zeros(n_pairs)
            self.score = np.zeros(self.w.shape[0])
            return self.crp

        x = self.predict(obs)
        self.b = self.update(self.b, x)
        return self.b


class AdaHedge(APrioriAgent):
    """
    AdaHedge
    https://arxiv.org/pdf/1301.0534.pdf
    """
    def __repr__(self):
        return "AdaHedge"

    def __init__(self, window=3, eta=0.999, fiat="BTC", name='AdaHedge'):
        super().__init__(fiat=fiat, name=name)
        self.window = window - 1
        self.opt = gt.HigherOrderMultiplicativeWeights(1, 8)
        self.eta = eta

    def predict(self, obs):
        """
        Performs prediction given environment observation
        :param obs: pandas DataFrame: Environment observation
        """
        prices = obs.xs('open',level=1, axis=1)
        factor = np.hstack([prices.rolling(2).apply(
            lambda x: np.log(safe_div(x[-1], x[-2]))).dropna().values, np.zeros((self.window, 1))])

        return factor

    def update(self, b, x):
        # Update portfolio with no regret
        last_x = x[-1, :]
        experts_losses = -last_x

        h = np.dot(experts_losses, b)
        m = (-1 / self.opt.lr) * np.log(np.dot(b, exp_approx(-self.opt.lr * experts_losses, order=8)))
        delta = h - m
        self.delta = self.eta * self.delta + delta
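        # delta is the per-round mixability gap (hedge loss h minus mix loss m); its accumulated
        # value, here exponentially discounted by eta, drives the learning rate as in AdaHedge.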

        # Update learning rate
        self.opt.lr = np.log(b.shape[0]) / self.delta

        # Update weights
        b = array_normalize(self.opt.optimize(experts_losses, b * self.eta + (1 - self.eta) * self.crp))

        # Log variables
        self.log['lr'] = "%.4f" % self.opt.lr
        self.log['delta'] = "%.4f" % self.delta
        self.log['eta'] = "%.4f" % self.eta

        # Return updated portfolio
        return b

    def rebalance(self, obs):
        """
        Performs portfolio rebalance within environment
        :param obs: pandas DataFrame: Environment observation
        :return: numpy array: Portfolio vector
        """
        if not self.step:
            n_pairs = obs.columns.levels[0].shape[0]
            action = np.ones(n_pairs)
            action[-1] = 0
            self.crp = self.b = array_normalize(action)

            # AdaHedge accumulators
            self.delta = 1

            return self.crp

        x = self.predict(obs)
        self.b = self.update(self.b, x)
        return self.b


class Flipflop(APrioriAgent):
    """
    Flipflop
    https://arxiv.org/pdf/1301.0534.pdf
    """

    def __repr__(self):
        return "Flipflop"

    def __init__(self, window=120, phi=1.1, alpha=0.1, fiat="BTC", name='Flipflop'):
        super().__init__(fiat=fiat, name=name)
        self.window = window - 1
        self.opt = gt.HigherOrderMultiplicativeWeights(1, order=8)
        self.scale = np.array([phi / alpha, alpha])

    def predict(self, obs):
        """
        Performs prediction given environment observation
        :param obs: pandas DataFrame: Environment observation
        """
        prices = obs.xs('open', level=1, axis=1)

        factor = np.hstack([prices.rolling(2).apply(
            lambda x: np.log(safe_div(x[-1], x[-2]))).dropna().values, np.zeros((self.window, 1))])

        return factor

    def update(self, b, x):
        # Update portfolio with no regret
        last_x = x[-1, :]
        experts_loss = -last_x

        # losses
        h = np.dot(experts_loss, b)
        m = (-1 / self.opt.lr) * np.log(np.dot(b, np.exp(-self.opt.lr * experts_loss)))
        delta = h - m

        # Accumulate delta
        self.delta[self.regime] += delta

        if self.delta[self.regime] > self.scale[self.regime] * self.delta[1 - self.regime]:
            self.regime = 1 - self.regime
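
        # Regime 0 approximates follow-the-leader (very large learning rate); regime 1 uses the
        # AdaHedge-style adaptive rate. The switch fires when one accumulated gap outgrows the
        # other by its scale factor, as in the FlipFlop scheme.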

        if self.regime == 0:
            self.opt.lr = 1e3
        else:
            self.opt.lr = np.log(b.shape[0]) / self.delta[1]

        b = simplex_proj(self.opt.optimize(experts_loss, b))

        # Log variables
        self.log['lr'] = "%.4f" % self.opt.lr
        self.log['delta'] = "%s" % str(self.delta)

        # Return best portfolio
        return b

    def rebalance(self, obs):
        """
        Performs portfolio rebalance within environment
        :param obs: pandas DataFrame: Environment observation
        :return: numpy array: Portfolio vector
        """
        if not self.step:
            n_pairs = obs.columns.levels[0].shape[0]
            action = np.ones(n_pairs)
            action[-1] = 0
            self.crp = self.b = array_normalize(action)

            # AdaHedge accumulators
            self.delta = np.ones(2, dtype=np.float64) * 1e-3
            self.regime = 0

            return self.crp

        x = self.predict(obs)
        self.b = self.update(self.b, x)
        return self.b


# Pattern trading
class HarmonicTrader(APrioriAgent):
    """
    Fibonacci harmonic pattern trader
    """

    def __repr__(self):
        return "HarmonicTrader"

    def __init__(self, peak_order=7, err_allowed=0.05, decay=0.99, activation=simplex_proj, fiat="BTC", name="Harmonic"):
        """
        Fibonacci trader init method
        :param peak_order: Extreme finder movement magnitude threshold
        :param err_allowed: Pattern error margin to be accepted
        :param decay: float: Decay rate for portfolio selection. Between 0 and 1
        :param fiat: Fiat symbol to use in trading
        """
        super().__init__(fiat, name=name)
        self.err_allowed = err_allowed
        self.peak_order = peak_order
        self.alpha = [1., 1.]
        self.decay = decay
        self.activation = activation

    def find_extreme(self, obs):
        max_idx = argrelextrema(obs.open.values, np.greater, order=self.peak_order)[0]
        min_idx = argrelextrema(obs.open.values, np.less, order=self.peak_order)[0]
        extreme_idx = np.concatenate([max_idx, min_idx, [obs.shape[0] - 1]])
        extreme_idx.sort()
        return obs.open.iloc[extreme_idx]

    def calc_intervals(self, extremes):
        XA = extremes.iloc[-2] - extremes.iloc[-1]
        AB = extremes.iloc[-3] - extremes.iloc[-2]
        BC = extremes.iloc[-4] - extremes.iloc[-3]
        CD = extremes.iloc[-5] - extremes.iloc[-4]

        return XA, AB, BC, CD

    def find_pattern(self, obs, c1, c2, c3):
        try:
            XA, AB, BC, CD = self.calc_intervals(self.find_extreme(obs))

            # Gartley fibonacci pattern
            AB_range = np.array([c1[0] - self.err_allowed, c1[1] + self.err_allowed]) * abs(XA)
            BC_range = np.array([c2[0] - self.err_allowed, c2[1] + self.err_allowed]) * abs(AB)
            CD_range = np.array([c3[0] - self.err_allowed, c3[1] + self.err_allowed]) * abs(BC)
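            # Each leg must lie within a pattern-specific Fibonacci ratio range of the previous
            # leg, widened by err_allowed on both sides.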

            if AB_range[0] < abs(AB) < AB_range[1] and \
                                    BC_range[0] < abs(BC) < BC_range[1] and \
                                    CD_range[0] < abs(CD) < CD_range[1]:
                if XA > 0 and AB < 0 and BC > 0 and CD < 0:
                    return 1
                elif XA < 0 and AB > 0 and BC < 0 and CD > 0:
                    return -1
                else:
                    return 0
            else:
                return 0
        except IndexError:
            return 0

    def is_gartley(self, obs):
        return self.find_pattern(obs, c1=(0.618, 0.618), c2=(0.382, 0.886), c3=(1.27, 1.618))

    def is_butterfly(self, obs):
        return self.find_pattern(obs, c1=(0.786, 0.786), c2=(0.382, 0.886), c3=(1.618, 2.618))

    def is_bat(self, obs):
        return self.find_pattern(obs, c1=(0.382, 0.5), c2=(0.382, 0.886), c3=(1.618, 2.618))

    def is_crab(self, obs):
        return self.find_pattern(obs, c1=(0.382, 0.618), c2=(0.382, 0.886), c3=(2.24, 3.618))

    def predict(self, obs):
        pairs = obs.columns.levels[0]
        action = np.zeros(pairs.shape[0] - 1)
        for i, pair in enumerate(pairs):
            if pair != self.fiat:
                pattern = np.array([pattern(obs[pair]) for pattern in [self.is_gartley,
                                                                       self.is_butterfly,
                                                                       self.is_bat,
                                                                       self.is_crab]]).sum()

                action[i] = pattern

        return action

    def rebalance(self, obs):
        if self.step:
            pairs = obs.columns.levels[0]
            prev_port = self.get_portfolio_vector(obs)
            action = self.predict(obs)
            port_vec = np.zeros(pairs.shape[0])
            for i in range(pairs.shape[0] - 1):
                if action[i] >= 0:
                    port_vec[i] = max(0.,
                                      (self.decay * prev_port[i] + (1 - self.decay)) + self.alpha[0] * action[
                                          i])
                else:
                    port_vec[i] = max(0.,
                                      (self.decay * prev_port[i] + (1 - self.decay)) + self.alpha[1] * action[
                                          i])

            port_vec[-1] = max(0, 1 - port_vec.sum())

        else:
            n_pairs = obs.columns.levels[0].shape[0]
            port_vec = np.ones(n_pairs)
            port_vec[-1] = 0

        return self.activation(port_vec)

    def set_params(self, **kwargs):
        self.err_allowed = kwargs['err_allowed']
        self.peak_order = int(kwargs['peak_order'])
        self.decay = kwargs['decay']
        self.alpha = [kwargs['alpha_up'], kwargs['alpha_down']]


# Mean reversion
class PAMR(APrioriAgent):
    """
    Passive aggressive mean reversion strategy for portfolio selection.

    Reference:
        B. Li, P. Zhao, S. C.H. Hoi, and V. Gopalkrishnan.
        Pamr: Passive aggressive mean reversion strategy for portfolio selection, 2012.
        https://link.springer.com/content/pdf/10.1007%2Fs10994-012-5281-z.pdf
    """
    def __repr__(self):
        return "PAMR"

    def __init__(self, eps=0.03, C=2444, variant="PAMR1", fiat="BTC", name="PAMR"):
        """
        :param eps: float: Sensitivity threshold. Lower is more sensitive.
        :param C: float: Aggressiveness parameter for the PAMR1 and PAMR2 variants.
        :param variant: str: Algorithm variant. One of PAMR0, PAMR1, PAMR2.
        """
        super().__init__(fiat=fiat, name=name)
        self.eps = eps
        self.C = C
        self.variant = variant

    def predict(self, obs):
        """
        Performs prediction given environment observation
        """
        prices = obs.xs('open', level=1, axis=1).astype(np.float64)
        price_relative = np.append(prices.apply(lambda x: safe_div(x[-2], x[-1])).values, [1.0])

        return price_relative

    def rebalance(self, obs):
        """
        Performs portfolio rebalance within environment
        :param obs: pandas DataFrame: Environment observation
        :return: numpy array: Portfolio vector
        """
        if self.step:
            prev_posit = self.get_portfolio_vector(obs, index=-2)
            price_relative = self.predict(obs)
            return self.update(prev_posit, price_relative)
        else:
            n_pairs = obs.columns.levels[0].shape[0]
            action = np.ones(n_pairs)
            action[-1] = 0
            return array_normalize(action)

    def update(self, b, x):
        """
        Update portfolio weights to satisfy constraint b * x <= eps
        and minimize distance to previous portfolio.
        :param b: numpy array: Last portfolio vector
        :param x: numpy array: Price movement prediction
        """
        # x_mean = np.mean(x)
        # if np.dot(b, x) >= 1:
        #     le = max(0., np.dot(b, x) - (1 + self.sensitivity))
        # else:
        #     le = max(0, (1 - self.sensitivity) - np.dot(b, x))

        x_mean = np.mean(x)

        le = max(0., np.dot(b, x) - self.eps)
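
        # lam is the passive-aggressive step size: PAMR0 takes the unconstrained step, PAMR1 caps
        # it at C, and PAMR2 softens it with a slack term 0.5 / C in the denominator.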

        if self.variant == 'PAMR0':
            lam = safe_div(le, np.linalg.norm(x - x_mean) ** 2)
        elif self.variant == 'PAMR1':
            lam = min(self.C, safe_div(le, np.linalg.norm(x - x_mean) ** 2))
        elif self.variant == 'PAMR2':
            lam = safe_div(le, (np.linalg.norm(x - x_mean) ** 2 + 0.5 / self.C))
        else:
            raise TypeError("Bad variant param.")

        # limit lambda to avoid numerical problems
        lam = min(100000, lam)

        # update portfolio
        b += lam * (x - x_mean)

        # project it onto simplex
        return simplex_proj(b)

    def set_params(self, **kwargs):
        self.eps = kwargs['eps']
        if 'C' in kwargs:
            self.C = kwargs['C']
        self.variant = kwargs['variant']


class OLMAR(APrioriAgent):
    """
        On-Line Portfolio Selection with Moving Average Reversion

        Reference:
            B. Li and S. C. H. Hoi.
            On-line portfolio selection with moving average reversion, 2012.
            http://icml.cc/2012/papers/168.pdf
        """

    def __repr__(self):
        return "OLMAR"

    def __init__(self, window=7, eps=0.02, fiat="BTC", name="OLMAR"):
        """
        :param window: integer: Lookback window size.
        :param eps: float: Threshold value for updating portfolio.
        """
        super().__init__(fiat=fiat, name=name)
        self.window = window
        self.eps = eps

    def predict(self, obs):
        """
        Performs prediction given environment observation
        :param obs: pandas DataFrame: Environment observation
        """
        prices = obs.xs('open', level=1, axis=1).astype(np.float64)
        price_predict = np.append(safe_div(prices.iloc[-self.window:].mean().values, prices.iloc[-1].values), [1.0])

        return price_predict

    def rebalance(self, obs):
        """
        Performs portfolio rebalance within environment
        :param obs: pandas DataFrame: Environment observation
        :return: numpy array: Portfolio vector
        """
        if self.step:
            prev_posit = self.get_portfolio_vector(obs, index=-2)
            price_predict = self.predict(obs)
            return self.update(prev_posit, price_predict)
        else:
            n_pairs = obs.columns.levels[0].shape[0]
            action = np.ones(n_pairs)
            action[-1] = 0
            return array_normalize(action)

    def update(self, b, x):
        """
        Update portfolio weights to satisfy constraint b * x >= eps
        and minimize distance to previous weights.
        :param b: numpy array: Last portfolio vector
        :param x: numpy array: Price movement prediction
        """
        xt = np.dot(b, x)
        x_mean = np.mean(x)

        lam = max(0., safe_div((xt - self.eps), np.linalg.norm(x - x_mean) ** 2))

        # limit lambda to avoid numerical problems
        lam = min(100000, lam)

        # update portfolio
        b += lam * (x - x_mean)

        # project it onto simplex
        return simplex_proj(b)

    def set_params(self, **kwargs):
        self.eps = kwargs['eps']
        self.window = int(kwargs['window'])


class CWMR(APrioriAgent):
    """ Confidence weighted mean reversion.
    Reference:
        B. Li, S. C. H. Hoi, P.L. Zhao, and V. Gopalkrishnan.
        Confidence weighted mean reversion strategy for online portfolio selection, 2013.
        http://jmlr.org/proceedings/papers/v15/li11b/li11b.pdf
    """
    def __repr__(self):
        return "CWMR"

    def __init__(self, eps=-0.5, confidence=0.95, var=0, rebalance=True, fiat="BTC", name="CWMR"):
        """
        :param eps: Mean reversion threshold (expected return on current day must be lower
                    than this threshold). Recommended value is -0.5.
        :param confidence: Confidence parameter for profitable mean reversion portfolio.
                    Recommended value is 0.95.
        """
        super(CWMR, self).__init__(fiat=fiat, name=name)

        # input check
        if not (0 <= confidence <= 1):
            raise ValueError('confidence must be from interval [0,1]')
        if rebalance:
            self.reb = -2
        else:
            self.reb = -1
        self.eps = eps
        self.theta = stats.norm.ppf(confidence)
        self.var = var

    def predict(self, obs):
        """
        Performs prediction given environment observation
        """
        prices = obs.xs('open', level=1, axis=1).astype(np.float64)
        price_relative = prices.apply(lambda x: safe_div(x[-1], x[-2])).values
        price_relative = np.append(price_relative, [1.0])

        return price_relative

    def update(self, b, x):
        # initialize
        m = len(x)
        mu = np.matrix(b).T
        sigma = self.sigma
        theta = self.theta
        eps = self.eps
        x = np.matrix(x).T  # matrices are easier to manipulate

        # 4. Calculate the following variables
        M = mu.T * x
        V = x.T * sigma * x
        x_upper = sum(diag(sigma) * x) / trace(sigma)

        # 5. Update the portfolio distribution
        mu, sigma = self.calculate_change(x, x_upper, mu, sigma, M, V, theta, eps)

        # 6. Normalize mu and sigma
        mu = simplex_proj(mu)
        sigma = sigma / (m ** 2 * trace(sigma))
        # Original MATLAB reference (not ported): sigma(sigma < 1e-4*eye(m)) = 1e-4;
        self.sigma = sigma

        return np.array(mu.T).ravel()

    def calculate_change(self, x, x_upper, mu, sigma, M, V, theta, eps):
        if not self.var:
            # lambda from equation 7
            foo = (V - x_upper * x.T * np.sum(sigma, axis=1)) / M ** 2 + V * theta ** 2 / 2.
            a = foo ** 2 - V ** 2 * theta ** 4 / 4
            b = 2 * (eps - log(M)) * foo
            c = (eps - log(M)) ** 2 - V * theta ** 2

            a, b, c = a[0, 0], b[0, 0], c[0, 0]

            lam = max(0,
                      (-b + sqrt(b ** 2 - 4 * a * c)) / (2. * a),
                      (-b - sqrt(b ** 2 - 4 * a * c)) / (2. * a))
            # bound it due to numerical problems
            lam = min(lam, 1E+7)

            # update mu and sigma
            U_sqroot = 0.5 * (-lam * theta * V + sqrt(lam ** 2 * theta ** 2 * V ** 2 + 4 * V))
            mu = mu - lam * sigma * (x - x_upper) / M
            sigma = inv(inv(sigma) + theta * lam / U_sqroot * diag(x) ** 2)
            # Original MATLAB reference (not ported):
            #   tmp_sigma = inv(inv(sigma) + theta*lam/U_sqroot*diag(xt)^2);
            #   % Don't update sigma if results are badly scaled.
            #   if all(~isnan(tmp_sigma(:)) & ~isinf(tmp_sigma(:)))
            #       sigma = tmp_sigma;
            #   end

            return mu, sigma

        else:
            """ First variant of a CWMR outlined in original article. It is
            only approximation to the posted problem. """
            # lambda from equation 7
            foo = (V - x_upper * x.T * np.sum(sigma, axis=1)) / M ** 2
            a = 2 * theta * V * foo
            b = foo + 2 * theta * V * (eps - log(M))
            c = eps - log(M) - theta * V

            a, b, c = a[0, 0], b[0, 0], c[0, 0]

            lam = max(0,
                      (-b + sqrt(b ** 2 - 4 * a * c)) / (2. * a),
                      (-b - sqrt(b ** 2 - 4 * a * c)) / (2. * a))
            # bound it due to numerical problems
            lam = min(lam, 1E+7)

            # update mu and sigma
            mu = mu - lam * sigma * (x - x_upper) / M
            sigma = inv(inv(sigma) + 2 * lam * theta * diag(x) ** 2)
            # Original MATLAB reference (not ported):
            #   tmp_sigma = inv(inv(sigma) + theta*lam/U_sqroot*diag(xt)^2);
            #   % Don't update sigma if results are badly scaled.
            #   if all(~isnan(tmp_sigma(:)) & ~isinf(tmp_sigma(:)))
            #       sigma = tmp_sigma;
            #   end

            return mu, sigma

    def rebalance(self, obs):
        """
        Performs portfolio rebalance within environment
        :param obs: pandas DataFrame: Environment observation
        :return: numpy array: Portfolio vector
        """
        n_pairs = obs.columns.levels[0].shape[0]
        if self.step:
            prev_posit = self.get_portfolio_vector(obs, index=self.reb)
            price_relative = self.predict(obs)
            return self.update(prev_posit, price_relative)
        else:
            action = np.ones(n_pairs)
            action[-1] = 0
            self.sigma = np.matrix(np.eye(n_pairs) / n_pairs ** 2)
            return array_normalize(action)

    def set_params(self, **kwargs):
        self.eps = kwargs['eps']
        self.theta = stats.norm.ppf(kwargs['confidence'])


class STMR(APrioriAgent):
    """
    Short term mean reversion strategy for portfolio selection.

    Original algo by José Olímpio Mendes
    27/11/2017
    """

    def __repr__(self):
        return "STMR"

    def __init__(self, eps=0.02, eta=0.0, window=120, k=0.1, mpc=1, rc=1, fiat="BTC", name="STMR"):
        """
        :param eps: float: Sensitivity threshold. Lower is more sensitive.
        :param eta: float: Mixture weight with the uniform constant rebalanced portfolio
        :param window: int: Lookback window for the polar returns
        :param k: float: Fraction of extreme observations used in the risk index
        :param mpc: float: Maximum position concentration bound
        :param rc: float: Risk aversion coefficient
        """
        super().__init__(fiat=fiat, name=name)
        self.eps = eps
        self.eta = eta
        self.window = window - 1
        self.k = k
        self.mpc = mpc
        self.rc = rc

        self.init = False

    def predict(self, obs):
        """
        Performs prediction given environment observation
        """
        prices = obs.xs('open', level=1, axis=1).astype(np.float64)
        price_relative = np.append(prices.apply(lambda x: safe_div(x[-2], x[-1]) - 1).values, [0.0])

        return price_relative

    def polar_returns(self, obs):
        """
        Calculate polar return
        :param obs: pandas DataFrame
        :return: return radius, return angles
        """
        # Find relation between price and previous price
        prices = obs.xs('open', level=1, axis=1).astype(np.float64).iloc[-self.window - 1:]
        price_relative = np.hstack([np.mat(prices.rolling(2).apply(
            lambda x: safe_div(x[-2], x[-1]) - 1).dropna().values), np.zeros((self.window, 1))])

        # Find the radius and the angle decomposition on price relative vectors
        radius = np.linalg.norm(price_relative, ord=1, axis=1)
        angle = np.divide(price_relative, np.mat(radius).T)

        # Select the 'window' greater values on the observation
        index = np.argpartition(radius, -(int(self.window * self.k) + 1))[-(int(self.window * self.k) + 1):]
        index = index[np.argsort(radius[index])]

        # Return the radius and the angle for extreme found values
        return radius[index][::-1], angle[index][::-1]

    def estimate_alpha(self, radius):
        """
        Estimate pareto's distribution alpha
        :param radius: polar return radius
        :return: alpha
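
        Note: this matches the Hill estimator of the Pareto tail index, using the k largest
        return radii with the smallest of them as the threshold.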
        """
        return safe_div((radius.shape[0] - 1), np.log(safe_div(radius[:-1], radius[-1])).sum())

    def estimate_gamma(self, alpha, Z, w):
        """
        Estimate the extreme risk index gamma
        :param alpha: Pareto tail index estimate
        :param Z: angular components of the extreme polar returns
        :param w: portfolio weight vector
        :return: estimated risk index
        """
        return (1 / (Z.shape[0] - 1)) * np.power(np.clip(w * Z[:-1].T, 0.0, np.inf), alpha).sum()

    def loss(self, w, alpha, Z, x):
        # minimize allocation risk
        gamma = self.estimate_gamma(alpha, Z, w)
        # if the experts mean returns are low and you have no options, you can choose fiat
        return self.rc * gamma + w[-1] * ((x.mean()) * x.var()) ** 2

    def update(self, b, x, alpha, Z):
        """
        Update portfolio weights to satisfy constraint b * x <= eps
        and minimize distance to previous portfolio.
        :param b: numpy array: Last portfolio vector
        :param x: numpy array: Price movement prediction
        """
        x_mean = np.mean(x)
        portvar = np.dot(b, x)

        change = abs((portvar + x[np.argmax(abs(x - x_mean))]) / 2)

        lam = np.clip(safe_div(change - self.eps, np.linalg.norm(x - x_mean) ** 2), 0.0, 1e6)

        # update portfolio
        b += lam * (x - x_mean)

        # # project it onto simplex
        b = simplex_proj(b) * (1 - self.eta) + self.eta * self.crp

        if self.rc > 0:
            # Extreme risk index
            # simplex constraints
            cons = [
                {'type': 'eq', 'fun': lambda w: w.sum() - 1},  # Simplex region
                {'type': 'ineq', 'fun': lambda w: w}  # Positive bound
            ]

            if self.mpc < 1:
                # Maximum position concentration constraint
                cons.append({'type': 'ineq', 'fun': lambda w: self.mpc - np.linalg.norm(w[:-1], ord=np.inf)})

            # Minimize loss starting from adjusted portfolio
            b = minimize(self.loss, b, args=(alpha, Z, x + 1), constraints=cons)['x']

        # Return best portfolio
        return np.clip(b, 0, 1)  # Truncate small errors

    def rebalance(self, obs):
        """
        Performs portfolio rebalance within environment
        :param obs: pandas DataFrame: Environment observation
        :return: numpy array: Portfolio vector
        """
        if not self.init:
            n_pairs = obs.columns.levels[0].shape[0]
            action = np.ones(n_pairs)
            action[-1] = 0
            self.crp = array_normalize(action)
            self.init = True

        if self.step:
            b = self.get_portfolio_vector(obs)
            x = self.predict(obs)
            # return self.update(prev_posit, price_relative)
            R, Z = self.polar_returns(obs)
            alpha = self.estimate_alpha(R)
            self.last_port = self.update(b, x, alpha, Z)
            return self.last_port

        else:
            return self.crp

    def set_params(self, **kwargs):
        if 'eps' in kwargs:
            self.eps = kwargs['eps']
        if 'eta' in kwargs:
            self.eta = kwargs['eta']


class KAMAMR(STMR):
    def __repr__(self):
        return "KAMAMR"

    def __init__(self, eps=0.02, window=3, rebalance=True, activation=simplex_proj, fiat="BTC", name="KAMAMR"):
        """
        :param eps: float: Sensitivity threshold. Lower is more sensitive.
        :param window: int: KAMA lookback window
        """
        super().__init__(fiat=fiat, name=name)
        self.eps = eps
        self.window = window
        self.activation = activation
        if rebalance:
            self.reb = -2
        else:
            self.reb = -1

    def predict(self, obs):
        """
        Performs prediction given environment observation
        """
        prices = obs.xs('open', level=1, axis=1).astype(np.float64)
        mu = prices.apply(tl.KAMA, timeperiod=self.window, raw=True).iloc[-1].values

        price_relative = np.append(safe_div(mu, prices.iloc[-1].values) - 1, [0.0])

        return price_relative

    def set_params(self, **kwargs):
        self.eps = kwargs['eps']
        self.window = int(kwargs['window'])


# Portfolio optimization
class TCO(APrioriAgent):
    """
    Transaction cost optimization for online portfolio selection

    Reference:
        B. Li and J. Wang
        http://ink.library.smu.edu.sg/cgi/viewcontent.cgi?article=4761&context=sis_research
    """
    def __repr__(self):
        return "TCO"

    def __init__(self, factor=models.price_relative, toff=0.1, optimize_factor=True, rebalance=True, fiat="BTC", name="TCO"):
        """
        :param factor: callable: Price prediction factor model.
        :param toff: float: Transaction cost threshold for updating the portfolio.
        """
        super().__init__(fiat=fiat, name=name)
        self.toff = toff
        self.factor = factor
        self.optimize_factor = optimize_factor
        if rebalance:
            self.reb = -2
        else:
            self.reb = -1

    def predict(self, obs):
        """
        Performs prediction given environment observation
        :param obs: pandas DataFrame: Environment observation
        """
        # price_predict = np.empty(obs.columns.levels[0].shape[0] - 1, dtype=np.float64)
        # for key, symbol in enumerate([s for s in obs.columns.levels[0] if s is not self.fiat]):
        #     price_predict[key] = np.float64(obs[symbol].open.iloc[-self.window:].mean() /
        #                                     (obs.get_value(obs.index[-1], (symbol, 'open')) + self.epsilon))
        prev_posit = self.get_portfolio_vector(obs, index=-1) + 1
        factor_posit = self.factor(obs) + 1
        return safe_div(factor_posit, prev_posit)

    def rebalance(self, obs):
        """
        Performs portfolio rebalance within environment
        :param obs: pandas DataFrame: Environment observation
        :return: numpy array: Portfolio vector
        """
        if self.step:
            prev_posit = self.get_portfolio_vector(obs, index=self.reb)
            price_prediction = self.predict(obs)
            return self.update(prev_posit, price_prediction)
        else:
            n_pairs = obs.columns.levels[0].shape[0]
            action = np.ones(n_pairs)
            action[-1] = 0
            return array_normalize(action)

    def update(self, b, x):
        """
        Update portfolio weights, trimming any change smaller than the toff threshold
        to reduce transaction costs, then project back onto the simplex.
        :param b: numpy array: Last portfolio vector
        :param x: numpy array: Price movement prediction
        """
        vt = safe_div(x, np.dot(b, x))
        vt_mean = np.mean(vt)
        # update portfolio
        b += np.sign(vt - vt_mean) * np.clip(abs(vt - vt_mean) - self.toff, 0.0, np.inf)

        # project it onto simplex
        return simplex_proj(b)

    def set_params(self, **kwargs):
        self.toff = kwargs['toff']
        if self.optimize_factor:
            self.factor.set_params(**kwargs)
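

# Usage sketch (not part of the original module): the TCO update only moves weights
# whose deviation from the mean exceeds the toff threshold, then projects back onto
# the simplex. The toy threshold and vectors below are hypothetical.
def _tco_update_example():
    import numpy as np
    agent = TCO(toff=0.02)
    b = np.array([0.5, 0.3, 0.2])     # previous portfolio (last entry = quote/fiat)
    x = np.array([1.05, 0.98, 1.00])  # predicted movement ratios, as from predict()
    # only the first asset deviates by more than toff, so only its weight changes
    return agent.update(b, x)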


class Anticor(APrioriAgent):
    """ Anticor (anti-correlation) is a heuristic portfolio selection algorithm.
    It adopts the consistency of positive lagged cross-correlation and negative
    autocorrelation to adjust the portfolio. Even though it has no known bounds and
    hence is not considered to be universal, it has very strong empirical results.
    Reference:
        A. Borodin, R. El-Yaniv, and V. Gogan.  Can we learn to beat the best stock, 2005.
        http://www.cs.technion.ac.il/~rani/el-yaniv-papers/BorodinEG03.pdf
    """

    def __repr__(self):
        return "Anticor"

    def __init__(self, window=30, fiat="BTC"):
        """
        :param window: Window parameter.
        """
        super().__init__(fiat=fiat)
        self.window = window

    def predict(self, obs):
        """

        :param obs:
        :return:
        """
        price_log1 = np.empty((self.window - 2, obs.columns.levels[0].shape[0] - 1), dtype='f')
        price_log2 = np.empty((self.window - 2, obs.columns.levels[0].shape[0] - 1), dtype='f')
        for key, symbol in enumerate([s for s in obs.columns.levels[0] if s is not self.fiat]):
            price_log1[:, key] = obs[symbol].open.iloc[-2 * self.window + 1:-self.window].rolling(2).apply(
                lambda x: np.log10(safe_div(x[-1], x[-2]))).dropna().values.T
            price_log2[:, key] = obs[symbol].open.iloc[-self.window + 1:].rolling(2).apply(
                lambda x: np.log10(safe_div(x[-1], x[-2]))).dropna().values.T
        return price_log1, price_log2

    def rebalance(self, obs):
        if self.step:
            prev_posit = self.get_portfolio_vector(obs, index=-1)[:-1]
            factor = self.predict(obs)
            return self.update(prev_posit, *factor)
        else:
            n_pairs = obs.columns.levels[0].shape[0]
            action = np.ones(n_pairs)
            action[-1] = 0
            return array_normalize(action)

    @staticmethod
    def zero_to_inf(vec):
        return np.vectorize(lambda x: np.inf if np.allclose(x, [0.0]) else x)(vec)

    def update(self, b, lx1, lx2):
        mean2 = lx2.mean(axis=0)
        std1 = self.zero_to_inf(lx1.std(axis=0))
        std2 = self.zero_to_inf(lx2.std(axis=0))

        corr = np.matmul(((lx1 - lx1.mean(axis=0)) / std1).T, (lx2 - mean2) / std2)
        claim = np.zeros_like(corr)

        for i in range(corr.shape[0]):
            for j in range(corr.shape[1]):
                if i == j: continue
                else:
                    if mean2[i] > mean2[j] and corr[i, j] > 0:
                        # Correlation matrix
                        claim[i, j] += corr[i, j]
                        # autocorrelation
                        if corr[i, i] < 0:
                            claim[i, j] += abs(corr[i, i])
                        if corr[j, j] < 0:
                            claim[i, j] += abs(corr[j, j])

        # calculate transfer
        transfer = claim * 0.
        for i in range(corr.shape[0]):
            total_claim = sum(claim[i, :])
            if total_claim != 0:
                transfer[i, :] = b[i] * safe_div(claim[i, :], total_claim)

        b += np.sum(transfer, axis=0) - np.sum(transfer, axis=1)

        return np.append(simplex_proj(b), [0.0])

    def set_params(self, **kwargs):
        self.window = int(kwargs['window'])
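

# Toy sketch (not part of the original module): the Anticor update transfers weight
# between assets according to lagged cross-correlation claims. The random log-return
# windows below are hypothetical.
def _anticor_update_example():
    import numpy as np
    rng = np.random.RandomState(0)
    window, n_assets = 10, 3
    agent = Anticor(window=window)
    lx1 = rng.normal(0.0, 0.01, size=(window - 2, n_assets))  # log-returns, older window
    lx2 = rng.normal(0.0, 0.01, size=(window - 2, n_assets))  # log-returns, recent window
    b = np.ones(n_assets) / n_assets                          # uniform weights over pairs
    return agent.update(b, lx1, lx2)                          # last entry is the fiat weight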


# Modern Portfolio Theory
class MeanVariance(APrioriAgent):

    def __repr__(self):
        return "Modern Portfolio Theory"

    def __init__(self, factor=models.price_relative, fiat="BTC", name='TangentPortfolio'):
        """
        :param factor: callable: Factor model used to estimate expected returns.
        """
        super().__init__(fiat=fiat, name=name)
        self.factor = factor
        self.fiat = fiat
        self.init = False

    def predict(self, obs):
        """
        Performs prediction given environment observation
        :param obs: pandas DataFrame: Environment observation
        """
        return self.factor(obs).iloc[-1]

    def update(self, cov_mat, exp_rets):
        raise NotImplementedError("You should overwrite this method in the child class.")

    def rebalance(self, obs):
        raise NotImplementedError("You should overwrite this method in the child class.")

    def set_params(self, **kwargs):
        self.target_return = kwargs['target_return']


class TangencyPortfolio(MeanVariance):
    """
    Computes a tangency portfolio, i.e. a maximum Sharpe ratio portfolio.
    """

    def __repr__(self):
        return "Tangency Portfolio"

    def rebalance(self, obs):
        """
        Performs portfolio rebalance within environment
        :param obs: pandas DataFrame: Environment observation
        :return: numpy array: Portfolio vector
        """
        if not self.init:
            n_pairs = obs.columns.levels[0].shape[0]
            action = np.ones(n_pairs)
            action[-1] = 0
            self.crp = array_normalize(action)
            self.init = True

        if self.step:
            x = self.predict(obs)
            price_relative = obs.xs('open', level=1, axis=1).apply(lambda x: ta.ROCR(x, timeperiod=1),
                                                                   raw=True).fillna(1.0)
            cov_mat = price_relative.cov()
            return self.update(cov_mat, x)
        else:
            return self.crp

    def update(self, cov_mat, exp_rets):
        """
         Note: As the Sharpe ratio is not invariant with respect
         to leverage, it is not possible to construct non-trivial
         market neutral tangency portfolios. This is because for
         a positive initial Sharpe ratio the sharpe grows unbound
         with increasing leverage.

         Parameters
         ----------
         cov_mat: pandas.DataFrame
             Covariance matrix of asset returns.
         exp_rets: pandas.Series
             Expected asset returns (often historical returns).
         allow_short: bool, optional
             If 'False' construct a long-only portfolio.
             If 'True' allow shorting, i.e. negative weights.

         Returns
         -------
         weights: pandas.Series
             Optimal asset weights.
         """
        if not isinstance(cov_mat, pd.DataFrame):
            raise ValueError("Covariance matrix is not a DataFrame")

        if not isinstance(exp_rets, pd.Series):
            raise ValueError("Expected returns is not a Series")

        if not cov_mat.index.equals(exp_rets.index):
            raise ValueError("Indices do not match")

        n = len(cov_mat)

        P = opt.matrix(cov_mat.values)
        q = opt.matrix(0.0, (n, 1))

        # Constraints Gx <= h
        # exp_rets*x >= 1 and x >= 0
        G = opt.matrix(np.vstack((-exp_rets.values,
                                  -np.identity(n))))
        h = opt.matrix(np.vstack((-1.0,
                                  np.zeros((n, 1)))))

        # Solve
        optsolvers.options['show_progress'] = False
        sol = optsolvers.qp(P, q, G, h)

        if sol['status'] != 'optimal':
            warnings.warn("Convergence problem")

        weights = np.append(np.squeeze(sol['x']), [0.0])

        # Rescale weights, so that sum(weights) = 1
        weights /= weights.sum()
        return weights
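

# Usage sketch (not part of the original module): solving the long-only max-Sharpe QP
# above on a toy 3-asset problem. The pair names, covariance matrix and expected
# returns below are hypothetical.
def _tangency_portfolio_example():
    import numpy as np
    import pandas as pd
    assets = ['BTC_ETH', 'BTC_LTC', 'BTC_XRP']
    cov_mat = pd.DataFrame(np.diag([0.04, 0.09, 0.16]), index=assets, columns=assets)
    exp_rets = pd.Series([0.010, 0.020, 0.015], index=assets)
    agent = TangencyPortfolio()
    # returns n + 1 weights summing to one; the last entry is the quote (fiat) weight
    return agent.update(cov_mat, exp_rets)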


class Markowitz(MeanVariance):
    """
    Markowitz portfolio optimization
    """
    def __repr__(self):
        return "Markowitz Portfolio"

    def __init__(self, factor=models.price_relative, target_return=0.0025, fiat="BTC", name='Markowitz'):
        """
        :param factor: callable: Factor model used to estimate expected returns.
        :param target_return: float: Target portfolio return per rebalance period.
        """
        super().__init__(fiat=fiat, name=name)
        self.target_return = target_return
        self.factor = factor
        self.fiat = fiat
        self.init = False

    def rebalance(self, obs):
        """
        Performs portfolio rebalance within environment
        :param obs: pandas DataFrame: Environment observation
        :return: numpy array: Portfolio vector
        """
        if not self.init:
            n_pairs = obs.columns.levels[0].shape[0]
            action = np.ones(n_pairs)
            action[-1] = 0
            self.crp = array_normalize(action)
            self.init = True

        if self.step:
            x = self.predict(obs)
            # x[self.fiat] = 1 * (1 - x.std())
            price_relative = obs.xs('open', level=1, axis=1).apply(lambda x: ta.ROCR(x, timeperiod=1),
                                                                   raw=True).fillna(1.0)
            # price_relative[self.fiat] = 1 * (1 - price_relative.std(axis=1))
            cov_mat = price_relative.cov()
            return self.update(cov_mat, x, self.target_return)
        else:
            return self.crp

    def update(self, cov_mat, exp_rets, target_ret):
        """
        Computes a Markowitz portfolio.

        Parameters
        ----------
        cov_mat: pandas.DataFrame
            Covariance matrix of asset returns.
        exp_rets: pandas.Series
            Expected asset returns (often historical returns).
        target_ret: float
            Target return of portfolio.
        allow_short: bool, optional
            If 'False' construct a long-only portfolio.
            If 'True' allow shorting, i.e. negative weights.
        market_neutral: bool, optional
            If 'False' sum of weights equals one.
            If 'True' sum of weights equal zero, i.e. create a
                market neutral portfolio (implies allow_short=True).

        Returns
        -------
        weights: pandas.Series
            Optimal asset weights.
        """
        if not isinstance(cov_mat, pd.DataFrame):
            raise ValueError("Covariance matrix is not a DataFrame")

        if not isinstance(exp_rets, pd.Series):
            raise ValueError("Expected returns is not a Series")

        if not isinstance(target_ret, float):
            raise ValueError("Target return is not a float")

        if not cov_mat.index.equals(exp_rets.index):
            raise ValueError("Indices do not match")

        n = len(cov_mat)

        P = opt.matrix(cov_mat.values)
        q = opt.matrix(0.0, (n, 1))

        # Constraints Gx <= h
        # exp_rets*x >= target_ret and x >= 0
        G = opt.matrix(np.vstack((-exp_rets.values,
                                  -np.identity(n))))
        h = opt.matrix(np.vstack((-target_ret,
                                  +np.zeros((n, 1)))))

        # Constraints Ax = b
        # sum(x) = 1
        A = opt.matrix(1.0, (1, n))

        b = opt.matrix(1.0)

        # Solve
        optsolvers.options['show_progress'] = False
        sol = optsolvers.qp(P, q, G, h, A, b)

        if sol['status'] != 'optimal':
            warnings.warn("Convergence problem")

        # Append zero weight for the quote (fiat) currency
        weights = np.append(np.squeeze(sol['x']), [0.0])
        return weights
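

# Usage sketch (not part of the original module): minimum-variance weights meeting the
# target return on the same hypothetical 3-asset problem as the tangency example above.
def _markowitz_example():
    import numpy as np
    import pandas as pd
    assets = ['BTC_ETH', 'BTC_LTC', 'BTC_XRP']
    cov_mat = pd.DataFrame(np.diag([0.04, 0.09, 0.16]), index=assets, columns=assets)
    exp_rets = pd.Series([0.010, 0.020, 0.015], index=assets)
    agent = Markowitz(target_return=0.0025)
    return agent.update(cov_mat, exp_rets, agent.target_return)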


# Risk optimization
class ERI(APrioriAgent):
    """
    This algorithm uses Extreme Risk Index to optimize a constant rebalance portfolio
    References:
        Extreme Risk Index:
        https://arxiv.org/pdf/1505.04045.pdf
    """
    def __repr__(self):
        return "Extreme Risk Index"

    def __init__(self, window=300, k=0.1, mpc=0.3, beta=0.999, fiat="BTC", name='ERI'):
        super().__init__(fiat=fiat, name=name)
        self.window = window - 1
        self.k = k
        self.mpc = mpc
        self.beta = beta

        # Extreme risk index
        self.cons = [
            {'type': 'eq', 'fun': lambda w: w.sum() - 1}, # Simplex region
            {'type': 'ineq', 'fun': lambda w: w}, # Positive bound
            # Maximum position concentration constraint
            {'type': 'ineq', 'fun': lambda w: self.mpc - np.linalg.norm(w, ord=np.inf)}
        ]

    def predict(self, obs):
        """
        Performs prediction given environment observation
        :param obs: pandas DataFrame: Environment observation
        """
        prices = obs.xs('open', level=1, axis=1)

        log_returns = np.hstack([prices.rolling(2).apply(
            lambda x: np.log(safe_div(x[-1], x[-2]))).dropna().values, np.zeros((self.window, 1))])

        return log_returns

    # Pareto Extreme Risk Index
    @staticmethod
    def estimate_alpha(R):
        return safe_div((R.shape[0] - 1), np.log(safe_div(R[:-1], R[-1])).sum())

    @staticmethod
    def estimate_gamma(alpha, Z, w):
        return (1 / (Z.shape[0] - 1)) * np.power(np.clip(w * Z[:-1].T, 0.0, np.inf), alpha).sum()

    def loss(self, w, alpha, Z, b):
        return self.estimate_gamma(alpha, Z, w) + np.linalg.norm(b - w) ** 2

    def update(self, b, x):
        last_x = x[-1, :]

        R, Z = risk.polar_returns(-x, self.k)
        alpha = self.estimate_alpha(R)

        self.r_hat = self.beta * self.r_hat + (1 - self.beta) * last_x

        cons = self.cons + [{'type': 'eq', 'fun': lambda w:
            np.dot(w, self.r_hat) - np.clip(0.001, 0.0, self.r_hat.max() / np.sqrt(2))}]

        res = minimize(
            self.loss,
            b,
            args=(alpha, Z, b),
            constraints=cons,
            options={'maxiter': 3333},
            tol=1e-7,
            bounds=tuple((0, 1) for _ in range(b.shape[0]))
        )

        # Log variables
        self.log['r_hat'] = "%.4f, %.4f, %.4f" % (self.r_hat.min(), self.r_hat.mean(), self.r_hat.max())
        self.log['alpha'] = "%.2f" % alpha
        self.log['gamma'] = "%.8f" % res['fun']
        self.log['CC'] = "%.2f" % np.power(res['x'], 2).sum() ** -1
        self.log['nit'] = "%d" % res['nit']
        self.log['k'] = "%.2f" % self.k
        self.log['mpc'] = "%.2f" % self.mpc
        self.log['beta'] = "%.4f" % self.beta

        return res['x']  # Optimal weights from the optimizer result

    def rebalance(self, obs):
        """
        Performs portfolio rebalance within environment
        :param obs: pandas DataFrame: Environment observation
        :return: numpy array: Portfolio vector
        """
        if not self.step:
            n_pairs = obs.columns.levels[0].shape[0]
            action = np.ones(n_pairs)
            action[-1] = 0
            self.crp = self.b = array_normalize(action)
            self.r_hat = np.zeros(n_pairs)
            return self.crp

        self.b = self.update(self.b, self.predict(obs))
        return self.b
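

# Sanity-check sketch (not part of the original module): the Hill-type estimator in
# ERI.estimate_alpha recovers the tail index of Pareto-distributed radii. The sample
# size and tail fraction below are hypothetical.
def _eri_alpha_example():
    import numpy as np
    rng = np.random.RandomState(0)
    samples = rng.pareto(2.0, size=10000) + 1.0   # Pareto tail with true alpha = 2
    k = 100                                       # number of upper order statistics
    R = np.sort(samples)[::-1][:k]                # top-k radii, descending
    return ERI.estimate_alpha(R)                  # should be close to 2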


## Agent Pipeline
class Pipeline(APrioriAgent):
    def __repr__(self):
        return "Pipeline"

    def __init__(self, factor, risk, fiat="BTC", name='Pipeline'):
        super().__init__(fiat=fiat, name=name)
        self.factor = factor
        self.risk = risk

    def rebalance(self, obs):
        """
        Performs portfolio rebalance within environment
        :param obs: pandas DataFrame: Environment observation
        :return: numpy array: Portfolio vector
        """
        if not self.step:
            n_pairs = obs.columns.levels[0].shape[0]
            action = np.ones(n_pairs)
            action[-1] = 0
            self.crp = self.b = array_normalize(action)
            return self.crp

        self.factor.b = self.b
        self.risk.b = self.factor.rebalance(obs)
        self.b = self.risk.rebalance(obs)

        self.factor.step = self.risk.step = self.step

        return self.b
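

# Usage sketch (not part of the original module): chaining a factor agent with a risk
# agent. The choice of TCO as the factor stage and ERI as the risk stage is hypothetical;
# obs is an environment observation DataFrame, as used throughout this module.
def _pipeline_example(obs):
    agent = Pipeline(factor=TCO(), risk=ERI(window=300, k=0.1))
    return agent.rebalance(obs)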