python source code of forecast

# -*- coding: utf-8 -*-
#
# License:          This module is released under the terms of the LICENSE file
#                   contained within this applications INSTALL directory

"""
Defines the ForecastModel class, which encapsulates model functions used in
forecast model fitting, as well as their number of parameters and
initialisation parameters.
"""

# -- Coding Conventions
#    http://www.python.org/dev/peps/pep-0008/   -   Use the Python style guide
# http://sphinx.pocoo.org/rest.html          -   Use Restructured Text for
# docstrings

import itertools
# -- Public Imports
import logging

import numpy as np
import pandas as pd
from pandas.tseries.holiday import Holiday, AbstractHolidayCalendar, \
    MO, nearest_workday, next_monday, next_monday_or_tuesday, \
    GoodFriday, EasterMonday, USFederalHolidayCalendar
from pandas.tseries.offsets import DateOffset

# -- Private Imports
from anticipy import model_utils

# -- Globals
logger = logging.getLogger(__name__)

dict_fourier = {  # Default configuration for fourier-based models
    'period': 365.25,  # days in year
    'harmonics': 10  # TODO: evaluate different harmonics values
}


# -- Functions

# ---- Utility functions


def logger_info(msg, data):
    # Convenience function for easier log typing
    logger.info(msg + '\n%s', data)


def _get_f_init_params_default(n_params):
    # Generate a default function for initialising model parameters: use
    # random values between 0 and 1
    return lambda a_x=None, a_y=None, a_date=None, is_mult=False:\
        np.random.uniform(low=0.001, high=1, size=n_params)


def _get_f_bounds_default(n_params):
    # Generate a default function for model parameter boundaries. Default
    # boundaries are (-inf, inf)
    return lambda a_x=None, a_y=None, a_date=None: (
        n_params * [-np.inf], n_params * [np.inf])


def _get_f_add_2_f_models(forecast_model1, forecast_model2):
    # Add model functions of 2 ForecastModels
    def f_add_2_f_models(a_x, a_date, params, is_mult=False, **kwargs):
        params1 = params[0:forecast_model1.n_params]
        params2 = params[forecast_model1.n_params:]
        return (
            forecast_model1.f_model(
                a_x,
                a_date,
                params1,
                is_mult=False,
                **kwargs) +
            forecast_model2.f_model(
                a_x,
                a_date,
                params2,
                is_mult=False,
                **kwargs))

    return f_add_2_f_models


def _get_f_mult_2_f_models(forecast_model1, forecast_model2):
    # Multiply model functions of 2 ForecastModels
    def f_mult_2_f_models(a_x, a_date, params, is_mult=False, **kwargs):
        params1 = params[0:forecast_model1.n_params]
        params2 = params[forecast_model1.n_params:]
        return (
            forecast_model1.f_model(
                a_x,
                a_date,
                params1,
                is_mult=True,
                **kwargs) *
            forecast_model2.f_model(
                a_x,
                a_date,
                params2,
                is_mult=True,
                **kwargs))

    return f_mult_2_f_models


def _get_f_add_2_f_init_params(f_init_params1, f_init_params2):
    # Compose parameter initialisation functions of 2 ForecastModels, using
    # addition
    def f_add_2_f_init_params(a_x, a_y, a_date=None, is_mult=False):
        return np.concatenate(
            [f_init_params1(a_x, a_y, a_date, is_mult=False),
             f_init_params2(a_x, a_y, a_date, is_mult=False)])

    return f_add_2_f_init_params


def _get_f_mult_2_f_init_params(f_init_params1, f_init_params2):
    # Compose parameter initialisation functions of 2 ForecastModels, using
    # multiplication
    def f_mult_2_f_init_params(a_x, a_y, a_date=None, is_mult=False):
        return np.concatenate(
            [f_init_params1(a_x, a_y, a_date, is_mult=True),
             f_init_params2(a_x, a_y, a_date, is_mult=True)])

    return f_mult_2_f_init_params


def _get_f_concat_2_bounds(forecast_model1, forecast_model2):
    # Compose parameter boundary functions of 2 ForecastModels
    def f_add_2_f_bounds(a_x, a_y, a_date=None):
        return np.concatenate(
            (forecast_model1.f_bounds(
                a_x, a_y, a_date), forecast_model2.f_bounds(
                a_x, a_y, a_date)), axis=1)

    return f_add_2_f_bounds


def _f_validate_input_default(a_x, a_y, a_date):
    # Default input validation funciton for a ForecastModel. Always returns
    # True
    return True


# -- Classes

class ForecastModel:
    """
    Class that encapsulates model functions for use in forecasting, as well as
    their number of parameters and functions for parameter initialisation.

    A ForecastModel instance is initialized with a model name, a number of
    model parameters, and a model function. Class instances are
    callable - when called as a function, their internal model function is
    used. The main purpose of ForecastModel objects is to generate predicted
    values for a time series, given a set of parameters. These values can be
    compared to the original series to get an array of residuals::

        y_predicted = model(a_x, a_date, params)
        residuals = (a_y - y_predicted)

    This is used in an optimization loop to obtain the optimal parameters for
    the model.

    The reason for using this class instead of raw model functions is that
    ForecastModel supports function composition::

        model_sum = fcast_model1 + fcast_model2
        # fcast_model 1 and 2 are ForecastModel instances, and so is model_sum
        a_y1 = fcast_model1(
            a_x, a_date, params1) + fcast_model2(a_x, a_date, params2)
        params = np.concatenate([params1, params2])
        a_y2 = model_sum(a_x, a_date, params)
        a_y1 == a_y2  # True

    Forecast models can be added or multiplied, with the + and * operators.
    Multiple levels of composition are supported::

        model = (model1 + model2) * model3

    Model composition is used to aggregate trend and seasonality model
    components, among other uses.

    Model functions have the following signature:

    - f(a_x, a_date, params, is_mult)
    - a_x : array of floats
    - a_date: array of dates, same length as a_x. Only required for date-aware
      models, e.g. for weekly seasonality.
    - params: array of floats - model parameters - the optimisation loop
      updates this to fit our actual values. Each
      model function uses a fixed number of parameters.
    - is_mult: boolean. True if the model is being used with multiplicative
      composition. Required because
      some model functions (e.g. steps) have different behaviour
      when added to other models than when multiplying them.
    - returns an array of floats - with same length as a_x - output of the
      model defined by this object's modelling function f_model and the
      current set of parameters

    By default, model parameters are initialized as random values between
    0 and 1. It is possible to define a parameter initialization function
    that picks initial values based on the original time series.
    This is passed during ForecastModel creation with the argument
    f_init_params. Parameter initialization is compatible with model
    composition: the initialization function of each component will be used
    for that component's parameters.

    Parameter initialisation functions have the following signature:

    - f_init_params(a_x, a_y, is_mult)
    - a_x: array of floats - same length as time series
    - a_y: array of floats - time series values
    - returns an array of floats - with length equal to this object's n_params
      value

    By default, model parameters have no boundaries. However, it is possible
    to define a boundary function for a model, that sets boundaries for each
    model parameter, based on the input time series. This is passed during
    ForecastModel creation with the argument f_bounds.
    Boundary definition is compatible with model composition:
    the boundary function of each component will be used for that component's
    parameters.

    Boundary functions have the following signature:

    - f_bounds(a_x, a_y, a_date)
    - a_x: array of floats - same length as time series
    - a_y: array of floats - time series values
    - a_date: array of dates, same length as a_x. Only required for date-aware
      models, e.g. for weekly seasonality.
    - returns a tuple of 2 arrays of floats. The first defines minimum
      parameter boundaries, and the second the maximum parameter boundaries.

    As an option, we can assign a list of input validation functions to a
    model. These functions analyse the inputs that will be used for fitting a
    model, returning True if valid, and False otherwise. The forecast logic
    will skip a model from fitting if any of the validation functions for that
    model returns False.

    Input validation functions have the following signature:

    - f_validate_input(a_x, a_y, a_date)
    - See the description of model functions above for more details on these
      parameters.

    Our input time series should meet the following constraints:

    - Minimum required samples depends on number of model parameters
    - May include null values
    - May include multiple values per sample
    - A date array is only required if the model is date-aware

    Class Usage::

        model_x = ForecastModel(name, n_params, f_model, f_init_params,
        l_f_validate_input)
        # Get model name
        model_name = model_x.name
        # Get number of model parameters
        n_params = model_x.n_params
        # Get parameter initialisation function
        f_init_params = model_x.f_init_params
        # Get initial parameters
        init_params = f_init_params(t_values, y_values)
        # Get model fitting function
        f_model = model_x.f_model
        # Get model output
        y = f_model(a_x, a_date, parameters)

    The following pre-generated models are available. They are available as attributes from this module: # noqa

    .. csv-table:: Forecast models
       :header: "name", "params", "formula","notes"
       :widths: 20, 10, 20, 40

       "model_null",0, "y=0", "Does nothing.
       Used to disable components (e.g. seasonality)"
       "model_constant",1, "y=A", "Constant model"
       "model_linear",2, "y=Ax + B", "Linear model"
       "model_linear_nondec",2, "y=Ax + B", "Non decreasing linear model.
       With boundaries to ensure model slope >=0"
       "model_quasilinear",3, "y=A*(x^B) + C", "Quasilinear model"
       "model_exp",2, "y=A * B^x", "Exponential model"
       "model_decay",4, "Y = A * e^(B*(x-C)) + D", "Exponential decay model"
       "model_step",2, "y=0 if x<A, y=B if x>=A", "Step model"
       "model_two_steps",4, "see model_step", "2 step models.
       Parameter initialization is aware of # of steps."
       "model_sigmoid_step",3, "y = A + (B - A) / (1 + np.exp(- D * (x - C)))
       ", "Sigmoid step model"
       "model_sigmoid",3, "y = A + (B - A) / (1 + np.exp(- D * (x - C)))", "
       Sigmoid model"
       "model_season_wday",7, "see desc.", "Weekday seasonality model.
       Assigns a constant value to each weekday"
       "model_season_wday",6, "see desc.", "6-param weekday seasonality model.
       As above, with one constant set to 0."
       "model_season_wday_2",2, "see desc.", "Weekend seasonality model.
       Assigns a constant to each of weekday/weekend"
       "model_season_month",12, "see desc.", "Month seasonality model.
       Assigns a constant value to each month"
       "model_season_fourier_yearly",10, "see desc", "Fourier
       yearly seasonality model"

    """

    def __init__(
            self,
            name,
            n_params,
            f_model,
            f_init_params=None,
            f_bounds=None,
            l_f_validate_input=None):
        """
        Create ForecastModel

        :param name: Model name
        :type name: basestring
        :param n_params: Number of parameters for model function
        :type n_params: int
        :param f_model: Model function
        :type f_model: function
        :param f_init_params: Parameter initialisation function
        :type f_init_params: function
        :param f_bounds: Boundary function
        :type f_bounds: function
        """
        self.name = name
        self.n_params = n_params
        self.f_model = f_model
        if f_init_params is not None:
            self.f_init_params = f_init_params
        else:
            # Default initial parameters: random values between 0 and 1
            self.f_init_params = _get_f_init_params_default(n_params)

        if f_bounds is not None:
            self.f_bounds = f_bounds
        else:
            self.f_bounds = _get_f_bounds_default(n_params)

        if l_f_validate_input is None:
            self.l_f_validate_input = [_f_validate_input_default]
        else:
            if not isinstance(l_f_validate_input, (list,)):
                self.l_f_validate_input = [l_f_validate_input]
            else:
                self.l_f_validate_input = l_f_validate_input

        # TODO - REMOVE THIS - ASSUME NORMALIZED INPUT
        def _get_f_init_params_validated(f_init_params):
            # Adds argument validation to a parameter initialisation function
            def f_init_params_validated(
                    a_x=None, a_y=None, a_date=None, is_mult=False):
                if a_x is not None and pd.isnull(a_x).any():
                    raise ValueError('a_x cannot have null values')
                return f_init_params(a_x, a_y, a_date, is_mult)

            return f_init_params_validated

        # Add logic to f_init_params that validates input
        self.f_init_params = _get_f_init_params_validated(self.f_init_params)

    def __call__(self, a_x, a_date, params, is_mult=False, **kwargs):
        # assert len(params)==self.n_params
        return self.f_model(a_x, a_date, params, is_mult, **kwargs)

    def __str__(self):
        return self.name

    def __repr__(self):
        return 'ForecastModel:{}'.format(self.name)

    def __add__(self, forecast_model):
        # Check for nulls
        if self.name == 'null':
            return forecast_model
        if forecast_model.name == 'null':
            return self
        name = '({}+{})'.format(self.name, forecast_model.name)
        n_params = self.n_params + forecast_model.n_params
        f_model = _get_f_add_2_f_models(self, forecast_model)
        f_init_params = _get_f_add_2_f_init_params(
            self.f_init_params, forecast_model.f_init_params)
        f_bounds = _get_f_concat_2_bounds(self, forecast_model)
        l_f_validate_input = list(
            set(self.l_f_validate_input + forecast_model.l_f_validate_input))
        return ForecastModel(
            name,
            n_params,
            f_model,
            f_init_params,
            f_bounds=f_bounds,
            l_f_validate_input=l_f_validate_input)

    def __radd__(self, other):
        return self.__add__(other)

    def __mul__(self, forecast_model):
        if self.name == 'null':
            return forecast_model
        if forecast_model.name == 'null':
            return self
        name = '({}*{})'.format(self.name, forecast_model.name)
        n_params = self.n_params + forecast_model.n_params
        f_model = _get_f_mult_2_f_models(self, forecast_model)
        f_init_params = _get_f_mult_2_f_init_params(
            self.f_init_params, forecast_model.f_init_params)
        f_bounds = _get_f_concat_2_bounds(self, forecast_model)
        l_f_validate_input = list(
            set(self.l_f_validate_input + forecast_model.l_f_validate_input))
        return ForecastModel(
            name,
            n_params,
            f_model,
            f_init_params,
            f_bounds=f_bounds,
            l_f_validate_input=l_f_validate_input)

    def __rmul__(self, other):
        return self.__mul__(other)

    def __eq__(self, other):
        if isinstance(self, other.__class__):
            return self.name == other.name
        return NotImplemented

    def __ne__(self, other):
        x = self.__eq__(other)
        if x is not NotImplemented:
            return not x
        return NotImplemented

    def __hash__(self):
        return hash(self.name)

    def __lt__(self, other):
        return self.name < other.name

    def validate_input(self, a_x, a_y, a_date):
        try:
            l_result = [f_validate_input(a_x, a_y, a_date)
                        for f_validate_input in self.l_f_validate_input]
        except AssertionError:
            return False
        return True


# - Null model: 0


def _f_model_null(a_x, a_date, params, is_mult=False, **kwargs):
    # This model does nothing - used to disable model components
    # (e.g. seasonality) when adding/multiplying multiple functions
    return float(is_mult)  # Returns 1 if multiplying, 0 if adding


model_null = ForecastModel('null', 0, _f_model_null)


# - Constant model: :math:`Y = A`

def _f_model_constant(a_x, a_date, params, is_mult=False, **kwargs):
    [A] = params
    y = np.full(len(a_x), A)
    return y


def _f_init_params_constant(a_x=None, a_y=None, a_date=None, is_mult=False):
    if a_y is None:
        return np.random.uniform(0, 1, 1)
    else:
        return np.nanmean(a_y) + np.random.uniform(0, 1, 1)


model_constant = ForecastModel(
    'constant',
    1,
    _f_model_constant,
    _f_init_params_constant)


# - Naive model: Y = Y(x-1)
# Note: This model requires passing the actuals data - it is not fitted by
# regression. We still pass it to forecast.fit_model() for consistency
# with the rest of the library

def _f_model_naive(a_x, a_date, params, is_mult=False, df_actuals=None):
    if df_actuals is None:
        raise ValueError('model_naive requires a df_actuals argument')
    df_out_tmp = pd.DataFrame({'date': a_date, 'x': a_x})
    df_out = (
        # This is not really intended to work with multiple values per sample
        df_actuals.drop_duplicates('x').merge(df_out_tmp, how='outer')
    )
    df_out['y'] = df_out.y.shift(1).fillna(
        method='ffill').fillna(
        method='bfill')
    df_out = df_out.loc[df_out.x.isin(a_x)]
    # df_out = df_out_tmp.merge(df_out, how='left')
    # TODO: CHECK THAT X,DATE order is preserved
    # TODO: df_out = df_out.merge(df_out_tmp, how='right')
    return df_out.y.values


model_naive = ForecastModel('naive', 0, _f_model_naive)


# - Seasonal naive model
# Note: This model requires passing the actuals data - it is not fitted by
# regression. We still pass it to forecast.fit_model() for consistency
# with the rest of the library

def _fillna_wday(df):
    df = df.copy()
    df['wday'] = df.date.dt.weekday
    df_tmp = df[['date', 'x']].copy()
    for wday in np.arange(0, 7):
        wday_name = 'wday_{}'.format(wday)
        df_tmp[wday_name] = (
            # for each wday column, set to null all values from other weekdays
            # fill nulls with last weekly sample
            df.y.where(df.wday == wday, np.NaN) .fillna(
                # shift so that model for each sample is last non-null weekly
                # sample
                method='ffill').shift(1).where(
                # set values for other weekdays to null, so we can aggregate
                # with sum
                df.wday == wday, np.NaN)
        )

    # logger_info('debug: df_tmp: ', df_tmp)

    # Aggregate: add all weekly columns together, keep null only if all
    # columns are null
    def aggregate_wday(s_tmp):
        if np.all(np.isnan(s_tmp)):
            return np.NaN
        else:
            return np.nansum(s_tmp)

    df['y_out'] = df_tmp.loc[:, df_tmp.columns.str.startswith(
        'wday_')].apply(aggregate_wday, axis=1)
    return df


def _f_model_snaive_wday(a_x, a_date, params, is_mult=False, df_actuals=None):
    if df_actuals is None:
        raise ValueError('model_snaive_wday requires a df_actuals argument')

    df_actuals_model = _fillna_wday(df_actuals.drop_duplicates('x'))

    df_last_week = df_actuals_model.drop_duplicates('wday', keep='last')[
        ['wday', 'y']]
    df_last_week['y_out'] = df_last_week['y']
    df_last_week = df_last_week[['wday', 'y_out']]

    df_out_tmp = pd.DataFrame({'date': a_date, 'x': a_x})
    df_out_tmp['wday'] = df_out_tmp.date.dt.weekday

    df_out_actuals = (
        df_actuals_model.merge(df_out_tmp, how='left')
    )
    df_out_extrapolated = (
        df_out_tmp.loc[~df_out_tmp.date.isin(df_actuals_model.date)].merge(
            df_last_week).sort_values('date').reset_index(drop=True)
    )

    df_out = pd.concat([df_out_actuals, df_out_extrapolated], sort=False)

    # Note: the line below causes trouble when samples are filtered from a_x,
    # a_date due to find_outliers
    df_out = df_out.loc[df_out.x.isin(a_x)]

    return df_out.y_out.values


model_snaive_wday = ForecastModel('snaive_wday', 0, _f_model_snaive_wday)


# - Spike model: :math:`Y = A`, when x_min <= X < x_max
def _f_model_spike(a_x, a_date, params, is_mult=False, **kwargs):
    [A, x_min, x_max] = params
    if is_mult:
        c = 1
    else:
        c = 0
    y = np.concatenate((
        np.full(int(x_min), c),
        np.full(int(x_max - x_min), A),
        np.full(len(a_x) - int(x_max), c)
    ))
    return y


def _f_init_params_spike(a_x=None, a_y=None, a_date=None, is_mult=False):
    """ params are spike height, x start, x end """
    # if not a_y.any():
    if a_y is None:
        return [1] + np.random.uniform(0, 1, 1) + [2]
    else:
        diffs = np.diff(a_y)
        # if diffs:
        if True:
            diff = max(diffs)
            x_start = np.argmax(diffs)
            x_end = x_start + 1
            return np.array([diff, x_start, x_end])


model_spike = ForecastModel('spike', 3, _f_model_spike, _f_init_params_spike)


# - Spike model for dates - dates are fixed for each model

def _f_model_spike_date(
        a_x,
        a_date,
        params,
        date_start,
        date_end,
        is_mult=False):
    [A] = params
    mask_spike = (a_date >= date_start) * (a_date < date_end)
    if is_mult:
        y = mask_spike * A + ~mask_spike
    else:
        y = mask_spike * A

    return y


def _f_init_params_spike(a_x=None, a_y=None, a_date=None, is_mult=False):
    """ params are spike height, x start, x end """
    if a_y is None:
        return np.concatenate([np.array([1]) + np.random.uniform(0, 1, 1)])
    else:
        diffs = np.diff(a_y)
        # if diffs:
        if True:
            diff = max(diffs)
            return np.array([diff])
        # else:
        #     rand = np.random.randint(1, len(a_y) - 1)
        #     return [1]


def get_model_spike_date(date_start, date_end):
    f_model = (
        lambda a_x, a_date, params, is_mult=False, **kwargs:
        _f_model_spike_date(a_x, a_date, params, date_start, date_end, is_mult)
    )
    model_spike_date = ForecastModel(
        'spike_date[{},{}]'.format(
            pd.to_datetime(date_start).date(),
            pd.to_datetime(date_end).date()),
        1,
        f_model,
        _f_init_params_spike)
    return model_spike_date


# - Linear model: :math:`Y = A*x + B`

def _f_model_linear(a_x, a_date, params, is_mult=False, **kwargs):
    (A, B) = params
    y = A * a_x + B
    return y


def _f_init_params_linear(a_x=None, a_y=None, a_date=None, is_mult=False):
    if a_y is None:
        return np.random.uniform(low=0, high=1, size=2)
    else:  # TODO: Improve this
        if a_x is not None:
            a_x_size = np.unique(a_x).size - 1
        else:
            a_x_size = a_y.size - 1
        A = (a_y[-1] - a_y[0]) / a_x_size
        B = a_y[0]
        # Uniform low= 0*m, high = 1*m
        return np.array([A, B])


model_linear = ForecastModel(
    'linear',
    2,
    _f_model_linear,
    _f_init_params_linear)


def f_init_params_linear_nondec(
        a_x=None,
        a_y=None,
        a_date=None,
        is_mult=False):
    params = _f_init_params_linear(a_x, a_y, a_date)
    if params[0] < 0:
        params[0] = 0
    return params


def f_bounds_linear_nondec(a_x=None, a_y=None, a_date=None):
    # first param should be between 0 and inf
    return [0, -np.inf], [np.inf, np.inf]


model_linear_nondec = ForecastModel('linear_nondec', 2, _f_model_linear,
                                    f_init_params=f_init_params_linear_nondec,
                                    f_bounds=f_bounds_linear_nondec)


# - QuasiLinear model: :math:`Y = A t^{B} + C`

def _f_model_quasilinear(a_x, a_date, params, is_mult=False, **kwargs):
    (A, B, C) = params
    y = A * np.power(a_x, B) + C
    return y


model_quasilinear = ForecastModel('quasilinear', 3, _f_model_quasilinear)


# - Exponential model: math::  Y = A * B^t
# TODO: Deprecate - not safe to use
def _f_model_exp(a_x, a_date, params, is_mult=False, **kwargs):
    (A, B) = params
    y = A * np.power(B, a_x)
    return y


model_exp = ForecastModel('exponential', 2, _f_model_exp)


# - Exponential decay model: math::  Y = A * e^(B*(x-C)) + D
def _f_model_decay(a_x, a_date, params, is_mult=False, **kwargs):
    (A, B, C, D) = params
    y = A * np.exp(B * (a_x + C)) + D
    return y


def f_init_params_decay(a_x=None, a_y=None, a_date=None, is_mult=False):
    if a_y is None:
        return np.array([0, 0, 0, 0])
    A = a_y[0] - a_y[-1]
    B = np.log(np.min(a_y) / np.max(a_y)) / (len(a_y) - 1)
    if B > 0 or B == -np.inf:
        B = -0.5
    C = 0.
    D = a_y[-1]
    return np.array([A, B, C, D])


def f_bounds_decay(a_x=None, a_y=None, a_date=None):
    # B should be between 0 and inf
    return [-np.inf, -np.inf, 0, -np.inf], [np.inf, 0, np.inf, np.inf]


model_decay = ForecastModel('decay', 4, _f_model_decay,
                            f_init_params=f_init_params_decay,
                            f_bounds=f_bounds_decay)


# - Step function: :math:`Y = {0, if x < A | B, if x >= A}`
# A is the time of step, and B is the step
def _f_step(a_x, a_date, params, is_mult=False, **kwargs):
    (A, B) = params
    if is_mult:
        y = 1 + (B - 1) * np.heaviside(a_x - A, 1)
    else:
        y = B * np.heaviside(a_x - A, 1)
    return y


# TODO: Implement initialisation for multiplicative composition


def _f_init_params_step(a_x=None, a_y=None, a_date=None, is_mult=False):
    if a_y is None:
        return np.random.uniform(0, 1, 2)
    else:
        if a_y.ndim > 1:
            a_y = a_y[:, 0]
        df = pd.DataFrame({'b': a_y})
        # max difference between consecutive values
        df['diff'] = df.diff().abs()
        # if is_mult, replace above line with something like
        # np.concatenate([[np.NaN],a_y[:-1]/a_y[1:]])
        a = df.nlargest(1, 'diff').index[0]
        b = df['diff'].iloc[a]
        return np.array([a, b * 2])


# TODO: Add boundaries for X axis
model_step = ForecastModel('step', 2, _f_step, _f_init_params_step)


# - Spike model for dates - dates are fixed for each model

def _f_model_step_date(a_x, a_date, params, date_start, is_mult=False):
    [A] = params
    mask_step = (a_date >= date_start).astype(float)
    if is_mult:
        # y = mask_step*A + ~mask_step
        y = mask_step * (A - 1) + 1
    else:
        y = mask_step * A

    return y


# TODO: Implement initialisation for multiplicative composition
def _f_init_params_step_date(a_x=None, a_y=None, a_date=None, is_mult=False):
    if a_y is None:
        return np.random.uniform(0, 1, 1)
    else:
        if a_y.ndim > 1:
            a_y = a_y[:, 0]
        df = pd.DataFrame({'b': a_y})
        # max difference between consecutive values
        df['diff'] = df.diff().abs()
        # if is_mult, replace above line with something like
        # np.concatenate([[np.NaN],a_y[:-1]/a_y[1:]])
        a = df.nlargest(1, 'diff').index[0]
        b = df['diff'].iloc[a]
        return np.array([b * 2])


def get_model_step_date(date_start):
    date_start = pd.to_datetime(date_start)
    f_model = (
        lambda a_x, a_date, params, is_mult=False, **kwargs:
        _f_model_step_date(a_x, a_date, params, date_start, is_mult)
    )
    model_step_date = ForecastModel('step_date[{}]'.format(date_start.date()),
                                    1, f_model, _f_init_params_step_date)
    return model_step_date


# Two step functions
def _f_n_steps(n, a_x, a_date, params, is_mult=False):
    if is_mult:
        y = 1
    else:
        y = 0

    for i in range(0, n + 1, 2):
        A, B = params[i: i + 2]
        if is_mult:
            y = y * _f_step(a_x, a_date, (A, B), is_mult)
        else:
            y = y + _f_step(a_x, a_date, (A, B), is_mult)
    return y


def _f_two_steps(a_x, a_date, params, is_mult=False, **kwargs):
    return _f_n_steps(
        n=2,
        a_x=a_x,
        a_date=a_date,
        params=params,
        is_mult=is_mult)


def _f_init_params_n_steps(
        n=2,
        a_x=None,
        a_y=None,
        a_date=None,
        is_mult=False):
    if a_y is None:
        return np.random.uniform(0, 1, n * 2)
    else:
        # max difference between consecutive values
        if a_y.ndim > 1:
            a_y = a_y[:, 0]
        df = pd.DataFrame({'b': a_y})
        df['diff'] = df.diff().abs()
        # if is_mult, replace above line with something like
        # np.concatenate([[np.NaN],a_y[:-1]/a_y[1:]])
        a = df.nlargest(n, 'diff').index[0:n].values
        b = df['diff'].iloc[a].values
        params = []
        for i in range(0, n):
            params += [a[i], b[i]]
        return np.array(params)


def _f_init_params_two_steps(a_x=None, a_y=None, a_date=None, is_mult=False):
    return _f_init_params_n_steps(
        n=2,
        a_x=a_x,
        a_y=a_y,
        a_date=a_date,
        is_mult=is_mult)


model_two_steps = ForecastModel(
    'two_steps',
    2 * 2,
    _f_two_steps,
    _f_init_params_two_steps)


# - Sigmoid step function: `Y = {A + (B - A) / (1 + np.exp(- D * (a_x - C)))}`
# Spans from A to B, C is the position of the step in x axis
# and D is how steep the increase is
def _f_sigmoid(a_x, a_date, params, is_mult=False, **kwargs):
    (B, C, D) = params
    if is_mult:
        A = 1
    else:
        A = 0
    # TODO check if a_x is negative
    y = A + (B - A) / (1 + np.exp(- D * (a_x - C)))
    return y


def _f_init_params_sigmoid_step(
        a_x=None,
        a_y=None,
        a_date=None,
        is_mult=False):
    if a_y is None:
        return np.random.uniform(0, 1, 3)
    else:
        if a_y.ndim > 1:
            a_y = a_y[:, 0]
        df = pd.DataFrame({'y': a_y})
        # max difference between consecutive values
        df['diff'] = df.diff().abs()
        c = df.nlargest(1, 'diff').index[0]
        b = df.loc[c, 'y']
        d = b * b
        return b, c, d


def _f_init_bounds_sigmoid_step(a_x=None, a_y=None, a_date=None):
    if a_y is None:
        return [-np.inf, -np.inf, 0.], 3 * [np.inf]

    if a_y.ndim > 1:
        a_y = a_y[:, 0]
    if a_x.ndim > 1:
        a_x = a_x[:, 0]
    diff = max(a_y) - min(a_y)
    b_min = -2 * diff
    b_max = 2 * diff
    c_min = min(a_x)
    c_max = max(a_x)
    d_min = 0.
    d_max = np.inf
    return [b_min, c_min, d_min], [b_max, c_max, d_max]


# In this model, parameter initialization is aware of number of steps
model_sigmoid_step = ForecastModel(
    'sigmoid_step',
    3,
    _f_sigmoid,
    _f_init_params_sigmoid_step,
    f_bounds=_f_init_bounds_sigmoid_step)

model_sigmoid = ForecastModel('sigmoid', 3, _f_sigmoid)


# Ramp functions - used for piecewise linear models

# example : model_linear_pw2 = model_linear + model_ramp
# example 2: model_linear_p23 = model_linear + model_ramp + model_ramp

# - Ramp function: :math:`Y = {0, if x < A | B, if x >= A}`
# A is the time of step, and B is the step
def _f_ramp(a_x, a_date, params, is_mult=False, **kwargs):
    (A, B) = params
    if is_mult:
        y = 1 + (a_x - A) * (B) * np.heaviside(a_x - A, 1)
    else:
        y = (a_x - A) * B * np.heaviside(a_x - A, 1)
    return y


def _f_init_params_ramp(a_x=None, a_y=None, a_date=None, is_mult=False):
    # TODO: set boundaries: a_x (0.2, 0.8)
    if a_y is None:
        if a_x is not None:
            nfirst_last = int(np.ceil(0.15 * a_x.size))
            a = np.random.uniform(a_x[nfirst_last], a_x[-nfirst_last - 1], 1)
        else:
            a = np.random.uniform(0, 1, 1)
        b = np.random.uniform(0, 1, 1)

        return np.concatenate([a,
                               b])
    else:
        # TODO: FILTER A_Y BY 20-80 PERCENTILE IN A_X
        df = pd.DataFrame({'b': a_y})
        if a_x is not None:
            #
            df['x'] = a_x
            # Required because we support input with multiple samples per x
            # value
            df = df.drop_duplicates('x')
            df = df.set_index('x')
        # max difference between consecutive values -- this assumes no null
        # values in series
        df['diff2'] = df.diff().diff().abs()

        # We ignore the last 15% of the time series
        skip_samples = int(np.ceil(df.index.size * 0.15))

        a = (df.head(-skip_samples).tail(
            -skip_samples).nlargest(1, 'diff2').index[0]
        )
        b = df['diff2'].loc[a]
        # TODO: replace b with estimation of slope in segment 2
        #   minus slope in segment 1 - see init_params_linear
        return np.array([a, b])


def _f_init_bounds_ramp(a_x=None, a_y=None, a_date=None):
    if a_x is None:
        a_min = -np.inf
        a_max = np.inf
    else:
        # a_min = np.min(a_x)
        nfirst_last = int(np.ceil(0.15 * a_x.size))
        a_min = a_x[nfirst_last]
        a_max = a_x[-nfirst_last]
        # a_min = np.percentile(a_x, 15)
        # a_max = np.percentile(a_x,85)
    if a_y is None:
        b_min = -np.inf
        b_max = np.inf
    else:
        # TODO: FILTER A_Y BY 20-80 PERCENTILE IN A_X
        # df = pd.DataFrame({'b': a_y})
        # #max_diff2 = np.max(df.diff().diff().abs())
        # max_diff2 = np.max(np.abs(np.diff(np.diff(a_y))))
        #
        # b_min = -2*max_diff2
        # b_max = 2*max_diff2

        b_min = -np.inf
        b_max = np.inf
    # logger_info('DEBUG: BOUNDS:',(a_min, b_min,a_max, b_max))
    return ([a_min, b_min], [a_max, b_max])


model_ramp = ForecastModel(
    'ramp',
    2,
    _f_ramp,
    _f_init_params_ramp,
    _f_init_bounds_ramp)


# - Weekday seasonality

def _f_model_season_wday(a_x, a_date, params, is_mult=False, **kwargs):
    # Weekday seasonality model, 6 params
    # params_long[0] is default series value,
    params_long = np.concatenate([[float(is_mult)], params])
    return params_long[a_date.weekday]


def _f_validate_input_season_wday(a_x, a_y, a_date):
    assert a_date is not None
    assert a_date.weekday.drop_duplicates().size == 7


model_season_wday = ForecastModel(
    'season_wday',
    6,
    _f_model_season_wday,
    l_f_validate_input=_f_validate_input_season_wday)


# - Month seasonality
def _f_init_params_season_month(
        a_x=None,
        a_y=None,
        a_date=None,
        is_mult=False):
    if a_y is None or a_date is None:
        return np.random.uniform(low=-1, high=1, size=11)
    else:  # TODO: Improve this
        l_params_long = [np.mean(a_y[a_date.month == i])
                         for i in np.arange(1, 13)]
        l_baseline = l_params_long[-1]
        l_params = l_params_long[:-1]
        if not is_mult:
            l_params_add = l_params - l_baseline
            return l_params_add
        else:
            l_params_mult = l_params / l_baseline
            return l_params_mult


def _f_model_season_month(a_x, a_date, params, is_mult=False, **kwargs):
    # Month of December is taken as default level, has no parameter
    # params_long[0] is default series value
    params_long = np.concatenate([[float(is_mult)], params])
    return params_long[a_date.month - 1]


model_season_month = ForecastModel(
    'season_month',
    11,
    _f_model_season_month,
    _f_init_params_season_month)

model_season_month_old = ForecastModel(
    'season_month_old', 11, _f_model_season_month)


def _f_model_yearly_season_fourier(
        a_x,
        a_date,
        params,
        is_mult=False,
        **kwargs):
    # Infer the time series frequency to calculate the Fourier parameters

    period = dict_fourier['period']
    harmonics = dict_fourier['harmonics']

    return _f_model_season_fourier(a_date, params, period, harmonics, is_mult)


date_origin = pd.datetime(1970, 1, 1)


def _f_model_season_fourier(a_date, params, period, harmonics, is_mult=False):
    # convert to days since epoch
    t = (a_date - date_origin).days.values
    i = np.arange(1, harmonics + 1)
    a_tmp = i.reshape(i.size, 1) * t
    k = (2.0 * np.pi / period)
    y = np.concatenate([np.sin(k * a_tmp), np.cos(k * a_tmp)])

    # now multiply by the params
    y = np.matmul(params, y)
    return y


def _f_init_params_fourier_n_params(
        n_params,
        a_x=None,
        a_y=None,
        a_date=None,
        is_mult=False):
    if a_y is None:
        params = np.random.uniform(0.001, 1, n_params)
    else:
        # max difference in time series
        diff = a_y.max() - a_y.min()
        params = diff * np.random.uniform(0.001, 1, n_params)
    return params


def _f_init_params_fourier(a_x=None, a_y=None, a_date=None, is_mult=False):
    n_params = 2 * dict_fourier['harmonics']
    return _f_init_params_fourier_n_params(
        n_params, a_x=a_x, a_y=a_y, a_date=a_date, is_mult=is_mult)


def _f_init_bounds_fourier_nparams(n_params, a_x=None, a_y=None, a_date=None):
    return n_params * [-np.inf], n_params * [np.inf]


def _f_init_bounds_fourier_yearly(a_x=None, a_y=None, a_date=None):
    n_params = 2 * dict_fourier['harmonics']
    return _f_init_bounds_fourier_nparams(n_params, a_x, a_y, a_date)


model_season_fourier_yearly = ForecastModel(
    name='season_fourier_yearly',
    n_params=2 * dict_fourier['harmonics'],
    f_model=_f_model_yearly_season_fourier,
    f_init_params=_f_init_params_fourier,
    f_bounds=_f_init_bounds_fourier_yearly)


def get_fixed_model(forecast_model, params_fixed, is_mult=False):
    # Generate model with some fixed parameters
    if len(params_fixed) != forecast_model.n_params:
        err = 'Wrong number of fixed parameters'
        raise ValueError(err)
    return ForecastModel(
        forecast_model.name + '_fixed', 0,
        f_model=lambda a_x, a_date, params, is_mult=is_mult, **kwargs:
        forecast_model.f_model(
            a_x=a_x, a_date=a_date, params=params_fixed, is_mult=is_mult))


def get_iqr_thresholds(s_diff, low=0.25, high=0.75):
    # Get thresholds based on inter quantile range
    q1 = s_diff.quantile(low)
    q3 = s_diff.quantile(high)
    iqr = q3 - q1
    thr_low = q1 - 1.5 * iqr
    thr_hi = q3 + 1.5 * iqr
    return thr_low, thr_hi


# TODO: REMOVE THIS FUNCTION
def get_model_outliers_withgap(df, window=3):
    # TODO: ADD CHECK, TO PREVENT REDUNDANT OPS IN DF WITHOUT GAPS

    df_nogap = df.pipe(model_utils.interpolate_df, include_mask=True)
    mask_step, mask_spike = get_model_outliers(df_nogap)

    # TODO: FOR EACH OF MASK STEP, MASK SPIKE, IF IT IS NONE, RETURN NONE
    if mask_spike is None and mask_step is None:
        return None, None
    if mask_spike is not None:
        df_nogap['mask_spike'] = mask_spike
    if mask_step is not None:
        df_nogap['mask_step'] = mask_step
        df_nogap['step_in_filled_gap'] = \
            df_nogap.mask_step * df_nogap.is_gap_filled
        df_nogap['mask_step_patch'] = df_nogap.step_in_filled_gap.shift(
            -1).fillna(0)

    df_nogap = df_nogap.loc[~df_nogap.is_gap_filled]

    if mask_step is not None:
        df_nogap['mask_step_patch'] = df_nogap.mask_step_patch.shift(
            1).fillna(0)
        df_nogap['mask_step'] = df_nogap.mask_step + df_nogap.mask_step_patch

    logger_info('df 1 - no gap:', df_nogap)

    if mask_step is not None:
        mask_step = df_nogap.mask_step.values
    if mask_spike is not None:
        mask_spike = df_nogap.mask_spike.values
    return mask_step, mask_spike

    # todo - clean up, return


# TODO: Add option - estimate_outl_size
# TODO: Add option - sigmoid steps
# TODO: ADD option - gaussian spikes
def get_model_outliers(df, window=3):
    """
    Identify outlier samples in a time series

    :param df: Input time series
    :type df: pandas.DataFrame
    :param window: The x-axis window to aggregate multiple steps/spikes
    :type window: int
    :return:
        | tuple (mask_step, mask_spike)
        | mask_step: True if sample contains a step
        | mask_spike: True if sample contains a spike
    :rtype: tuple of 2 numpy arrays of booleans

    TODO: require minimum number of samples to find an outlier
    """

    dfo = df.copy()  # dfo - df for outliers
    # If df has datetime index, use date logic in steps/spikes
    with_dates = 'date' in df.columns
    x_col = 'date' if with_dates else 'x'

    if df[x_col].duplicated().any():
        raise ValueError('Input cannot have multiple values per sample')

    # Get the differences
    dfo['dif'] = dfo.y.diff()

    # We consider as outliers the values that are
    # 1.5 * IQR (interquartile range) beyond the quartiles.
    # These thresholds are obtained here
    thr_low, thr_hi = get_iqr_thresholds(dfo.dif)
    # Now identify the changes
    dfo['ischange'] = ((dfo.dif < thr_low) | (dfo.dif > thr_hi)).astype(int)

    # Whenever there are two or more consecutive changes
    # (that is, within `window` samples), we group them together
    dfo['ischange_group'] = (
        dfo.ischange.rolling(window, win_type=None, center=True).max().fillna(
            0).astype(int)
    )

    # We now have to calculate the difference within the
    # same group in order to identify if the consecutive changes
    # result in a step, a spike, or both.

    # We get the filtered difference
    dfo['dif_filt'] = (dfo.dif * dfo.ischange).fillna(0)
    # And the absolute value of that
    dfo['dif_filt_abs'] = dfo.dif_filt.abs()

    dfo['change_group'] = dfo.ischange_group.diff(
    ).abs().fillna(0).astype(int).cumsum()

    # this gets us the average difference of the outliers within each change
    # group
    df_mean_gdiff = (
        dfo.loc[dfo.ischange.astype(bool)].groupby('change_group')[
            'dif_filt'].mean().rename('mean_group_diff').reset_index())
    # this gets us the average absolute difference of the outliers within each
    # change group
    df_mean_gdiff_abs = (
        dfo.loc[dfo.ischange.astype(bool)].groupby('change_group')[
            'dif_filt_abs'].mean().rename(
            'mean_group_diff_abs').reset_index()
    )

    # Merge the differences with the original dfo
    dfo = dfo.merge(
        df_mean_gdiff,
        how='left').merge(
        df_mean_gdiff_abs,
        how='left')
    # Fill missing values with zero -> no change
    dfo.mean_group_diff = dfo.mean_group_diff.fillna(0)
    dfo.mean_group_diff_abs = dfo.mean_group_diff_abs.fillna(0)

    # the change group is a step if the mean_group_diff exceeds the thresholds
    dfo['is_step'] = dfo['ischange_group'] & (
        ((dfo.mean_group_diff < thr_low) | (dfo.mean_group_diff > thr_hi)))

    # the change group is a spike if the difference between the
    # mean_group_diff_abs and the average mean_group_diff exceeds
    # the average threshold value
    dfo['is_spike'] = (dfo.mean_group_diff_abs -
                       dfo.mean_group_diff.abs()) > (thr_hi - thr_low) / 2

    # Get the outlier start and end points for each group
    df_outl = (
        dfo.loc[dfo.ischange.astype(bool)].groupby('change_group').apply(
            lambda x: pd.Series(
                {'outl_start': x[x_col].iloc[0],
                 'outl_end': x[x_col].iloc[-1]})).reset_index()
    )

    if df_outl.empty:  # No outliers - nothing to do
        return np.full(dfo.index.size, False), np.full(dfo.index.size, False)

    dfo = dfo.merge(df_outl, how='left')
    # Get the start and end points in dfo
    if with_dates:
        # Convert to datetime, if we are using dates
        dfo['outl_start'] = pd.to_datetime(dfo.outl_start)
        dfo['outl_end'] = pd.to_datetime(dfo.outl_end)
        # Create the mask for spikes and steps
        dfo['mask_spike'] = (dfo['is_spike'] &
                             (dfo.date >= pd.to_datetime(dfo.outl_start)) &
                             (dfo.date < pd.to_datetime(dfo.outl_end)))
        dfo['mask_step'] = (dfo['is_step'] &
                            (dfo.date >= pd.to_datetime(dfo.outl_start)) &
                            (dfo.date <= pd.to_datetime(dfo.outl_end)))
    else:
        # For non-date x values, we fill na's and convert to int
        dfo['outl_start'] = dfo.outl_start.fillna(0).astype(int)
        dfo['outl_end'] = dfo.outl_end.fillna(0).astype(int)
        # Create the mask for spikes and steps
        dfo['mask_spike'] = (dfo['is_spike'] &
                             (dfo.x >= dfo.outl_start) &
                             (dfo.x < dfo.outl_end))
        dfo['mask_step'] = (dfo['is_step'] &
                            (dfo.x >= dfo.outl_start) &
                            (dfo.x <= dfo.outl_end))

    return dfo.mask_step.values, dfo.mask_spike.values


def create_fixed_step(diff, x):
    # Generate a fixed step model
    fixed_params = [x, diff]
    return get_fixed_model(model_step, fixed_params)


def create_fixed_spike(diff, x, duration):
    # Generate a fixed spike model
    fixed_params = [diff, x, x + duration]
    return get_fixed_model(model_spike, fixed_params)


def create_fixed_spike_ignored(x, duration):
    # Generate a fixed spike ignored model
    fixed_params = [0, x, x + duration]
    return get_fixed_model(model_spike, fixed_params, is_mult=True)


# Dummy variable models

def get_model_dummy(name, dummy, **kwargs):
    """
    Generate a model based on a dummy variable.

    :param name: Name of the model
    :type name: basestring
    :param dummy:
      | Can be a function or a list-like.
      | If a function, it must be of the form f_dummy(a_x, a_date),
      | and return a numpy array of floats
      | with the same length as a_x and values that are either 0 or 1.
      | If a list-like of numerics, it will be converted to a f_dummy function
      | as described above, which will have values of 1 when a_x has one of
      | the values in the list, and 0 otherwise. If a list-like of date-likes,
      | it will be converted to a f_dummy function as described above, which
      | will have values of 1 when a_date has one of the values in the list,
      | and 0 otherwise.
    :type dummy: function, or list-like of numerics or datetime-likes
    :param kwargs:
    :type kwargs:
    :return:
      | A model that returns A when dummy is 1, and 0 (or 1 if is_mult==True)
      | otherwise.
    :rtype: ForecastModel


    """
    return ForecastModel(name, 1, get_f_model_dummy(dummy), **kwargs)


def _validate_f_dummy(f_dummy):
    # Ensures that behaviour of f_dummy matches specs
    # Must return array of floats, same length as a_x, with values either 0.
    # or 1.
    def validate_for_dummy(a_dummy):
        assert isinstance(a_dummy, np.ndarray)
        assert np.setdiff1d(a_dummy, np.array([0., 1.])).size == 0

    # validate_for_dummy(f_dummy(np.arange(0, 10), None)) # Crashes with
    # f_dummy 's that require dates
    validate_for_dummy(
        f_dummy(
            np.arange(
                0, 10), pd.date_range(
                '2018-01-01', '2018-01-10')))


def get_f_model_dummy(dummy):
    """
    Generate a model function for a dummy variable defined by f_dummy

    :param dummy: dummy variable
    :type dummy: function or list-like of numerics or dates
    :return: model function based on dummy variable, to use on a ForecastModel
    :rtype: function
    """

    if callable(dummy):  # If dummy is a function, use it
        f_dummy = dummy
    elif isinstance(dummy, Holiday):
        f_dummy = get_f_dummy_from_holiday(dummy)
    elif isinstance(dummy, AbstractHolidayCalendar):
        f_dummy = get_f_dummy_from_calendar(dummy)
    else:
        # If dummy is a list, convert to function
        f_dummy = get_f_dummy_from_list(dummy)

    _validate_f_dummy(f_dummy)

    def f_model_check(a_x, a_date, params, is_mult=False, **kwargs):
        # Uses internal f_check to assign 0 or 1 to each sample
        # If f_dummy(x)==1, return A
        # If f_dummy(x)==0, return 0 (or 1 if is_mult)
        [A] = params
        mask = f_dummy(a_x, a_date)
        if not is_mult:
            a_result = A * mask
        else:
            a_result = (A) * mask + 1
        return a_result

    return f_model_check


def get_f_dummy_from_list(list_check):
    """
    Generate a f_dummy function that defines a dummy variable, can be used
    for dummy models

    :param list_check: Input list
    :type list_check: list-like of numerics or datetime-likes
    :return: f_dummy
    :rtype: function
    """
    # Generate a f_dummy function that defines a dummy variable, can be used
    # for dummy models
    s_check = pd.Series(list_check)
    assert s_check.size, 'Input list cannot be empty'
    if pd.api.types.is_numeric_dtype(s_check):
        list_check_numeric = s_check

        def f_dummy_list_numeric(a_x, a_date):
            # return a_x in check_numeric
            return np.isin(a_x, list_check_numeric).astype(float)

        return f_dummy_list_numeric
    else:
        try:
            list_check_date = pd.to_datetime(s_check)

            def f_dummy_list_date(a_x, a_date):
                # return a_x in check_numeric
                return np.isin(a_date, list_check_date).astype(float)

            return f_dummy_list_date
        except BaseException:
            raise ValueError(
                'list_dummy must be a list-like with numeric or'
                'date-like values: %s', list_check)


def get_f_dummy_from_calendar(calendar):
    # Generate dummy model function from a pandas HolidayCalendar

    def f_dummy_calendar(a_x, a_date, **kwargs):
        # TODO: If we can pass dict_cal as an argument,
        #       use pre-loaded list of dates for performance

        # TODO: If we can guarantee sorted dates,
        #       change this to a_date[0], a_date[-1] for performance
        list_check_date = calendar.holidays(a_date.min(), a_date.max())
        return np.isin(a_date, list_check_date).astype(float)

    return f_dummy_calendar


def get_f_dummy_from_holiday(holiday):
    def f_dummy_holiday(a_x, a_date, **kwargs):
        # TODO: If we can pass dict_cal as an argument,
        #       use pre-loaded list of dates for performance
        # if dict_cal in kwargs.keys():
        #    list_check_date = dict_cal.get(holiday.name)
        # else:

        # TODO: If we can guarantee sorted dates,
        #       change this to a_date[0], a_date[-1] for performance
        list_check_date = holiday.dates(a_date.min(), a_date.max())
        return np.isin(a_date, list_check_date).astype(float)

    return f_dummy_holiday


model_season_wday_2 = get_model_dummy(
    'season_wday_2', lambda a_x, a_date, **kwargs:
    (a_date.weekday < 5).astype(float))

# Example dummy model - checks if it is Christmas
model_dummy_christmas = get_model_dummy(
    'dummy_christmas', lambda a_x, a_date, **kwargs:
    ((a_date.month == 12) & (a_date.day == 25)).astype(float))

# Example dummy model - checks if it is first day of month
model_dummy_month_start = get_model_dummy(
    'dummy_month_start', lambda a_x, a_date, **kwargs:
    (a_date.day == 1).astype(float))


class CalendarBankHolUK(AbstractHolidayCalendar):
    rules = [
        GoodFriday,
        EasterMonday,
        # Early May Bank Holiday - first Monday in May
        Holiday('Early May Bank Holiday', month=5, day=1,
                offset=DateOffset(weekday=MO(1))
                ),
        # Spring Bank Holiday - Last Monday in May
        Holiday('Spring Bank Holiday', month=5, day=31,
                offset=DateOffset(weekday=MO(-1))
                ),
        # August Bank holiday - Last Monday in August
        Holiday('August Bank Holiday', month=8, day=30,
                offset=DateOffset(weekday=MO(-1))
                )
    ]


class CalendarChristmasUK(AbstractHolidayCalendar):
    rules = [
        Holiday('New Year\'s Day', month=1, day=1, observance=next_monday),
        Holiday('Christmas', month=12, day=25, observance=next_monday),
        Holiday('Boxing Day', month=12, day=26,
                observance=next_monday_or_tuesday),
    ]


# Bank Holidays for Italy
class CalendarBankHolIta(AbstractHolidayCalendar):
    rules = [
        EasterMonday,
        Holiday('Festa della Liberazione', month=4, day=25),
        Holiday('Festa del lavoro', month=5, day=1),
        Holiday('Festa della Repubblica', month=6, day=2),
        Holiday('Ferragosto', month=8, day=15),
        Holiday('Tutti i Santi', month=11, day=1),
        Holiday('Immacolata Concezione', month=12, day=8),
    ]


class CalendarChristmasIta(AbstractHolidayCalendar):
    rules = [
        Holiday('New Year\'s Day', month=1, day=1, observance=next_monday),
        Holiday('Christmas', month=12, day=25, observance=next_monday),
        Holiday('Santo Stefano', month=12, day=26,
                observance=next_monday_or_tuesday),
        Holiday('Epiphany', month=1, day=6, observance=next_monday),
    ]


def get_model_from_calendars(l_calendar, name=None):
    """
    Create a ForecastModel based on a list of pandas Calendars.

    :param calendar:
    :type calendar: pandas.tseries.AbstractHolidayCalendar
    :return: model based on the input calendar
    :rtype: ForecastModel

    In pandas, Holidays and calendars provide a simple way to define
    holiday rules, to be used in any analysis that requires a predefined
    set of holidays. This function converts a Calendar object into a
    ForecastModel that assigns a parameter to each calendar rule.

    As an example, a Calendar with 1 rule defining Christmas dates
    generates a model with a single parameter, which
    determines the amount added/multiplied to samples falling on Christmas.
    A calendar with 2 rules for Christmas and New Year will have two parameters
    - the first one applying to samples in Christmas, and the second
    one applying to samples in New Year.

    Usage::

        from pandas.tseries.holiday import USFederalHolidayCalendar
        model_calendar = get_model_from_calendar(USFederalHolidayCalendar())

    """

    if isinstance(l_calendar, AbstractHolidayCalendar):
        l_calendar = [l_calendar]

    # Filter out calendars without rules
    l_calendar = [calendar for calendar in l_calendar if calendar.rules]

    assert len(l_calendar), 'Need 1+ valid calendars'

    if name is None:
        name = l_calendar[0].name

    l_model_dummy = [get_model_dummy(calendar.name, calendar)
                     for calendar in l_calendar]
    f_model_prod = np.prod(l_model_dummy)
    f_model_sum = np.sum(l_model_dummy)

    def _f_init_params_calendar(
            a_x=None, a_y=None, a_date=None, is_mult=False):
        if is_mult:
            return np.ones(len(l_model_dummy))
        else:
            return np.zeros(len(l_model_dummy))

    def _f_model_calendar(a_x, a_date, params, is_mult=False, **kwargs):
        f_all_dummies = f_model_prod if is_mult else f_model_sum
        return f_all_dummies(a_x, a_date, params, is_mult, **kwargs)

    model_calendar = ForecastModel(
        name,
        len(l_model_dummy),
        _f_model_calendar,
        _f_init_params_calendar
    )
    return model_calendar


model_calendar_uk = get_model_from_calendars(
    [CalendarChristmasUK(), CalendarBankHolUK()], 'calendar_uk')
model_calendar_us = get_model_from_calendars(USFederalHolidayCalendar(),
                                             'calendar_us')
# Calendar for Italy
model_calendar_ita = get_model_from_calendars(
    [CalendarChristmasUK(), CalendarBankHolUK()], 'calendar_uk')


def get_model_from_datelist(name=None, *args):
    """
    Create a ForecastModel based on one or more lists of dates.

    :param name: Model name
    :type name: str
    :param args: Each element in args is a list of dates.
    :type args:
    :return: model based on the input lists of dates
    :rtype: ForecastModel

    Usage::

        model_datelist1=get_model_from_date_list('datelist1',
                                                 [date1, date2, date3])
        model_datelists23 = get_model_from_date_list('datelists23',
                                                [date1, date2], [date3, date4])

    In the example above, model_datelist1 will have one parameter, which
    determines the amount added/multiplied to samples with dates matching
    either date1, date2 or date3. model_datelists23 will have two parameters
    - the first one applying to samples in date1 and date2, and the second
    one applying to samples in date 3 and date4

    """
    l_model_dummy = [get_model_dummy('model_dummy', pd.to_datetime(l_date))
                     for l_date in args]
    assert (len(l_model_dummy)), 'Need 1+ lists of dates'
    f_model_prod = np.prod(l_model_dummy)
    f_model_sum = np.sum(l_model_dummy)

    def _f_init_params_date_list(
            a_x=None, a_y=None, a_date=None, is_mult=False):
        if is_mult:
            return np.ones(len(l_model_dummy))
        else:
            return np.zeros(len(l_model_dummy))

    def _f_model_date_list(a_x, a_date, params, is_mult=False, **kwargs):
        f_all_dummies = f_model_prod if is_mult else f_model_sum
        return f_all_dummies(a_x, a_date, params, is_mult, **kwargs)

    model_date_list = ForecastModel(
        name,
        len(l_model_dummy),
        _f_model_date_list,
        _f_init_params_date_list
    )
    return model_date_list


# Utility functions

def fix_params_fmodel(forecast_model, l_params_fixed):
    """
    Given a forecast model and a list of floats, modify the model so that some
    of its parameters become fixed

    :param forecast_model: Input model
    :type forecast_model: ForecastModel
    :param l_params_fixed: List of floats with same length as number of
        parameters in model. For each element, a non-null value means
        that the parameter in that position is fixed to that value.
        A null value means that the parameter in that position is not fixed.
    :type l_params_fixed: list
    :return: A forecast model with a number of parameters equal to the number
        of null values in l_params_fixed, with f_model modified so that some
        of its parameters gain fixed values equal to the non-null values
        in l_params
    :rtype: ForecastModel
    """
    assert len(l_params_fixed) == forecast_model.n_params

    l_params_fixed = np.array(l_params_fixed)

    a_null = np.isnan(l_params_fixed)
    i_null = np.nonzero(a_null)

    name = '{}_fixed_{}'.format(
        forecast_model.name,
        str(l_params_fixed).replace(
            'nan',
            ':'))
    n_params = len(i_null[0])

    def f_model_fixed(a_x, a_date, params, is_mult=False, **kwargs):
        params_long = l_params_fixed
        params_long[i_null] = params
        return forecast_model.f_model(a_x, a_date, params_long, is_mult)

    def f_init_params_fixed(a_x=None, a_y=None, a_date=None, is_mult=False):
        # return params short
        params_init = forecast_model.f_init_params(a_x, a_y, a_date, is_mult)
        params_init_short = np.array(params_init)[i_null]
        return params_init_short

    def f_bounds_fixed(a_x=None, a_y=None, a_date=None):
        # return f_bounds short
        bounds_min, bounds_max = forecast_model.f_bounds(a_x, a_y, a_date)
        bounds_min_short = np.array(bounds_min)[i_null]
        bounds_max_short = np.array(bounds_max)[i_null]
        return bounds_min_short, bounds_max_short

    model_result = ForecastModel(
        name,
        n_params,
        f_model_fixed,
        f_init_params_fixed,
        f_bounds_fixed)
    return model_result


def simplify_model(f_model, a_x=None, a_y=None, a_date=None):
    """
    Check a model's bounds, and update model to make parameters fixed if their
    min and max bounds are equal

    :param f_model: Input model
    :type f_model: ForecastModel
    :param a_x: X axis for model function.
    :type a_x: numpy array of floats
    :param a_y: Input time series values, to compare to the model function
    :type a_y: numpy array of floats
    :param a_date: Dates for the input time series
    :type a_date: numpy array of datetimes
    :return: Model with simplified parameters based on bounds
    :rtype: ForecastModel
    """
    bounds_min, bounds_max = f_model.f_bounds(a_x, a_y, a_date)
    bounds_diff = np.array(bounds_max) - np.array(bounds_min)
    i_diff_zero = np.nonzero(bounds_diff == 0)
    # For any parameter, if bounds_min == bounds_max, that parameter becomes
    # fixed

    if i_diff_zero[0].size == 0:
        return f_model
    else:  # We make parameters fixed if their min and max bounds are equal
        params_fixed = np.full(f_model.n_params, np.NaN)
        params_fixed[i_diff_zero, ] = bounds_max[i_diff_zero, ]
        f_model = fix_params_fmodel(f_model, params_fixed)
        logger.info(
            'Some min and max bounds are equal - generating fixed model: %s',
            f_model.name)
        return f_model


def validate_initial_guess(initial_guess, bounds):
    # Check that initial parameter values fall within model bounds
    initial_guess = np.array(initial_guess)
    bounds_min, bounds_max = bounds
    return np.all(
        (initial_guess >= bounds_min) & (
            initial_guess <= bounds_max))


def get_l_model_auto_season(a_date, min_periods=1.5, season_add_mult='add',
                            l_season_yearly=None, l_season_weekly=None):
    """
    Generates a list of candidate seasonality models for an series of
    timestamps

    :param a_date: date array of a time series
    :type a_date: numpy array of timestamps
    :param min_periods: Minimum number of periods required to apply
        seasonality
    :type min_periods: float
    :param season_add_mult: 'add' or 'mult'
    :type is_mult: basestring
    :return: list of candidate seasonality models
    :rtype: list of ForecastModel
    """
    s_date = pd.Series(a_date).sort_values().drop_duplicates()
    min_date_delta = s_date.diff().min()
    max_date_delta = s_date.max() - s_date.min()

    if pd.isna(min_date_delta) or pd.isna(max_date_delta):
        return [model_null]

    use_season_yearly = (
        # Need more than a full year
        (max_date_delta > pd.Timedelta(min_periods * 365, unit='d')) &
        # Need at least quarterly samples
        (min_date_delta <= pd.Timedelta(92, unit='d'))
    )

    use_season_weekly = (
        # Need more than a full week
        (max_date_delta > pd.Timedelta(min_periods * 7, unit='d')) &
        # Need at least daily samples
        (min_date_delta <= pd.Timedelta(1, unit='d'))
    )

    l_season_yearly_default = [
        # model_season_month,
        model_season_fourier_yearly,
        model_null] if l_season_yearly is None else l_season_yearly
    l_season_weekly_default = [
        model_season_wday,
        model_null] if l_season_weekly is None else l_season_weekly

    if use_season_weekly:
        l_season_weekly = l_season_weekly_default
    else:
        l_season_weekly = [model_null]

    if use_season_yearly:
        l_season_yearly = l_season_yearly_default
        # TODO: add season_yearly_fourier
        # TODO: add holiday list
    else:
        l_season_yearly = [model_null]

    l_result = [model_null]
    for s_w, s_y in itertools.product(l_season_weekly, l_season_yearly):

        model_season_add = s_w + s_y
        model_season_mult = s_w * s_y

        if season_add_mult in ['add'] and model_season_add != model_null:
            l_result += [model_season_add]
        if season_add_mult in ['mult'] and \
                model_season_mult != model_null and \
                model_season_mult not in l_result:
            l_result += [model_season_mult]
    # Sort values to make results more predictable, testable
    l_result.sort()
    return l_result