# Author: Jean-Baptiste Schiratti <jean.baptiste.schiratti@gmail.com>
#         Alexandre Gramfort <alexandre.gramfort@inria.fr>
# License: BSD 3 clause

"""Univariate feature functions."""

from math import sqrt, log, floor, gamma

import numpy as np
from scipy import stats
from scipy.ndimage import convolve1d
from sklearn.neighbors import KDTree
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, explained_variance_score


from .mock_numba import nb
from .utils import (power_spectrum, _embed, _filt, _get_feature_funcs,
                    _wavelet_coefs, _idxiter, _psd_params_checker)


def get_univariate_funcs(sfreq):
    """Mapping between aliases and univariate feature functions.

    Parameters
    ----------
    sfreq : float
        Sampling rate of the data.

    Returns
    -------
    univariate_funcs : dict
    """
    return _get_feature_funcs(sfreq, __name__)


def _unbiased_autocorr(x, lags=50):
    """Unbiased autocorrelation function.

    The autocorrelation function is computed using the FFT of the signal.

    Parameters
    ----------
    x : ndarray, shape (n_times,)

    lags : int (default: 50)
        Number of lags for the autocorrelation function.

    Returns
    -------
    ndarray, shape (n_lags,)
    """
    n_times = x.shape[0]
    xm = x - np.mean(x)
    dnorm = np.r_[np.arange(1, n_times + 1), np.arange(n_times - 1, 0, -1)]
    fft = np.fft.fft(xm, n=n_times)
    acf = np.fft.ifft(fft * np.conjugate(fft))[:n_times]
    acf /= dnorm[n_times - 1:]
    acf = acf.real
    return acf[:(lags + 1)] / acf[0]


@nb.jit([nb.float64(nb.float64[:], nb.float64[:]),
         nb.float32(nb.float32[:], nb.float32[:])], nopython=True)
def _slope_lstsq(x, y):
    """Slope of a 1D least-squares regression.

    Utility function which returns the slope of the linear regression
    between x and y.

    Parameters
    ----------
    x : ndarray, shape (n_times,)

    y : ndarray, shape (n_times,)

    Returns
    -------
    float
    """
    n_times = x.shape[0]
    sx2 = 0
    sx = 0
    sy = 0
    sxy = 0
    for j in range(n_times):
        sx2 += x[j] ** 2
        sx += x[j]
        sxy += x[j] * y[j]
        sy += y[j]
    den = n_times * sx2 - (sx ** 2)
    num = n_times * sxy - sx * sy
    return num / den


@nb.jit([nb.float64[:](nb.float64[:]), nb.float32[:](nb.float32[:])],
        nopython=True)
def _accumulate_std(x):
    r = np.zeros((x.shape[0],), dtype=x.dtype)
    for j in range(1, x.shape[0]):
        m = 0
        for k in range(j + 1):
            m += x[k]
        m /= (j + 1)
        s = 0
        for k in range(j + 1):
            s += (x[k] - m) ** 2
        s /= j
        r[j] = sqrt(s)
    return r


@nb.jit([nb.float64[:](nb.float64[:]), nb.float32[:](nb.float32[:])],
        nopython=True)
def _accumulate_max(x):
    r = np.zeros((x.shape[0],), dtype=x.dtype)
    for j in range(x.shape[0]):
        m = -np.inf
        for k in range(j + 1):
            if x[k] >= m:
                m = x[k]
        r[j] = m
    return r


@nb.jit([nb.float64[:](nb.float64[:]), nb.float32[:](nb.float32[:])],
        nopython=True)
def _accumulate_min(x):
    r = np.zeros((x.shape[0],), dtype=x.dtype)
    for j in range(x.shape[0]):
        m = np.inf
        for k in range(j + 1):
            if x[k] <= m:
                m = x[k]
        r[j] = m
    return r


def compute_mean(data):
    """Mean of the data (per channel).

    Parameters
    ----------
    data : ndarray, shape (n_channels, n_times)

    Returns
    -------
    output : ndarray, shape (n_channels,)

    Notes
    -----
    Alias of the feature function: **mean**
    """
    return np.mean(data, axis=-1)


def compute_variance(data):
    """Variance of the data (per channel).

    Parameters
    ----------
    data : shape (n_channels, n_times)

    Returns
    -------
    output : ndarray, shape (n_channels,)

    Notes
    -----
    Alias of the feature function: **variance**
    """
    return np.var(data, axis=-1, ddof=1)


def compute_std(data):
    """Standard deviation of the data.

    Parameters
    ----------
    data : shape (n_channels, n_times)

    Returns
    -------
    output : ndarray, shape (n_channels)

    Notes
    -----
    Alias of the feature function: **std**
    """
    return np.std(data, axis=-1, ddof=1)


def compute_ptp_amp(data):
    """Peak-to-peak (PTP) amplitude of the data (per channel).

    Parameters
    ----------
    data : ndarray, shape (n_channels, n_times)

    Returns
    -------
    output : ndarray, shape (n_channels,)

    Notes
    -----
    Alias of the feature function: **ptp_amp**
    """
    return np.ptp(data, axis=-1)


def compute_skewness(data):
    """Skewness of the data (per channel).

    Parameters
    ----------
    data : ndarray, shape (n_channels, n_times)

    Returns
    -------
    output : ndarray, shape (n_channels,)

    Notes
    -----
    Alias of the feature function: **skewness**
    """
    ndim = data.ndim
    return stats.skew(data, axis=ndim - 1)


def compute_kurtosis(data):
    """Kurtosis of the data (per channel).

    Parameters
    ----------
    data : ndarray, shape (n_channels, n_times)

    Returns
    -------
    output : ndarray, shape (n_channels,)

    Notes
    -----
    Alias of the feature function: **kurtosis**
    """
    ndim = data.ndim
    return stats.kurtosis(data, axis=ndim - 1, fisher=False)


@nb.jit([nb.float64[:, :](nb.float64[:, :]),
         nb.float32[:, :](nb.float32[:, :])], nopython=True)
def _hurst_exp_compute_rs(x):
    """Utility function for :func:`compute_hurst_exp`.

    Parameters
    ----------
    x : ndarray, shape (n_seqs, n_times)

    Returns
    -------
    output : ndarray, shape (n_seqs, n_times - 1)
    """
    n_seqs, n_times = x.shape
    rs = np.zeros((n_seqs, n_times - 1), dtype=x.dtype)
    for j in range(n_seqs):
        m = 0
        for k in range(n_times):
            m += x[j, k]
        m /= n_times
        y = np.empty((n_times,))
        for k in range(n_times):
            y[k] = x[j, k] - m
        z = np.empty((n_times,))
        z[0] = y[0]
        for k in range(1, n_times):
            z[k] = z[k - 1] + y[k]
        r = _accumulate_max(z) - _accumulate_min(z)
        s = _accumulate_std(x[j, :])
        for k in range(1, n_times):
            if s[k] == 0:
                rs[j, k - 1] = np.nan
            else:
                rs[j, k - 1] = r[k] / s[k]
    return rs


def _hurst_exp_helper(x, n_splits=20):
    """Helper function for :func:`compute_hurst_exp`.

    Compute the Hurst exponent from a univariate time series. The Hurst
    exponent is defined as the slope of the least-squares regression line
    going through a cloud of `n_splits` points. Each point is obtained by
    considering sub-series of `x` of `n_splits` different lenghts.

    Parameters
    ----------
    x : ndarray, shape (n_times,)

    Returns
    -------
    output : ndarray, shape (n_splits,)
    """
    n_times = x.shape[0]
    _splits = np.floor(np.logspace(start=4, stop=np.log2(n_times / 2),
                                   num=n_splits, base=2.))
    splits = np.unique(_splits).astype(int)
    reg = np.zeros((splits.size,))
    for j, n in enumerate(splits):
        a = x.copy()
        d = int(floor(n_times / n))
        a = np.lib.stride_tricks.as_strided(a, shape=(d, n),
                                            strides=(n * a.strides[-1],
                                                     a.strides[-1]))
        _rs = _hurst_exp_compute_rs(a)
        _rs = _rs[~np.isnan(_rs)]
        reg[j] = np.log(np.mean(_rs))
        s = sum([sqrt((n - i) / i) for i in range(1, n)]) * ((n - 0.5) / n)
        if n <= 340:
            corr = (gamma((n - 1) / 2.) / (sqrt(np.pi) * gamma(n / 2.))) * s
        else:
            corr = ((n - 0.5) / n) * (1. / sqrt(np.pi * n / 2.)) * s
        reg[j] -= (np.log(corr) - np.log(n) / 2)
    return _slope_lstsq(np.log(splits), reg)


def compute_hurst_exp(data):
    """Hurst exponent of the data (per channel).

    Parameters
    ----------
    data : ndarray, shape (n_channels, n_times)

    Returns
    -------
    output : ndarray, shape (n_channels,)

    Notes
    -----
    Alias of the feature function: **hurst_exp**. See [1]_ and [2]_.

    References
    ----------
    .. [1] Rasheed, B. Q. K. et al. (2004). Hurst exponent and financial
           market predictability. In IASTED conference on Financial
           Engineering and Applications (FEA 2004) (pp. 203-209).

    .. [2] Devarajan, K. et al. (2014). EEG-Based Epilepsy Detection and
           Prediction. International Journal of Engineering and Technology,
           6(3), 212.
    """
    n_channels, n_times = data.shape
    hurst = np.empty((n_channels,))
    for j in range(n_channels):
        hurst[j] = _hurst_exp_helper(data[j, :])
    return hurst


def _app_samp_entropy_helper(data, emb, metric='chebyshev',
                             approximate=True):
    """Utility function for `compute_app_entropy`` and `compute_samp_entropy`.

    Parameters
    ----------
    data : ndarray, shape (n_channels, n_times)

    emb : int (default: 2)
        Embedding dimension.

    metric : str (default: chebyshev)
        Name of the metric function used with KDTree. The list of available
        metric functions is given by: ``KDTree.valid_metrics``.

    approximate : bool (default: True)
        If True, the returned values will be used to compute the
        Approximate Entropy (AppEn). Otherwise, the values are used to compute
        the Sample Entropy (SampEn).

    Returns
    -------
    output : ndarray, shape (n_channels, 2)
    """
    _all_metrics = KDTree.valid_metrics
    if metric not in _all_metrics:
        raise ValueError('The given metric (%s) is not valid. The valid '
                         'metric names are: %s' % (metric, _all_metrics))
    n_channels, n_times = data.shape
    phi = np.empty((n_channels, 2))
    for j in range(n_channels):
        r = 0.2 * np.std(data[j, :], axis=-1, ddof=1)
        # compute phi(emb, r)
        _emb_data1 = _embed(data[j, None], emb, 1)[0, :, :]
        if approximate:
            emb_data1 = _emb_data1
        else:
            emb_data1 = _emb_data1[:-1, :]
        count1 = KDTree(emb_data1, metric=metric).query_radius(
            emb_data1, r, count_only=True).astype(np.float64)
        # compute phi(emb + 1, r)
        emb_data2 = _embed(data[j, None], emb + 1, 1)[0, :, :]
        count2 = KDTree(emb_data2, metric=metric).query_radius(
            emb_data2, r, count_only=True).astype(np.float64)
        if approximate:
            phi[j, 0] = np.mean(np.log(count1 / emb_data1.shape[0]))
            phi[j, 1] = np.mean(np.log(count2 / emb_data2.shape[0]))
        else:
            phi[j, 0] = np.mean((count1 - 1) / (emb_data1.shape[0] - 1))
            phi[j, 1] = np.mean((count2 - 1) / (emb_data2.shape[0] - 1))
    return phi


def compute_app_entropy(data, emb=2, metric='chebyshev'):
    """Approximate Entropy (AppEn, per channel).

    Parameters
    ----------
    data : ndarray, shape (n_channels, n_times)

    emb : int (default: 2)
        Embedding dimension.

    metric : str (default: chebyshev)
        Name of the metric function used with
        :class:`~sklearn.neighbors.KDTree`. The list of available
        metric functions is given by: ``KDTree.valid_metrics``.

    Returns
    -------
    output : ndarray, shape (n_channels,)

    Notes
    -----
    Alias of the feature function: **app_entropy**. See [1]_.

    References
    ----------
    .. [1] Richman, J. S. et al. (2000). Physiological time-series analysis
           using approximate entropy and sample entropy. American Journal of
           Physiology-Heart and Circulatory Physiology, 278(6), H2039-H2049.
    """
    phi = _app_samp_entropy_helper(data, emb=emb, metric=metric,
                                   approximate=True)
    return np.subtract(phi[:, 0], phi[:, 1])


def compute_samp_entropy(data, emb=2, metric='chebyshev'):
    """Sample Entropy (SampEn, per channel).

    Parameters
    ----------
    data : ndarray, shape (n_channels, n_times)

    emb : int (default: 2)
        Embedding dimension.

    metric : str (default: chebyshev)
        Name of the metric function used with KDTree. The list of available
        metric functions is given by: `KDTree.valid_metrics`.

    Returns
    -------
    output : ndarray, shape (n_channels,)

    Notes
    -----
    Alias of the feature function: **samp_entropy**. See [1]_.

    References
    ----------
    .. [1] Richman, J. S. et al. (2000). Physiological time-series analysis
           using approximate entropy and sample entropy. American Journal of
           Physiology-Heart and Circulatory Physiology, 278(6), H2039-H2049.
    """
    phi = _app_samp_entropy_helper(data, emb=emb, metric=metric,
                                   approximate=False)
    if np.allclose(phi[:, 0], 0) or np.allclose(phi[:, 1], 0):
        raise ValueError('Sample Entropy is not defined.')
    else:
        return -np.log(np.divide(phi[:, 1], phi[:, 0]))


def compute_decorr_time(sfreq, data):
    """Decorrelation time (per channel).

    Parameters
    ----------
    sfreq : float
        Sampling rate of the data.

    data : ndarray, shape (n_channels, n_times)

    Returns
    -------
    output : ndarray, shape (n_channels,)

    Notes
    -----
    Alias of the feature function: **decorr_time**. See [1]_.

    References
    ----------
    .. [1] Teixeira, C. A. et al. (2011). EPILAB: A software package for
           studies on the prediction of epileptic seizures. Journal of
           Neuroscience Methods, 200(2), 257-271.
    """
    n_channels, n_times = data.shape
    decorrelation_times = np.empty((n_channels,))
    for j in range(n_channels):
        _acf = _unbiased_autocorr(data[j, :])
        zc = np.diff(np.sign(_acf)) != 0
        if np.any(zc):
            decorr_time = np.argmax(zc) + 1
            decorr_time /= sfreq
        else:
            decorr_time = -1
        decorrelation_times[j] = decorr_time
    return decorrelation_times


def _freq_bands_helper(sfreq, freq_bands):
    """Utility function to define frequency bands.

    This utility function is to be used with :func:`compute_pow_freq_bands` and
    :func:`compute_energy_freq_bands`. It essentially checks if the given
    parameter ``freq_bands`` is valid and raises an error if not.

    Parameters
    ----------
    sfreq : float
        Sampling rate of the data.

    freq_bands : ndarray, shape (n_freq_bands + 1,) or (n_freq_bands, 2)
        Array defining frequency bands.

    Returns
    -------
    valid_freq_bands : ndarray, shape (n_freq_bands, 2)
    """
    if not np.logical_and(freq_bands >= 0, freq_bands <= sfreq / 2).all():
        raise ValueError('The entries of the given `freq_bands` parameter '
                         '(%s) must be positive and less than the Nyquist '
                         'frequency.' % str(freq_bands))
    else:
        if freq_bands.ndim == 1:
            n_freq_bands = freq_bands.shape[0] - 1
            valid_freq_bands = np.empty((n_freq_bands, 2))
            for j in range(n_freq_bands):
                valid_freq_bands[j, :] = freq_bands[j:j + 2]
        elif freq_bands.ndim == 2 and freq_bands.shape[-1] == 2:
            valid_freq_bands = freq_bands
        else:
            raise ValueError('The given value (%s) for the `freq_bands` '
                             'parameter is not valid. Only 1D or 2D arrays '
                             'with shape (n_freq_bands, 2) are accepted.'
                             % str(freq_bands))
        return valid_freq_bands


def compute_pow_freq_bands(sfreq, data, freq_bands=np.array([0.5, 4., 8., 13.,
                                                             30., 100.]),
                           normalize=True, ratios=None, psd_method='welch',
                           psd_params=None):
    """Power Spectrum (computed by frequency bands).

    Parameters
    ----------
    sfreq : float
        Sampling rate of the data.

    data : ndarray, shape (n_channels, n_times)

    freq_bands : ndarray or dict (default: np.array([.5, 4, 8, 13, 30, 100]))
        The parameter ``freq_bands`` should be either a ndarray with shape
        ``(n_freq_bands + 1,)`` or ``(n_freq_bands, 2)`` or a dict. If ndarray
        with shape ``(n_freq_bands + 1,)``, the entries define **contiguous**
        frequency bands as follows: the i-th frequency band is defined as:
        [freq_bands[i], freq_bands[i + 1]] (0 <= i <= n_freq_bands - 1). If
        ndarray with shape ``(n_freq_bands, 2)``, the rows of ``freq_bands``
        define **non-contiguous** frequency bands. If dict, the keys should be
        strings (names of the frequency bands) and the values, the
        corresponding bands (as ndarray with shape (2,) or list of length 2).
        When ``freq_bands`` is of type dict, the keys are used to generate the
        feature names (only used when features are extracted with
        ``return_as_df=True``). The values of ``freq_bands`` should be between
        0 and sfreq / 2 (the Nyquist frequency) as the function uses the
        one-sided PSD.

    normalize : bool (default: True)
        If True, the average power in each frequency band is normalized by
        the total power.

    ratios : str or None (default: None)
        If not None, the possible values for the parameter ``ratios`` are:
        ``all`` or ``only``. If ``all``, the function will return the power
        (computed in the given frequency bands) as well as the ratios between
        power in different frequency bands. All the possible pairs of distinct
        frequency bands are considered (if n_freq_bands are given,
        n_freq_bands * (n_freq_bands - 1) are computed). If ``only``, the
        function returns only the ratios of power in bands. If None, no
        ratio is computed.

    psd_method : str (default: 'welch')
        Method used for the estimation of the Power Spectral Density (PSD).
        Valid methods are: ``'welch'``, ``'multitaper'`` or ``'fft'``.

    psd_params : dict or None (default: None)
        If not None, dict with optional parameters (`welch_n_fft`,
        `welch_n_per_seg`, `welch_n_overlap`) to be passed to
        :func:`mne_features.utils.power_spectrum`. If None, default parameters
        are used (see doc for :func:`mne_features.utils.power_spectrum`).

    Returns
    -------
    output : ndarray, shape (n_channels * (n_freqs - 1),)

    Notes
    -----
    Alias of the feature function: **pow_freq_bands**. See [1]_.

    References
    ----------
    .. [1] Teixeira, C. A. et al. (2011). EPILAB: A software package for
           studies on the prediction of epileptic seizures. Journal of
           Neuroscience Methods, 200(2), 257-271.
    """
    n_channels = data.shape[0]
    if isinstance(freq_bands, dict):
        _freq_bands = np.asarray([freq_bands[n] for n in freq_bands])
    else:
        _freq_bands = np.asarray(freq_bands)
    fb = _freq_bands_helper(sfreq, _freq_bands)
    n_freq_bands = fb.shape[0]
    _psd_params = _psd_params_checker(psd_params)
    psd, freqs = power_spectrum(sfreq, data, psd_method=psd_method,
                                **_psd_params)
    pow_freq_bands = np.empty((n_channels, n_freq_bands))
    for j in range(n_freq_bands):
        mask = np.logical_and(freqs >= fb[j, 0], freqs <= fb[j, 1])
        psd_band = psd[:, mask]
        pow_freq_bands[:, j] = np.sum(psd_band, axis=-1)
    if normalize:
        pow_freq_bands = np.divide(pow_freq_bands,
                                   np.sum(psd, axis=-1)[:, None])
    if ratios is None:
        return pow_freq_bands.ravel()
    elif ratios not in ['all', 'only']:
        raise ValueError('The given value (%s) for the parameter `ratios` '
                         'is not valid. Valid values are: `all` or `only`.'
                         % str(ratios))
    else:
        band_ratios = np.empty((n_channels, n_freq_bands * (n_freq_bands - 1)))
        for pos, i, j in _idxiter(n_freq_bands, triu=False):
            band_ratios[:, pos] = (pow_freq_bands[:, i] / pow_freq_bands[:, j])
        if ratios == 'all':
            return np.r_[pow_freq_bands.ravel(), band_ratios.ravel()]
        else:
            return band_ratios.ravel()


def _compute_pow_freq_bands_feat_names(data, freq_bands, normalize, ratios,
                                       psd_method, psd_params):
    """Utility function to create feature names compatible with the output
    of :func:`compute_pow_freq_bands`."""
    n_channels = data.shape[0]
    if isinstance(freq_bands, dict):
        n_freq_bands = len(freq_bands)
        _band_names = [str(n) for n in freq_bands]
    else:
        n_freq_bands = (freq_bands.shape[0] - 1 if freq_bands.ndim == 1
                        else freq_bands.shape[0])
        _band_names = ['band' + str(j) for j in range(n_freq_bands)]
    ratios_names = ['ch%s_%s/%s' % (ch, _band_names[i], _band_names[j])
                    for ch in range(n_channels) for _, i, j in
                    _idxiter(n_freq_bands, triu=False)]
    pow_names = ['ch%s_%s' % (ch, _band_names[i]) for ch in
                 range(n_channels) for i in range(n_freq_bands)]
    if ratios is None:
        return pow_names
    elif ratios == 'only':
        return ratios_names
    else:
        return pow_names + ratios_names


compute_pow_freq_bands.get_feature_names = _compute_pow_freq_bands_feat_names


def compute_hjorth_mobility_spect(sfreq, data, normalize=False,
                                  psd_method='welch', psd_params=None):
    """Hjorth mobility (per channel).

    Hjorth mobility parameter computed from the Power Spectrum of the data.

    Parameters
    ----------
    sfreq : float
        Sampling rate of the data.

    data : ndarray, shape (n_channels, n_times)

    normalize : bool (default: False)
        Normalize the result by the total power.

    psd_method : str (default: 'welch')
        Method used for the estimation of the Power Spectral Density (PSD).
        Valid methods are: ``'welch'``, ``'multitaper'`` or ``'fft'``.

    psd_params : dict or None (default: None)
        If not None, dict with optional parameters (`welch_n_fft`,
        `welch_n_per_seg`, `welch_n_overlap`) to be passed to
        :func:`mne_features.utils.power_spectrum`. If None, default parameters
        are used (see doc for :func:`mne_features.utils.power_spectrum`).

    Returns
    -------
    output : ndarray, shape (n_channels,)

    Notes
    -----
    Alias of the feature function: **hjorth_mobility_spect**. See [1]_ and
    [2]_.

    References
    ----------
    .. [1] Mormann, F. et al. (2006). Seizure prediction: the long and
           winding road. Brain, 130(2), 314-333.

    .. [2] Teixeira, C. A. et al. (2011). EPILAB: A software package for
           studies on the prediction of epileptic seizures. Journal of
           Neuroscience Methods, 200(2), 257-271.
    """
    _psd_params = _psd_params_checker(psd_params)
    psd, freqs = power_spectrum(sfreq, data, psd_method=psd_method,
                                **_psd_params)
    w_freqs = np.power(freqs, 2)
    mobility = np.sum(np.multiply(psd, w_freqs), axis=-1)
    if normalize:
        mobility = np.divide(mobility, np.sum(psd, axis=-1))
    return mobility


def compute_hjorth_complexity_spect(sfreq, data, normalize=False,
                                    psd_method='welch', psd_params=None):
    """Hjorth complexity (per channel).

    Hjorth complexity parameter computed from the Power Spectrum of the data.

    Parameters
    ----------
    sfreq : float
        Sampling rate of the data.

    data : ndarray, shape (n_channels, n_times)

    normalize : bool (default: False)
        Normalize the result by the total power.

    psd_method : str (default: 'welch')
        Method used for the estimation of the Power Spectral Density (PSD).
        Valid methods are: ``'welch'``, ``'multitaper'`` or ``'fft'``.

    psd_params : dict or None (default: None)
        If not None, dict with optional parameters (`welch_n_fft`,
        `welch_n_per_seg`, `welch_n_overlap`) to be passed to
        :func:`mne_features.utils.power_spectrum`. If None, default parameters
        are used (see doc for :func:`mne_features.utils.power_spectrum`).

    Returns
    -------
    output : ndarray, shape (n_channels,)

    Notes
    -----
    Alias of the feature function: **hjorth_complexity_spect**. See [1]_ and
    [2]_.

    References
    ----------
    .. [1] Mormann, F. et al. (2006). Seizure prediction: the long and
           winding road. Brain, 130(2), 314-333.

    .. [2] Teixeira, C. A. et al. (2011). EPILAB: A software package for
           studies on the prediction of epileptic seizures. Journal of
           Neuroscience Methods, 200(2), 257-271.
    """
    _psd_params = _psd_params_checker(psd_params)
    psd, freqs = power_spectrum(sfreq, data, psd_method=psd_method,
                                **_psd_params)
    w_freqs = np.power(freqs, 4)
    complexity = np.sum(np.multiply(psd, w_freqs), axis=-1)
    if normalize:
        complexity = np.divide(complexity, np.sum(psd, axis=-1))
    return complexity


def compute_hjorth_mobility(data):
    """Hjorth mobility (per channel).

    Hjorth mobility parameter computed in the time domain.

    Parameters
    ----------
    data : ndarray, shape (n_channels, n_times)

    Returns
    -------
    output : ndarray, shape (n_channels,)

    Notes
    -----
    Alias of the feature function: **hjorth_mobility**. See [1]_.

    References
    ----------
    .. [1] Paivinen, N. et al. (2005). Epileptic seizure detection: A
           nonlinear viewpoint. Computer methods and programs in biomedicine,
           79(2), 151-159.
    """
    x = np.insert(data, 0, 0, axis=-1)
    dx = np.diff(x, axis=-1)
    sx = np.std(x, ddof=1, axis=-1)
    sdx = np.std(dx, ddof=1, axis=-1)
    mobility = np.divide(sdx, sx)
    return mobility


def compute_hjorth_complexity(data):
    """Hjorth complexity (per channel).

    Hjorth complexity parameter computed in the time domain.

    Parameters
    ----------
    data : ndarray, shape (n_channels, n_times)

    Returns
    -------
    output : ndarray, shape (n_channels,)

    Notes
    -----
    Alias of the feature function: **hjorth_complexity**. See [1]_.

    References
    ----------
    .. [1] Paivinen, N. et al. (2005). Epileptic seizure detection: A
           nonlinear viewpoint. Computer methods and programs in biomedicine,
           79(2), 151-159.
    """
    x = np.insert(data, 0, 0, axis=-1)
    dx = np.diff(x, axis=-1)
    m_dx = compute_hjorth_mobility(dx)
    m_x = compute_hjorth_mobility(data)
    complexity = np.divide(m_dx, m_x)
    return complexity


@nb.jit([nb.float64[:](nb.float64[:, :], nb.int64),
         nb.float32[:](nb.float32[:, :], nb.int32)], nopython=True)
def _higuchi_fd(data, kmax):
    """Utility function for :func:`compute_higuchi_fd`.

    Parameters
    ----------
    data : ndarray, shape (n_channels, n_times)

    kmax : int

    Returns
    -------
    output : ndarray, shape (n_channels,)
    """
    n_channels, n_times = data.shape
    higuchi = np.empty((n_channels,), dtype=data.dtype)
    for s in range(n_channels):
        kmax = np.int64(kmax)
        lk = np.empty((kmax,))
        x_reg = np.empty((kmax,))
        y_reg = np.empty((kmax,))
        for k in range(1, kmax + 1):
            lm = np.empty((k,))
            for m in range(k):
                ll = 0
                n_max = floor((n_times - m - 1) / k)
                n_max = int(n_max)
                for j in range(1, n_max):
                    ll += abs(data[s, m + j * k] - data[s, m + (j - 1) * k])
                ll /= k
                ll *= (n_times - 1) / (k * n_max)
                lm[m] = ll
            # Mean of lm
            m_lm = 0
            for m in range(k):
                m_lm += lm[m]
            m_lm /= k
            lk[k - 1] = m_lm
            x_reg[k - 1] = log(1. / k)
            y_reg[k - 1] = log(m_lm)
        higuchi[s] = _slope_lstsq(x_reg, y_reg)
    return higuchi


def compute_higuchi_fd(data, kmax=10):
    """Higuchi Fractal Dimension (per channel).

    Parameters
    ----------
    data : ndarray, shape (n_channels, n_times)

    kmax : int (default: 10)
        Maximum delay/offset (in number of samples).

    Returns
    -------
    output : ndarray, shape (n_channels,)

    Notes
    -----
    Alias of the feature function: **higuchi_fd**. See [1]_ and [2]_.

    References
    ----------
    .. [1] Esteller, R. et al. (2001). A comparison of waveform fractal
           dimension algorithms. IEEE Transactions on Circuits and Systems I:
           Fundamental Theory and Applications, 48(2), 177-183.

    .. [2] Paivinen, N. et al. (2005). Epileptic seizure detection: A
           nonlinear viewpoint. Computer methods and programs in biomedicine,
           79(2), 151-159.
    """
    return _higuchi_fd(data, kmax)


def compute_katz_fd(data):
    """Katz Fractal Dimension (per channel).

    Parameters
    ----------
    data : ndarray, shape (n_channels, n_times)

    Returns
    -------
    output : ndarray, shape (n_channels,)

    Notes
    -----
    Alias of the feature function: **katz_fd**. See [1]_.

    References
    ----------
    .. [1] Esteller, R. et al. (2001). A comparison of waveform fractal
           dimension algorithms. IEEE Transactions on Circuits and Systems I:
           Fundamental Theory and Applications, 48(2), 177-183.
    """
    dists = np.abs(np.diff(data, axis=-1))
    ll = np.sum(dists, axis=-1)
    a = np.mean(dists, axis=-1)
    ln = np.log10(np.divide(ll, a))
    aux_d = data - data[:, 0, None]
    d = np.max(np.abs(aux_d[:, 1:]), axis=-1)
    katz = np.divide(ln, np.add(ln, np.log10(np.divide(d, ll))))
    return katz


def compute_zero_crossings(data, threshold=np.finfo(np.float64).eps):
    """Number of zero-crossings (per channel).

    The ``threshold`` parameter is used to clip 'small' values to zero.
    Changing its default value is likely to affect the number of
    zero-crossings returned by the function.

    Parameters
    ----------
    data : ndarray, shape (n_channels, n_times)

    threshold : float (default: np.finfo(np.float64).eps)
        Threshold used to determine when a float should de treated as zero.

    Returns
    -------
    output : ndarray, shape (n_channels,)

    Notes
    -----
    Alias of the feature function: **zero_crossings**
    """
    _data = data.copy()
    # clip 'small' values to 0
    _data[np.abs(_data) < threshold] = 0
    sgn = np.sign(_data)
    # sgn may already contain 0 values (either 'true' zeros or clipped values)
    aux = np.diff((sgn == 0).astype(np.int64), axis=-1)
    count = np.sum(aux == 1, axis=-1) + (_data[:, 0] == 0)
    # zero between two consecutive time points (data[i] * data[i + 1] < 0)
    mask_implicit_zeros = sgn[:, 1:] * sgn[:, :-1] < 0
    count += np.sum(mask_implicit_zeros, axis=-1)
    return count


def compute_line_length(data):
    """Line length (per channel).

    Parameters
    ----------
    data : ndarray, shape (n_channels, n_times)

    Returns
    -------
    output : ndarray, shape (n_channels,)

    Notes
    -----
    Alias of the feature function: **line_length**. See [1]_.

    References
    ----------
    .. [1] Esteller, R. et al. (2001). Line length: an efficient feature for
           seizure onset detection. In Engineering in Medicine and Biology
           Society, 2001. Proceedings of the 23rd Annual International
           Conference of the IEEE (Vol. 2, pp. 1707-1710). IEEE.
    """
    return np.mean(np.abs(np.diff(data, axis=-1)), axis=-1)


def compute_spect_entropy(sfreq, data, psd_method='welch', psd_params=None):
    """Spectral Entropy (per channel).

    Spectral Entropy is defined to be the Shannon Entropy of the Power
    Spectrum of the data.

    Parameters
    ----------
    sfreq : float
        Sampling rate of the data

    data : ndarray, shape (n_channels, n_times)

    psd_method : str (default: 'welch')
        Method used for the estimation of the Power Spectral Density (PSD).
        Valid methods are: ``'welch'``, ``'multitaper'`` or ``'fft'``.

    psd_params : dict or None (default: None)
        If not None, dict with optional parameters (`welch_n_fft`,
        `welch_n_per_seg`, `welch_n_overlap`) to be passed to
        :func:`mne_features.utils.power_spectrum`. If None, default parameters
        are used (see doc for :func:`mne_features.utils.power_spectrum`).

    Returns
    -------
    output : ndarray, shape (n_channels,)

    Notes
    -----
    Alias of the feature function: **spect_entropy**. See [1]_.

    References
    ----------
    .. [1] Inouye, T. et al. (1991). Quantification of EEG irregularity by
           use of the entropy of the power spectrum. Electroencephalography
           and clinical neurophysiology, 79(3), 204-210.
    """
    _psd_params = _psd_params_checker(psd_params)
    psd, _ = power_spectrum(sfreq, data, psd_method=psd_method, **_psd_params)
    m = np.sum(psd, axis=-1)
    psd_norm = np.divide(psd[:, 1:], m[:, None])
    return -np.sum(np.multiply(psd_norm, np.log2(psd_norm)), axis=-1)


def compute_svd_entropy(data, tau=2, emb=10):
    """SVD entropy (per channel).

    Parameters
    ----------
    data : ndarray, shape (n_channels, n_times)

    tau : int (default: 2)
        Delay (number of samples).

    emb : int (default: 10)
        Embedding dimension.

    Returns
    -------
    output : ndarray, shape (n_channels,)

    Notes
    -----
    Alias of the feature function: **svd_entropy**. See [1]_.

    References
    ----------
    .. [1] Roberts, S. J. et al. (1999). Temporal and spatial complexity
           measures for electroencephalogram based brain-computer interfacing.
           Medical & biological engineering & computing, 37(1), 93-98.
    """
    _, sv, _ = np.linalg.svd(_embed(data, d=emb, tau=tau))
    m = np.sum(sv, axis=-1)
    sv_norm = np.divide(sv, m[:, None])
    return -np.sum(np.multiply(sv_norm, np.log2(sv_norm)), axis=-1)


def compute_spect_slope(sfreq, data, fmin=0.1, fmax=50,
                        with_intercept=True, psd_method='welch',
                        psd_params=None):
    """Linear regression of the the log-log frequency-curve (per channel).

    Using a linear regression, the function estimates the slope and the
    intercept (if ``with_intercept`` is True) of the Power Spectral Density
    (PSD) in the log-log scale. In addition to this, the Mean Square Error
    (MSE) and R2 coefficient (goodness-of-fit) are returned. By default, the
    [0.1Hz, 50Hz] frequency range is used for the regression.

    Parameters
    ----------
    sfreq : float
        Sampling rate of the data.

    data : ndarray, shape (n_channels, n_times)

    fmin : float (default: 0.1)
        Lower bound of the frequency range considered in the linear regression.

    fmax : float (default: 50)
        Upper bound of the frequency range considered in the linear regression.

    with_intercept : bool (default: True)
        If True, the intercept of the linear regression is included among the
        features returned by the function. If False, only the slope, the MSE
        and the R2 coefficient are returned.

    psd_method : str (default: 'welch')
        Method used for the estimation of the Power Spectral Density (PSD).
        Valid methods are: ``'welch'``, ``'multitaper'`` or ``'fft'``.

    psd_params : dict or None (default: None)
        If not None, dict with optional parameters (`welch_n_fft`,
        `welch_n_per_seg`, `welch_n_overlap`) to be passed to
        :func:`mne_features.utils.power_spectrum`. If None, default parameters
        are used (see doc for :func:`mne_features.utils.power_spectrum`).

    Returns
    -------
    output : ndarray, shape (n_channels * 4,)
        The four characteristics: intercept, slope, MSE, and R2 per channel.

    Notes
    -----
    Alias of the feature function: **spect_slope**. See [1]_
    and [2]_.

    References
    ----------
    .. [1] Demanuelle C. et al. (2007). Distinguishing low frequency
           oscillations within the 1/f spectral behaviour of electromagnetic
           brain signals. Behavioral and Brain Functions (BBF).

    .. [2] Winkler I. et al. (2011). Automatic Classification of Artifactual
           ICA-Components for Artifact Removal in EEG Signals. Behavioral and
           Brain Functions (BBF).
    """
    n_channels = data.shape[0]
    _psd_params = _psd_params_checker(psd_params)
    psd, freqs = power_spectrum(sfreq, data, psd_method=psd_method,
                                **_psd_params)

    # mask limiting to input freq_range
    mask = np.logical_and(freqs >= fmin, freqs <= fmax)

    # freqs and psd selected over input freq_range and expressed in log scale
    freqs, psd = np.log10(freqs[mask]), np.log10(psd[:, mask])

    # linear fit
    lm = LinearRegression()
    fit_info = np.empty((n_channels, 4))
    for idx, power in enumerate(psd):
        lm.fit(freqs.reshape(-1, 1), power)
        fit_info[idx, 0] = lm.intercept_
        fit_info[idx, 1] = lm.coef_
        power_estimate = lm.predict(freqs.reshape(-1, 1))
        fit_info[idx, 2] = mean_squared_error(power, power_estimate)
        fit_info[idx, 3] = explained_variance_score(power, power_estimate)
    if not with_intercept:
        fit_info = fit_info[:, 1:]
    return fit_info.ravel()


def compute_svd_fisher_info(data, tau=2, emb=10):
    """SVD Fisher Information (per channel).

    Parameters
    ----------
    data : ndarray, shape (n_channels, n_times)

    tau : int (default: 2)
        Delay (number of samples).

    emb : int (default: 10)
        Embedding dimension.

    Returns
    -------
    output : ndarray, shape (n_channels,)

    Notes
    -----
    Alias of the feature function: **svd_fisher_info**. See [1]_.

    References
    ----------
    .. [1] Roberts, S. J. et al. (1999). Temporal and spatial complexity
           measures for electroencephalogram based brain-computer interfacing.
           Medical & biological engineering & computing, 37(1), 93-98.
    """
    _, sv, _ = np.linalg.svd(_embed(data, d=emb, tau=tau))
    m = np.sum(sv, axis=-1)
    sv_norm = np.divide(sv, m[:, None])
    aux = np.divide(np.diff(sv_norm, axis=-1) ** 2, sv_norm[:, :-1])
    return np.sum(aux, axis=-1)


def compute_energy_freq_bands(sfreq, data, freq_bands=np.array([0.5, 4., 8.,
                                                                13., 30.,
                                                                100.]),
                              deriv_filt=True):
    """Band energy (per channel).

    Parameters
    ----------
    sfreq : float
        Sampling rate of the data.

    data : ndarray, shape (n_channels, n_times)

    freq_bands : ndarray or dict (default: np.array([.5, 4, 8, 13, 30, 100]))
        The parameter ``freq_bands`` should be either a ndarray with shape
        ``(n_freq_bands + 1,)`` or ``(n_freq_bands, 2)`` or a dict. If ndarray
        with shape ``(n_freq_bands + 1,)``, the entries define **contiguous**
        frequency bands as follows: the i-th frequency band is defined as:
        [freq_bands[i], freq_bands[i + 1]] (0 <= i <= n_freq_bands - 1). If
        ndarray with shape ``(n_freq_bands, 2)``, the rows of ``freq_bands``
        define **non-contiguous** frequency bands. If dict, the keys should be
        strings (names of the frequency bands) and the values, the
        corresponding bands (as ndarray with shape (2,) or list of length 2).
        When ``freq_bands`` is of type dict, the keys are used to generate the
        feature names (only used when features are extracted with
        ``return_as_df=True``). The values of ``freq_bands`` should be between
        0 and sfreq / 2 (the Nyquist frequency) as the function uses the
        one-sided PSD.

    deriv_filt : bool (default: False)
        If True, a derivative filter is applied to the input data before
        filtering (see Notes).

    Returns
    -------
    output : ndarray, shape (n_channels * (n_freqs - 1),)

    Notes
    -----
    Alias of the feature function: **energy_freq_bands**. See [1]_.

    References
    ----------
    .. [1] Kharbouch, A. et al. (2011). An algorithm for seizure onset
           detection using intracranial EEG. Epilepsy & Behavior, 22, S29-S35.
    """
    n_channels = data.shape[0]
    if isinstance(freq_bands, dict):
        _freq_bands = np.asarray([freq_bands[n] for n in freq_bands])
    else:
        _freq_bands = np.asarray(freq_bands)
    fb = _freq_bands_helper(sfreq, _freq_bands)
    n_freq_bands = fb.shape[0]
    band_energy = np.empty((n_channels, n_freq_bands))
    if deriv_filt:
        _data = convolve1d(data, [1., 0., -1.], axis=-1, mode='nearest')
    else:
        _data = data
    for j in range(n_freq_bands):
        filtered_data = _filt(sfreq, _data, fb[j, :])
        band_energy[:, j] = np.sum(filtered_data ** 2, axis=-1)
    return band_energy.ravel()


def _compute_energy_fb_feat_names(data, freq_bands, deriv_filt):
    """Utility function to create feature names compatible with the output of
    :func:`mne_features.univariate.compute_energy_freq_bands`."""
    n_channels = data.shape[0]
    if isinstance(freq_bands, dict):
        n_freq_bands = len(freq_bands)
        _band_names = [str(n) for n in freq_bands]
    else:
        n_freq_bands = (freq_bands.shape[0] - 1 if freq_bands.ndim == 1
                        else freq_bands.shape[0])
        _band_names = ['band' + str(j) for j in range(n_freq_bands)]
    return ['ch%s_%s' % (ch, _band_names[i]) for ch in range(n_channels) for
            i in range(n_freq_bands)]


compute_energy_freq_bands.get_feature_names = _compute_energy_fb_feat_names


def compute_spect_edge_freq(sfreq, data, ref_freq=None, edge=None,
                            psd_method='welch', psd_params=None):
    """Spectal Edge Frequency (per channel).

    Parameters
    ----------
    sfreq : float
        Sampling rate of the data.

    data : ndarray, shape (n_channels, n_times)

    ref_freq : float or None (default: None)
        If not None, reference frequency for the computation of the spectral
        edge frequency. If None, `ref_freq = sfreq / 2` is used.

    edge : list of float or None (default: None)
        If not None, ``edge`` is expected to be a list of values between 0
        and 1. If None, ``edge = [0.5]`` is used.

    psd_method : str (default: 'welch')
        Method used for the estimation of the Power Spectral Density (PSD).
        Valid methods are: ``'welch'``, ``'multitaper'`` or ``'fft'``.

    psd_params : dict or None (default: None)
        If not None, dict with optional parameters (`welch_n_fft`,
        `welch_n_per_seg`, `welch_n_overlap`) to be passed to
        :func:`mne_features.utils.power_spectrum`. If None, default parameters
        are used (see doc for :func:`mne_features.utils.power_spectrum`).

    Returns
    -------
    output : ndarray, shape (n_channels * n_edge,)
        With: `n_edge = 1` if `edge` is None or `n_edge = len(edge)` otherwise.

    Notes
    -----
    Alias of the feature function: **spect_edge_freq**. See [1]_.

    References
    ----------
    .. [1] Mormann, F. et al. (2006). Seizure prediction: the long and winding
           road. Brain, 130(2), 314-333.
    """
    if ref_freq is None:
        _ref_freq = sfreq / 2
    else:
        _ref_freq = float(ref_freq)
    if edge is None:
        _edge = [0.5]
    else:
        # Check the values in `edge`
        if not all([0 <= p <= 1 for p in edge]):
            raise ValueError('The values in ``edge``` must be floats between '
                             '0 and 1. Got {} instead.'.format(edge))
        else:
            _edge = edge
    n_edge = len(_edge)
    n_channels, n_times = data.shape
    spect_edge_freq = np.empty((n_channels, n_edge))
    _psd_params = _psd_params_checker(psd_params)
    psd, freqs = power_spectrum(sfreq, data, psd_method=psd_method,
                                **_psd_params)
    out = np.cumsum(psd, 1)
    for i, p in enumerate(_edge):
        idx_ref = np.where(freqs >= _ref_freq)[0][0]
        ref_pow = np.sum(psd[:, :(idx_ref + 1)], axis=-1)
        for j in range(n_channels):
            idx = np.where(out[j, :] >= p * ref_pow[j])[0]
            if idx.size > 0:
                spect_edge_freq[j, i] = freqs[idx[0]]
            else:
                spect_edge_freq[j, i] = -1
    return spect_edge_freq.ravel()


def compute_wavelet_coef_energy(data, wavelet_name='db4'):
    """Energy of Wavelet decomposition coefficients (per channel).

    Parameters
    ----------
    data : ndarray, shape (n_channels, n_times)

    wavelet_name : str (default: db4)
        Wavelet name (to be used with ``pywt.Wavelet``). The full list of
        Wavelet names are given by: ``[name for family in pywt.families() for
        name in pywt.wavelist(family)]``.

    Returns
    -------
    output : ndarray, shape (n_channels * levdec,)
        The decomposition level (``levdec``) used for the DWT is either 6 or
        the maximum useful decomposition level (given the number of time points
        in the data and chosen wavelet ; see ``pywt.dwt_max_level``).

    Notes
    -----
    Alias of the feature function: **wavelet_coef_energy**. See [1]_.

    References
    ----------
    .. [1] Teixeira, C. A. et al. (2011). EPILAB: A software package for
           studies on the prediction of epileptic seizures. Journal of
           Neuroscience Methods, 200(2), 257-271.
    """
    n_channels, n_times = data.shape
    coefs = _wavelet_coefs(data, wavelet_name)
    levdec = len(coefs) - 1
    wavelet_energy = np.zeros((n_channels, levdec))
    for j in range(n_channels):
        for l in range(levdec):
            wavelet_energy[j, l] = np.sum(coefs[levdec - l][j, :] ** 2)
    return wavelet_energy.ravel()


@nb.jit([nb.float64[:, :](nb.float64[:, :]),
         nb.float32[:, :](nb.float32[:, :])], nopython=True)
def _tk_energy(data):
    """Teager-Kaiser Energy.

    Utility function for :func:`compute_taeger_kaiser_energy`.

    Parameters
    ----------
    data : ndarray, shape (n_channels, n_times)

    Returns
    -------
    output : ndarray, shape (n_channels, n_times - 2)
    """
    n_channels, n_times = data.shape
    tke = np.empty((n_channels, n_times - 2), dtype=data.dtype)
    for j in range(n_channels):
        for i in range(1, n_times - 1):
            tke[j, i - 1] = data[j, i] ** 2 - data[j, i - 1] * data[j, i + 1]
    return tke


def compute_teager_kaiser_energy(data, wavelet_name='db4'):
    """Compute the Teager-Kaiser energy.

    Parameters
    ----------
    data : ndarray, shape (n_channels, n_times)

    wavelet_name : str (default: 'db4')
        Wavelet name (to be used with ``pywt.Wavelet``). The full list of
        Wavelet names are given by: ``[name for family in pywt.families() for
        name in pywt.wavelist(family)]``.

    Returns
    -------
    output : ndarray, shape (n_channels * (levdec + 1) * 2,)

    Notes
    -----
    Alias of the feature function: **teager_kaiser_energy**. See [1]_.

    References
    ----------
    .. [1] Badani, S. et al. (2017). Detection of epilepsy based on discrete
           wavelet transform and Teager-Kaiser energy operator. In Calcutta
           Conference (CALCON). 2017 IEEE (pp. 164-167).
    """
    n_channels, n_times = data.shape
    coefs = _wavelet_coefs(data, wavelet_name)
    levdec = len(coefs) - 1
    tke = np.empty((n_channels, levdec + 1, 2))
    for l in range(levdec + 1):
        tk_energy = _tk_energy(coefs[l])
        tke[:, l, 0] = np.mean(tk_energy, axis=-1)
        tke[:, l, 1] = np.std(tk_energy, ddof=1, axis=-1)
    return tke.ravel()