python source code of gaussian

#
# FRETBursts - A single-molecule FRET burst analysis toolkit.
#
# Copyright (C) 2014-2016 Antonino Ingargiola <tritemio@gmail.com>
#
"""
This module provides functions to fit gaussian distributions and gaussian
distribution mixtures (2 components). These functions can be used directly,
or more often, in a typical FRETBursts workflow they are passed to higher
level methods like :meth:`fretbursts.burstlib.Data.fit_E_generic`.

Single Gaussian distribution fit:

    * :func:`gaussian_fit_hist`
    * :func:`gaussian_fit_cdf`
    * :func:`gaussian_fit_pdf`

For 2-Gaussians fit we have the following models:

    * :func:`two_gauss_mix_pdf`: *PDF of 2-components Gaussians mixture*
    * :func:`two_gauss_mix_ab`: *linear combination of 2 Gaussians*

Main functions for mixture of 2 Gaussian distribution fit:

    * :func:`two_gaussian_fit_hist` *histogram fit using `leastsq`*
    * :func:`two_gaussian_fit_hist_min` *histogram fit using `minimize`*
    * :func:`two_gaussian_fit_hist_min_ab` *the same but using _ab model*
    * :func:`two_gaussian_fit_cdf` *curve fit of the CDF*
    * :func:`two_gaussian_fit_EM` *Expectation-Maximization fit*
    * :func:`two_gaussian_fit_EM_b` *the same with boundaries*

Also, some functions to fit 2-D gaussian distributions and mixtures are
implemented but not thoroughly tested.

The reference documentation for **all** the functions follows.
"""

from __future__ import division, print_function, absolute_import

import numpy as np
import numpy.random as R
import scipy.optimize as O
import scipy.stats as S
from scipy.special import erf
from scipy.optimize import leastsq, minimize
import scipy.ndimage as ndi

#from scipy.stats import gaussian_kde
from .weighted_kde import gaussian_kde_w  # this version supports weights


def normpdf(x, mu=0, sigma=1.):
    """Return the normal pdf evaluated at `x`."""
    assert sigma > 0
    u = (x-mu)/sigma
    y = 1/(np.sqrt(2*np.pi)*sigma)*np.exp(-u*u/2)
    return y

##
# Single gaussian distribution
#

def gaussian_fit_curve(x, y, mu0=0, sigma0=1, a0=None, return_all=False,
        **kwargs):
    """Gaussian fit of curve (x,y).
    If a0 is None then only (mu,sigma) are fitted (to a gaussian density).
    `kwargs` are passed to the leastsq() function.

    If return_all=False then return only the fitted (mu,sigma) values
    If return_all=True (or full_output=True is passed to leastsq)
    then the full output of leastsq is returned.
    """
    if a0 is None:
        gauss_pdf = lambda x, m, s: np.exp(-((x-m)**2)/(2*s**2))/\
                                    (np.sqrt(2*np.pi)*s)
        err_fun = lambda p, x, y: gauss_pdf(x, *p) - y
        res = leastsq(err_fun, x0=[mu0, sigma0], args=(x, y), **kwargs)
    else:
        gauss_fun = lambda x, m, s, a: a*np.sign(s)*np.exp(-((x-m)**2)/(2*s**2))
        err_fun = lambda p, x, y: gauss_fun(x, *p) - y
        res = leastsq(err_fun, x0=[mu0, sigma0, a0], args=(x, y), **kwargs)

    if 'full_output' in kwargs:
        return_all = kwargs['full_output']
    mu, sigma = res[0][0], res[0][1]
    if return_all: return res
    return mu, sigma

def get_epdf(s, smooth=0, N=1000, smooth_pdf=False, smooth_cdf=True):
    """Compute the empirical PDF of the sample `s`.

    If smooth > 0 then apply a gaussian filter with sigma=smooth.
    N is the number of points for interpolation of the CDF on a uniform range.
    """
    ecdf = [np.sort(s), np.arange(0.5, s.size+0.5)*1./s.size]
    #ecdf = [np.sort(s), np.arange(s.size)*1./s.size]
    _x = np.linspace(s.min(), s.max(), N)
    ecdfi = [_x, np.interp(_x, ecdf[0], ecdf[1])]
    if smooth_cdf and smooth > 0:
        ecdfi[1] = ndi.filters.gaussian_filter1d(ecdfi[1], sigma=smooth)
    epdf = [ecdfi[0][:-1], np.diff(ecdfi[1])/np.diff(ecdfi[0])]
    if smooth_pdf and smooth > 0:
        epdf[1] = ndi.filters.gaussian_filter1d(epdf[1], sigma=smooth)
    return epdf

def gaussian_fit_pdf(s, mu0=0, sigma0=1, a0=1, return_all=False,
        leastsq_kwargs={}, **kwargs):
    """Gaussian fit of samples s using a fit to the empirical PDF.
    If a0 is None then only (mu,sigma) are fitted (to a gaussian density).
    `kwargs` are passed to get_epdf().
    If return_all=False then return only the fitted (mu,sigma) values
    If return_all=True (or full_output=True is passed to leastsq)
    then the full output of leastsq and the PDF curve is returned.
    """
    ## Empirical PDF
    epdf = get_epdf(s, **kwargs)

    res = gaussian_fit_curve(epdf[0], epdf[1], mu0, sigma0, a0, return_all,
            **leastsq_kwargs)
    if return_all: return res, epdf
    return res

def gaussian_fit_hist(s, mu0=0, sigma0=1, a0=None, bins=np.r_[-0.5:1.5:0.001],
        return_all=False, leastsq_kwargs={}, weights=None, **kwargs):
    """Gaussian fit of samples s fitting the hist to a Gaussian function.
    If a0 is None then only (mu,sigma) are fitted (to a gaussian density).
    kwargs are passed to the histogram function.
    If return_all=False then return only the fitted (mu,sigma) values
    If return_all=True (or full_output=True is passed to leastsq)
    then the full output of leastsq and the histogram is returned.
    `weights` optional weights for the histogram.
    """
    histogram_kwargs = dict(bins=bins, density=True, weights=weights)
    histogram_kwargs.update(**kwargs)
    H = np.histogram(s, **histogram_kwargs)
    x, y = 0.5*(H[1][:-1] + H[1][1:]), H[0]
    #bar(H[1][:-1], H[0], H[1][1]-H[1][0], alpha=0.3)

    res = gaussian_fit_curve(x, y, mu0, sigma0, a0, return_all,
            **leastsq_kwargs)
    if return_all: return res, H, x, y
    return res

def gaussian_fit_cdf(s, mu0=0, sigma0=1, return_all=False, **leastsq_kwargs):
    """Gaussian fit of samples s fitting the empirical CDF.
    Additional kwargs are passed to the leastsq() function.
    If return_all=False then return only the fitted (mu,sigma) values
    If return_all=True (or full_output=True is passed to leastsq)
    then the full output of leastsq and the histogram is returned.
    """
    ## Empirical CDF
    ecdf = [np.sort(s), np.arange(0.5, s.size+0.5)*1./s.size]

    ## Analytical Gaussian CDF
    gauss_cdf = lambda x, mu, sigma: 0.5*(1+erf((x-mu)/(np.sqrt(2)*sigma)))

    ## Fitting the empirical CDF
    err_func = lambda p, x, y: y - gauss_cdf(x, p[0], p[1])
    res = leastsq(err_func, x0=[mu0, sigma0], args=(ecdf[0], ecdf[1]),
            **leastsq_kwargs)
    if return_all: return res, ecdf
    return res[0]

def gaussian_fit_ml(s, mu_sigma_guess=[0.5, 1]):
    """Gaussian fit of samples s using the Maximum Likelihood (ML method).
    Didactical, since scipy.stats.norm.fit() implements the same method.
    """
    n = s.size
    ## Log-likelihood (to be maximized)
    log_l = lambda mu, sig: -n/2.*np.log(sig**2) - \
                             1./(2*sig**2)*np.sum((s-mu)**2)

    ## Function to be minimized
    min_fun = lambda p: -log_l(p[0], p[1])

    res = O.minimize(min_fun, [0, 0.5], method='powell',
                     options={'xtol': 1e-6, 'disp': True, 'maxiter': 1e9})

    print(res)
    mu, sigma = res['x']
    return mu, sigma

##
# Two-component gaussian mixtures
#

def two_gauss_mix_pdf(x, p):
    """PDF for the distribution of a mixture of two Gaussians."""
    mu1, sig1, mu2, sig2, a = p
    return a*normpdf(x, mu1, sig1) + (1-a)*normpdf(x, mu2, sig2)

def two_gauss_mix_ab(x, p):
    """Mixture of two Gaussians with no area constrain."""
    mu1, sig1, a1, mu2, sig2, a2 = p
    return a1*normpdf(x, mu1, sig1) + a2*normpdf(x, mu2, sig2)

def reorder_parameters(p):
    """Reorder 2-gauss mix params to have the 1st component with smaller mean.
    """
    if p[0] > p[2]:
        p = p[np.array([2, 3, 0, 1, 4])]  # swap (mu1, sig1) with (mu2, sig2)
        p[4] = 1 - p[4]                   # "swap" the alpha of the mixture
    return p

def reorder_parameters_ab(p):
    """Reorder 2-gauss mix params to have the 1st component with smaller mean.
    """
    if p[0] > p[3]:
        p = p[np.array([3, 4, 5, 0, 1, 2])]
    return p


def bound_check(val, bounds):
    """Returns `val` clipped inside the interval `bounds`."""
    if bounds[0] is not None and val < bounds[0]:
        val = bounds[0]
    if bounds[1] is not None and val > bounds[1]:
        val = bounds[1]
    return val

def two_gaussian_fit_curve(x, y, p0, return_all=False, verbose=False, **kwargs):
    """Fit a 2-gaussian mixture to the (x,y) curve.
    `kwargs` are passed to the leastsq() function.

    If return_all=False then return only the fitted paramaters
    If return_all=True then the full output of leastsq is returned.
    """
    if kwargs['method'] == 'leastsq':
        kwargs.pop('method')
        kwargs.pop('bounds')
        def err_func(p, x, y):
            return (y - two_gauss_mix_pdf(x, p))
        res = leastsq(err_func, x0=p0, args=(x, y), **kwargs)
        p = res[0]
    else:
        def err_func(p, x, y):
            return ((y - two_gauss_mix_pdf(x, p))**2).sum()
        res = minimize(err_func, x0=p0, args=(x, y), **kwargs)
        p = res.x

    if verbose:
        print(res, '\n')
    if return_all: return res
    return reorder_parameters(p)

def two_gaussian_fit_KDE_curve(s, p0=[0, 0.1, 0.6, 0.1, 0.5], weights=None,
                               bandwidth=0.05, x_pdf=None, debug=False,
                               method='SLSQP', bounds=None,
                               verbose=False, **kde_kwargs):
    """Fit sample `s` with two gaussians using a KDE pdf approximation.

    The 2-gaussian pdf is then curve-fitted to the KDE pdf.

    Arguments:
        s (array): population of samples to be fitted
        p0 (sequence-like): initial parameters [mu0, sig0, mu1, sig1, a]
        bandwidth (float): bandwidth for the KDE algorithm
        method (string): fit method, can be 'leastsq' or one of the methods
            accepted by scipy `minimize()`
        bounds (None or 5-element list): if not None, each element is a
            (min,max) pair of bounds for the corresponding parameter. This
            argument can be used only with L-BFGS-B, TNC or SLSQP methods.
            If bounds are used, parameters cannot be fixed
        x_pdf (array): array on which the KDE PDF is evaluated and curve-fitted
        weights (array): optional weigths, same size as `s` (for ex.
            1/sigma^2 ~ nt).
        debug (bool): if True perfoms more tests and print more info.

    Additional kwargs are passed to scipy.stats.gaussian_kde().

    Returns:
        Array of parameters for the 2-gaussians (5 elements)
    """
    if x_pdf is None: x_pdf = np.linspace(s.min(), s.max(), 1000)

    ## Scikit-learn KDE estimation
    #kde_skl = KernelDensity(bandwidth=bandwidth, **kde_kwargs)
    #kde_skl.fit(x)[:, np.newaxis])
    ## score_samples() returns the log-likelihood of the samples
    #log_pdf = kde_skl.score_samples(x_pdf)[:, np.newaxis])
    #kde_pdf = np.exp(log_pdf)

    ## Weighted KDE estimation
    kde = gaussian_kde_w(s, bw_method=bandwidth, weights=weights)
    kde_pdf = kde.evaluate(x_pdf)

    p = two_gaussian_fit_curve(x_pdf, kde_pdf, p0=p0, method=method,
                               bounds=bounds, verbose=verbose)
    return p


def two_gaussian_fit_EM_b(s, p0=[0, 0.1, 0.6, 0.1, 0.5], weights=None,
                          bounds=[(None, None,)]*5,
                          max_iter=300, ptol=1e-4, debug=False):
    """
    Fit the sample s with two gaussians using Expectation Maximization.

    This version allows setting boundaries for each parameter.

    Arguments:
        s (array): population of samples to be fitted
        p0 (sequence-like): initial parameters [mu0, sig0, mu1, sig1, a]
        bound (tuple of pairs): sequence of (min, max) values that constrain
            the parameters. If min or max are None, no boundary is set.
        ptol (float): convergence condition. Relative max variation of any
            parameter.
        max_iter (int): max number of iteration in case of non convergence.
        weights (array): optional weigths, same size as `s` (for ex.
            1/sigma^2 ~ nt).

    Returns:
        Array of parameters for the 2-gaussians (5 elements)
    """
    assert np.size(p0) == 5
    if weights is None: weights = np.ones(s.size)
    assert weights.size == s.size
    weights *= (1.*weights.size)/weights.sum() # Normalize to (#samples)
    #weights /= weights.sum()  # Normalize to 1
    if debug: assert np.abs(weights.sum() - s.size) < 1e-6
    bounds_mu = [bounds[0], bounds[2]]
    bounds_sig = [bounds[1], bounds[3]]
    bounds_pi0 = bounds[4]

    # Initial guess of parameters and initializations
    mu = np.array([p0[0], p0[2]])
    sig = np.array([p0[1], p0[3]])
    pi_ = np.array([p0[4], 1-p0[4]])

    gamma = np.zeros((2, s.size))
    N_ = np.zeros(2)
    p_new = np.array(p0)

    # EM loop
    counter = 0
    stop_iter, converged = False, False
    while not stop_iter:
        # Compute the responsibility func. (gamma) and the new parameters
        for k in [0, 1]:
            gamma[k, :] = weights*pi_[k]*normpdf(s, mu[k], sig[k]) / \
                            two_gauss_mix_pdf(s, p_new)
            N_[k] = gamma[k, :].sum()
            mu[k] = np.sum(gamma[k]*s)/N_[k]
            mu[k] = bound_check(mu[k], bounds_mu[k])
            sig[k] = np.sqrt( np.sum(gamma[k]*(s-mu[k])**2)/N_[k] )
            sig[k] = bound_check(sig[k], bounds_sig[k])
            if k < 1:
                pi_[k] = N_[k]/s.size
                pi_[k] = bound_check(pi_[k], bounds_pi0)
            else:
                pi_[k] = 1 - pi_[0]
        p_old = p_new
        p_new = np.array([mu[0], sig[0], mu[1], sig[1], pi_[0]])
        if debug:
            assert np.abs(N_.sum() - s.size)/float(s.size) < 1e-6
            assert np.abs(pi_.sum() - 1) < 1e-6

        # Convergence check
        counter += 1
        relative_delta = np.abs(p_new - p_old)/p_new
        converged = relative_delta.max() < ptol
        stop_iter = converged or (counter >= max_iter)

    if debug:
        print("Iterations: ", counter)
    if not converged:
        print("WARNING: Not converged, max iteration (%d) reached." % max_iter)
    return reorder_parameters(p_new)

def two_gaussian_fit_EM(s, p0=[0, 0.1, 0.6, 0.1, 0.5], max_iter=300, ptol=1e-4,
                        fix_mu=[0, 0], fix_sig=[0, 0], debug=False,
                        weights=None):
    """
    Fit the sample s with two gaussians using Expectation Maximization.

    This vesion allows to optionally fix mean or std. dev. of each component.

    Arguments:
        s (array): population of samples to be fitted
        p0 (sequence-like): initial parameters [mu0, sig0, mu1, sig1, a]
        bound (tuple of pairs): sequence of (min, max) values that constrain
            the parameters. If min or max are None, no boundary is set.
        ptol (float): convergence condition. Relative max variation of any
            parameter.
        max_iter (int): max number of iteration in case of non convergence.
        weights (array): optional weigths, same size as `s` (for ex.
            1/sigma^2 ~ nt).

    Returns:
        Array of parameters for the 2-gaussians (5 elements)
    """
    assert np.size(p0) == 5
    if weights is None: weights = np.ones(s.size)
    assert weights.size == s.size
    weights *= (1.*weights.size)/weights.sum() # Normalize to (#samples)
    #weights /= weights.sum()  # Normalize to 1
    if debug: assert np.abs(weights.sum() - s.size) < 1e-6

    # Initial guess of parameters and initializations
    mu = np.array([p0[0], p0[2]])
    sig = np.array([p0[1], p0[3]])
    pi_ = np.array([p0[4], 1-p0[4]])

    gamma = np.zeros((2, s.size))
    N_ = np.zeros(2)
    p_new = np.array(p0)

    # EM loop
    counter = 0
    stop_iter, converged = False, False
    while not stop_iter:
        # Compute the responsibility func. (gamma) and the new parameters
        for k in [0, 1]:
            gamma[k, :] = weights*pi_[k]*normpdf(s, mu[k], sig[k]) / \
                    two_gauss_mix_pdf(s, p_new)
            ## Uncomment for SCHEME2
            #gamma[k, :] = pi_[k]*normpdf(s, mu[k], sig[k]) / \
            #        two_gauss_mix_pdf(s, p_new)
            N_[k] = gamma[k, :].sum()
            if not fix_mu[k]:
                mu[k] = np.sum(gamma[k]*s)/N_[k]
                ## Uncomment for SCHEME2
                #mu[k] = np.sum(weights*gamma[k]*s)/N_[k]
            if not fix_sig[k]:
                sig[k] = np.sqrt( np.sum(gamma[k]*(s-mu[k])**2)/N_[k] )
            pi_[k] = N_[k]/s.size
        p_old = p_new
        p_new = np.array([mu[0], sig[0], mu[1], sig[1], pi_[0]])
        if debug:
            assert np.abs(N_.sum() - s.size)/float(s.size) < 1e-6
            assert np.abs(pi_.sum() - 1) < 1e-6

        # Convergence check
        counter += 1
        fixed = np.concatenate([fix_mu, fix_sig, [0]]).astype(bool)
        relative_delta = np.abs(p_new[~fixed] - p_old[~fixed])/p_new[~fixed]
        converged = relative_delta.max() < ptol
        stop_iter = converged or (counter >= max_iter)

    if debug:
        print("Iterations: ", counter)
    if not converged:
        print("WARNING: Not converged, max iteration (%d) reached." % max_iter)
    return reorder_parameters(p_new)

def two_gaussian_fit_hist(s, bins=np.r_[-0.5:1.5:0.001], weights=None,
        p0=[0.2,1,0.8,1,0.3], fix_mu=[0,0], fix_sig=[0,0], fix_a=False):
    """Fit the sample s with 2-gaussian mixture (histogram fit).

    Uses scipy.optimize.leastsq function. Parameters can be fixed but
    cannot be constrained in an interval.

    Arguments:
        s (array): population of samples to be fitted
        p0 (5-element list or array): initial guess or parameters
        bins (int or array): bins passed to `np.histogram()`
        weights (array): optional weights passed to `np.histogram()`
        fix_a (tuple of bools): Whether to fix the amplitude of the gaussians
        fix_mu (tuple of bools): Whether to fix the mean of the gaussians
        fix_sig (tuple of bools): Whether to fix the sigma of the gaussians

    Returns:
        Array of parameters for the 2-gaussians (5 elements)
    """
    assert np.size(p0) == 5
    fix = np.array([fix_mu[0], fix_sig[0], fix_mu[1], fix_sig[1], fix_a],
                dtype=bool)
    p0 = np.array(p0)
    p0_free = p0[-fix]
    p0_fix = p0[fix]

    H = np.histogram(s, bins=bins, weights=weights, density=True)
    x, y = 0.5*(H[1][:-1] + H[1][1:]), H[0]
    assert x.size == y.size

    ## Fitting
    def err_func(p, x, y, fix, p_fix, p_complete):
        p_complete[-fix] = p
        p_complete[fix] = p_fix
        return y - two_gauss_mix_pdf(x, p_complete)

    p_complete = np.zeros(5)
    p, v = leastsq(err_func, x0=p0_free, args=(x, y, fix, p0_fix, p_complete))
    p_new = np.zeros(5)
    p_new[-fix] = p
    p_new[fix] = p0_fix
    return reorder_parameters(p_new)


def two_gaussian_fit_hist_min(s, bounds=None, method='L-BFGS-B',
        bins=np.r_[-0.5:1.5:0.001], weights=None,  p0=[0.2,1,0.8,1,0.3],
        fix_mu=[0,0], fix_sig=[0,0], fix_a=False, verbose=False):
    """Fit the sample `s` with 2-gaussian mixture (histogram fit). [Bounded]

    Uses scipy.optimize.minimize allowing constrained minimization.

    Arguments:
        s (array): population of samples to be fitted
        method (string): one of the methods accepted by scipy `minimize()`
        bounds (None or 5-element list): if not None, each element is a
            (min,max) pair of bounds for the corresponding parameter. This
            argument can be used only with L-BFGS-B, TNC or SLSQP methods.
            If bounds are used, parameters cannot be fixed
        p0 (5-element list or array): initial guess or parameters
        bins (int or array): bins passed to `np.histogram()`
        weights (array): optional weights passed to `np.histogram()`
        fix_a (tuple of bools): Whether to fix the amplitude of the gaussians
        fix_mu (tuple of bools): Whether to fix the mean of the gaussians
        fix_sig (tuple of bools): Whether to fix the sigma of the gaussians
        verbose (boolean): allows printing fit information

    Returns:
        Array of parameters for the 2-gaussians (5 elements)
    """
    assert np.size(p0) == 5
    fix = np.array([fix_mu[0], fix_sig[0], fix_mu[1], fix_sig[1], fix_a],
                dtype=bool)
    p0 = np.array(p0)
    p0_free = p0[-fix]
    p0_fix = p0[fix]

    H = np.histogram(s, bins=bins, weights=weights, density=True)
    x, y = 0.5*(H[1][:-1] + H[1][1:]), H[0]
    assert x.size == y.size

    ## Fitting
    def err_func(p, x, y, fix, p_fix, p_complete):
        p_complete[-fix] = p
        p_complete[fix] = p_fix
        return ((y - two_gauss_mix_pdf(x, p_complete))**2).sum()

    p_complete = np.zeros(5)
    res = minimize(err_func, x0=p0_free, args=(x, y, fix, p0_fix, p_complete),
                    method=method, bounds=bounds)
    if verbose: print(res)
    p_new = np.zeros(5)
    p_new[-fix] = res.x
    p_new[fix] = p0_fix
    return reorder_parameters(p_new)

def two_gaussian_fit_hist_min_ab(s, bounds=None, method='L-BFGS-B',
        bins=np.r_[-0.5:1.5:0.001], weights=None,  p0=[0.2,1,0.8,1,0.3],
        fix_mu=[0,0], fix_sig=[0,0], fix_a=[0,0], verbose=False):
    """Histogram fit of sample `s` with 2-gaussian functions.

    Uses scipy.optimize.minimize allowing constrained minimization. Also
    each parameter can be fixed.

    The order of the parameters is: mu1, sigma1, a1, mu2, sigma2, a2.

    Arguments:
        s (array): population of samples to be fitted
        method (string): one of the methods accepted by scipy `minimize()`
        bounds (None or 6-element list): if not None, each element is a
            (min,max) pair of bounds for the corresponding parameter. This
            argument can be used only with L-BFGS-B, TNC or SLSQP methods.
            If bounds are used, parameters cannot be fixed
        p0 (6-element list or array): initial guess or parameters
        bins (int or array): bins passed to `np.histogram()`
        weights (array): optional weights passed to `np.histogram()`
        fix_a (tuple of bools): Whether to fix the amplitude of the gaussians
        fix_mu (tuple of bools): Whether to fix the mean of the gaussians
        fix_sig (tuple of bools): Whether to fix the sigma of the gaussians
        verbose (boolean): allows printing fit information

    Returns:
        Array of parameters for the 2-gaussians (6 elements)
    """
    nparams = 6
    assert np.size(p0) == nparams
    fix = np.array([fix_mu[0], fix_sig[0], fix_a[0],
                    fix_mu[1], fix_sig[1], fix_a[1]], dtype=bool)
    p0 = np.asarray(p0)
    p0_free = p0[-fix]
    p0_fix = p0[fix]

    counts, bins = np.histogram(s, bins=bins, weights=weights, density=True)
    x = bins[:-1] + 0.5*(bins[1] - bins[0])
    y = counts
    assert x.size == y.size

    ## Fitting
    def err_func(p, x, y, fix, p_fix, p_complete):
        p_complete[-fix] = p
        p_complete[fix] = p_fix
        return ((y - two_gauss_mix_ab(x, p_complete))**2).sum()

    p_complete = np.zeros(nparams)
    res = minimize(err_func, x0=p0_free, args=(x, y, fix, p0_fix, p_complete),
                    method=method, bounds=bounds)
    if verbose: print(res)
    p_new = np.zeros(nparams)
    p_new[-fix] = res.x
    p_new[fix] = p0_fix
    return reorder_parameters_ab(p_new)

def two_gaussian_fit_cdf(s, p0=[0., .05, .6, .1, .5],
                         fix_mu=[0, 0], fix_sig=[0, 0]):
    """Fit the sample s with two gaussians using a CDF fit.

    Curve fit 2-gauss mixture Cumulative Distribution Function (CDF) to the
    empirical CDF for sample `s`.

    Note that with a CDF fit no weighting is possible.

    Arguments:
        s (array): population of samples to be fitted
        p0 (5-element list or array): initial guess or parameters
        fix_mu (tuple of bools): Whether to fix the mean of the gaussians
        fix_sig (tuple of bools): Whether to fix the sigma of the gaussians

    Returns:
        Array of parameters for the 2-gaussians (5 elements)
    """
    assert np.size(p0) == 5
    fix = np.array([fix_mu[0], fix_sig[0], fix_mu[1], fix_sig[1], 0],
                    dtype=bool)
    p0 = np.array(p0)
    p0_free = p0[-fix]
    p0_fix = p0[fix]

    ## Empirical CDF
    ecdf = [np.sort(s), np.arange(0.5, s.size+0.5)*1./s.size]
    x, y = ecdf

    ## Analytical gaussian CDF
    gauss_cdf = lambda x, mu, sigma: 0.5*(1+erf((x-mu)/(np.sqrt(2)*sigma)))
    def two_gauss_mix_cdf(x, p):
        return p[4]*gauss_cdf(x, p[0], p[1]) + (1-p[4])*gauss_cdf(x, p[2], p[3])

    ## Fitting the empirical CDF
    def err_func(p, x, y, fix, p_fix, p_complete):
        p_complete[-fix] = p
        p_complete[fix] = p_fix
        return y - two_gauss_mix_cdf(x, p_complete)

    p_complete = np.zeros(5)
    p, v = leastsq(err_func, x0=p0_free, args=(x, y, fix, p0_fix, p_complete))
    p_new = np.zeros(5)
    p_new[-fix] = p
    p_new[fix] = p0_fix
    return reorder_parameters(p_new)

def test_two_gauss():
    m01 = 0.
    m02 = 0.6
    s01 = 0.05
    s02 = 0.1
    alpha = 0.
    p_real = [m01, s01, m02, s02, alpha]

    N = 500
    si1 = round(alpha*N)
    si2 = round((1-alpha)*N)
    s1 = R.normal(size=si1, loc=m01, scale=s01)
    s2 = R.normal(size=si2, loc=m02, scale=s02)
    s = np.r_[s1,s2]

    pc = two_gaussian_fit_cdf(s, fix_mu=[1,0], p0=[-0.01,0.05,0.5,0.2,0.4])
    ph = two_gaussian_fit_hist(s, fix_mu=[1,0], p0=[-0.01,0.05,0.5,0.2,0.4])
    pe = two_gaussian_fit_EM(s, fix_mu=[1,0], p0=[-0.01,0.05,0.5,0.2,0.4])

    hist(s, bins=40, normed=True)

    x = np.r_[s.min()-1:s.max()+1:200j]
    plot(x, a*normpdf(x,mu1,sig1), lw=2)
    plot(x, (1-a)*normpdf(x,mu2,sig2), lw=2)
    plot(x, two_gauss_mix_pdf(x, p0), lw=2)

    axvline(m01, lw=2, color='k', alpha=0.3)
    axvline(m02, lw=2, color='gray', alpha=0.3)
    axvline(mu1, lw=2, ls='--', color='k', alpha=0.3)
    axvline(mu2, lw=2, ls='--', color='gray', alpha=0.3)
    axvline(mu1h, lw=2, ls='--', color='r', alpha=0.3)
    axvline(mu2h, lw=2, ls='--', color='r', alpha=0.3)

def compare_two_gauss():
    m01 = 0.
    m02 = 0.5
    s01 = 0.08
    s02 = 0.15
    alpha = 0.7
    p_real = [m01, s01, m02, s02, alpha]

    N = 1000
    si1 = round(N*alpha)
    si2 = round((1-alpha)*N)

    p0 = [-0.01,0.05,0.6,0.2,0.4]
    fix_mu = [0,0]

    n = 500
    PC, PH, PE = np.zeros((n,5)), np.zeros((n,5)), np.zeros((n,5))
    for i in xrange(n):
        s1 = R.normal(size=si1, loc=m01, scale=s01)
        s2 = R.normal(size=si2, loc=m02, scale=s02)
        s = np.r_[s1,s2]

        pc = two_gaussian_fit_cdf(s, fix_mu=fix_mu, p0=p0)
        ph = two_gaussian_fit_hist(s, fix_mu=fix_mu, p0=p0)
        pe = two_gaussian_fit_EM(s, fix_mu=fix_mu, p0=p0)

        PC[i], PH[i], PE[i] = pc, ph, pe

    Label = ['Mu1', 'Sig1', 'Mu2', 'Sig2', 'Alpha']
    ftype = 'png'

    for i in range(5):
        figure()
        title(Label[i])
        vmin = min([PC[:,i].min(), PH[:,i].min(), PE[:,i].min()])
        vmax = max([PC[:,i].max(), PH[:,i].max(), PE[:,i].max()])
        b = np.r_[vmin:vmax:80j]
        if vmax == vmin: b = np.r_[vmin-.1:vmax+.1:200j]
        hist(PC[:,i], bins=b, alpha=0.3, label='CDF')
        hist(PH[:,i], bins=b, alpha=0.3, label='Hist')
        hist(PE[:,i], bins=b, alpha=0.3, label='EM')
        legend(loc='best')
        axvline(p_real[i], color='k', lw=2)
        #savefig('Two-gaussian Fit Comp - %s.png' % Label[i])

def gaussian2d_fit(sx, sy, guess=[0.5,1]):
    """2D-Gaussian fit of samples S using a fit to the empirical CDF."""
    assert sx.size == sy.size

    ## Empirical CDF
    ecdfx = [np.sort(sx), np.arange(0.5,sx.size+0.5)*1./sx.size]
    ecdfy = [np.sort(sy), np.arange(0.5,sy.size+0.5)*1./sy.size]

    ## Analytical gaussian CDF
    gauss_cdf = lambda x, mu, sigma: 0.5*(1+erf((x-mu)/(np.sqrt(2)*sigma)))

    ## Fitting the empirical CDF
    fitfunc = lambda p, x: gauss_cdf(x, p[0], p[1])
    errfunc = lambda p, x, y: fitfunc(p, x) - y
    px,v = leastsq(errfunc, x0=guess, args=(ecdfx[0],ecdfx[1]))
    py,v = leastsq(errfunc, x0=guess, args=(ecdfy[0],ecdfy[1]))
    print("2D Gaussian CDF fit", px, py)

    mux, sigmax = px[0], px[1]
    muy, sigmay = py[0], py[1]
    return mux, sigmax, muy, sigmay

def test_gaussian2d_fit():
    mx0 = 0.1
    my0 = 0.9
    sigx0 = 0.4
    sigy0 = 0.25

    Size = 500
    sx = R.normal(size=Size, loc=mx0, scale=sigx0)
    sy = R.normal(size=Size, loc=my0, scale=sigy0)

    mux, sigmax, muy, sigmay = gaussian2d_fit(sx, sy)

    plot(sx, sy, 'o', alpha=0.2, mew=0)

    X,Y = np.mgrid[sx.min()-1:sx.max()+1:200j, sy.min()-1:sy.max()+1:200j]

    def gauss2d(X,Y, mx, my, sigx, sigy):
        return np.exp(-((X-mx)**2)/(2*sigx**2))*np.exp(-((Y-my)**2)/(2*sigy**2))

    contour(X,Y,gauss2d(X,Y,mux,muy,sigmax,sigmay))

    plot(mx0,my0, 'ok', mew=0, ms=10)
    plot(mux,muy, 'x', mew=2, ms=10, color='green')


def two_gaussian2d_fit(sx, sy, guess=[0.5,1]):
    """2D-Gaussian fit of samples S using a fit to the empirical CDF."""
    ## UNFINISHED (I have 2 alphas unp.sign the xy projections)
    assert sx.size == sy.size

    ## Empirical CDF
    ecdfx = [np.sort(sx), np.arange(0.5,sx.size+0.5)*1./sx.size]
    ecdfy = [np.sort(sy), np.arange(0.5,sy.size+0.5)*1./sy.size]

    ## Analytical gaussian CDF
    gauss_cdf = lambda x, mu, sigma: 0.5*(1+erf((x-mu)/(np.sqrt(2)*sigma)))

    gauss2d_cdf = lambda X,Y,mx,sx,my,sy: gauss_cdf(X,mx,sx)*gauss_cdf(Y,my,sy)

    two_cdf = lambda x, m1, s1, m2, s2, a:\
        a*gauss_cdf(x,m1,s1)+(1-a)*gauss_cdf(x,m2,s2)

    two2d_cdf = lambda X,Y, mx1, sx1, mx2, sx2, my1, sy1, my2, sy2, a:\
        a*gauss2d_cdf(X,Y,mx1,sx1,my1,sy1)+(1-a)*gauss_cdf(X,Y,mx2,sx2,my2,sy2)

    ## Fitting the empirical CDF
    fitfunc = lambda p, x: two_cdf(x, *p)
    errfunc = lambda p, x, y: fitfunc(p, x) - y
    fitfunc2d = lambda p, X,Y: two2d_cdf(X,Y, *p)
    errfunc2d = lambda p, X,Y,Z: fitfunc2d(p, X,Y) - Z

    px,v = leastsq(errfunc, x0=guess, args=(ecdfx[0],ecdfx[1]))
    py,v = leastsq(errfunc, x0=guess, args=(ecdfy[0],ecdfy[1]))
    print("2D Two-Gaussians CDF fit", px, py)

    mux1, sigmax1, mux2, sigmax2, alphax = px
    muy1, sigmay1, muy2, sigmay2, alphay = py
    return mu1, sigma1, mu2, sigma2, alpha

def test_gaussian_fit():
    m0 = 0.1
    s0 = 0.4
    size = 500

    s = R.normal(size=size, loc=m0, scale=s0)
    #s = s[s<0.4]
    mu, sig = gaussian_fit(s)
    mu1, sig1 = S.norm.fit(s)
    mu2, sig2 = gaussian_fit_ml(s)

    print("ECDF ", mu, sig)
    print("ML         ", mu1, sig1)
    print("ML (manual)", mu2, sig2)

    H = np.histogram(s, bins=20, density=True)
    h = H[0]
    bw = H[1][1] - H[1][0]
    #bins_c = H[1][:-1]+0.5*bw
    bar(H[1][:-1], H[0], bw, alpha=0.3)

    x = np.r_[s.min()-1:s.max()+1:200j]
    plot(x, normpdf(x,m0,s0), lw=2, color='grey')
    plot(x, normpdf(x,mu,sig), lw=2, color='r', alpha=0.5)
    plot(x, normpdf(x,mu1,sig1), lw=2, color='b', alpha=0.5)

if __name__ == '__main__':
    #compare_two_gauss()
    #test_gaussian2d_fit()
    #test_gaussian_fit()
    #show()
    pass