Python scipy.stats.gaussian_kde() Examples

The following are 30 code examples of scipy.stats.gaussian_kde(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module scipy.stats , or try the search function .
Example #1
Source File: kde_mlab.py    From xrt with MIT License 9 votes vote down vote up
def main():
    mu = np.array([1, 10, 20])
    sigma = np.matrix([[20, 10, 10],
                       [10, 25, 1],
                       [10, 1, 50]])
    np.random.seed(100)
    data = np.random.multivariate_normal(mu, sigma, 1000)
    values = data.T

    kde = stats.gaussian_kde(values)

    # Create a regular 3D grid with 50 points in each dimension
    xmin, ymin, zmin = data.min(axis=0)
    xmax, ymax, zmax = data.max(axis=0)
    xi, yi, zi = np.mgrid[xmin:xmax:50j, ymin:ymax:50j, zmin:zmax:50j]

    # Evaluate the KDE on a regular grid...
    coords = np.vstack([item.ravel() for item in [xi, yi, zi]])
    density = kde(coords).reshape(xi.shape)

    # Visualize the density estimate as isosurfaces
    mlab.contour3d(xi, yi, zi, density, opacity=0.5)
    mlab.axes()
    mlab.show() 
Example #2
Source File: mv_measures.py    From vnpy_crypto with MIT License 7 votes vote down vote up
def mutualinfo_kde(y, x, normed=True):
    '''mutual information of two random variables estimated with kde

    '''
    nobs = len(x)
    if not len(y) == nobs:
        raise ValueError('both data arrays need to have the same size')
    x = np.asarray(x, float)
    y = np.asarray(y, float)
    yx = np.vstack((y,x))
    kde_x = gaussian_kde(x)(x)
    kde_y = gaussian_kde(y)(y)
    kde_yx = gaussian_kde(yx)(yx)

    mi_obs = np.log(kde_yx) - np.log(kde_x) - np.log(kde_y)
    mi = mi_obs.sum() / nobs
    if normed:
        mi_normed = np.sqrt(1. - np.exp(-2 * mi))
        return mi_normed
    else:
        return mi 
Example #3
Source File: test_kdeoth.py    From GraphicDesignPatternByPython with MIT License 6 votes vote down vote up
def test_pdf_logpdf():
    np.random.seed(1)
    n_basesample = 50
    xn = np.random.randn(n_basesample)

    # Default
    gkde = stats.gaussian_kde(xn)

    xs = np.linspace(-15, 12, 25)
    pdf = gkde.evaluate(xs)
    pdf2 = gkde.pdf(xs)
    assert_almost_equal(pdf, pdf2, decimal=12)

    logpdf = np.log(pdf)
    logpdf2 = gkde.logpdf(xs)
    assert_almost_equal(logpdf, logpdf2, decimal=12)

    # There are more points than data
    gkde = stats.gaussian_kde(xs)
    pdf = np.log(gkde.evaluate(xn))
    pdf2 = gkde.logpdf(xn)
    assert_almost_equal(pdf, pdf2, decimal=12) 
Example #4
Source File: modeller.py    From InSilicoSeq with MIT License 6 votes vote down vote up
def insert_size(insert_size_distribution):
    """Calculate cumulative distribution function from the raw insert size
    distributin. Uses 1D kernel density estimation.

    Args:
        insert_size_distribution (list): list of insert sizes from aligned
        read pairs

    Returns:
        1darray: a cumulative density function
    """
    kde = stats.gaussian_kde(
        insert_size_distribution,
        bw_method=0.2 / np.std(insert_size_distribution, ddof=1))
    x_grid = np.linspace(
        min(insert_size_distribution),
        max(insert_size_distribution), 1000)
    kde = kde.evaluate(x_grid)
    cdf = np.cumsum(kde)
    cdf = cdf / cdf[-1]
    return cdf 
Example #5
Source File: kde_likelihood.py    From lenstronomy with MIT License 6 votes vote down vote up
def __init__(self, D_d_sample, D_delta_t_sample, kde_type='scipy_gaussian', bandwidth=1):
        """

        :param D_d_sample: 1-d numpy array of angular diameter distances to the lens plane
        :param D_delta_t_sample: 1-d numpy array of time-delay distances
        kde_type : string
            The kernel to use.  Valid kernels are
            'scipy_gaussian' or
            ['gaussian'|'tophat'|'epanechnikov'|'exponential'|'linear'|'cosine']
            Default is 'gaussian'.
        :param bandwidth: width of kernel (in same units as the angular diameter quantities)
        """
        values = np.vstack([D_d_sample, D_delta_t_sample])
        if kde_type == 'scipy_gaussian':
            self._PDF_kernel = stats.gaussian_kde(values)
        else:
            from sklearn.neighbors import KernelDensity
            self._kde = KernelDensity(bandwidth=bandwidth, kernel=kde_type)
            values = np.vstack([D_d_sample, D_delta_t_sample])
            self._kde.fit(values.T)
        self._kde_type = kde_type 
Example #6
Source File: _core.py    From recruit with Apache License 2.0 6 votes vote down vote up
def _plot(cls, ax, y, style=None, bw_method=None, ind=None,
              column_num=None, stacking_id=None, **kwds):
        from scipy.stats import gaussian_kde
        from scipy import __version__ as spv

        y = remove_na_arraylike(y)

        if LooseVersion(spv) >= '0.11.0':
            gkde = gaussian_kde(y, bw_method=bw_method)
        else:
            gkde = gaussian_kde(y)
            if bw_method is not None:
                msg = ('bw_method was added in Scipy 0.11.0.' +
                       ' Scipy version in use is {spv}.'.format(spv=spv))
                warnings.warn(msg)

        y = gkde.evaluate(ind)
        lines = MPLPlot._plot(ax, ind, y, style=style, **kwds)
        return lines 
Example #7
Source File: graph_ABC.py    From abcpy with BSD 3-Clause Clear License 6 votes vote down vote up
def plot(samples, path = None, true_value = 5, title = 'ABC posterior'): 
	Bayes_estimate = np.mean(samples, axis = 0)
	theta = true_value
	xmin, xmax = max(samples[:,0]), min(samples[:,0])
	positions = np.linspace(xmin, xmax, samples.shape[0])
	gaussian_kernel = gaussian_kde(samples[:,0].reshape(samples.shape[0],))
	values = gaussian_kernel(positions)
	plt.figure()
	plt.plot(positions,gaussian_kernel(positions))
	plt.plot([theta, theta],[min(values), max(values)+.1*(max(values)-min(values))])
	plt.plot([Bayes_estimate, Bayes_estimate],[min(values), max(values)+.1*(max(values)-min(values))])
	plt.ylim([min(values), max(values)+.1*(max(values)-min(values))])
	plt.xlabel(r'$\theta$')
	plt.ylabel('density')
	#plt.xlim([0,1])
	plt.rc('axes', labelsize=15) 
	plt.legend(loc='best', frameon=False, numpoints=1)
	font = {'size'   : 15}
	plt.rc('font', **font)
	plt.title(title)
	if path is not None :
		plt.savefig(path)
	return plt 
Example #8
Source File: plot.py    From pypath with GNU General Public License v3.0 6 votes vote down vote up
def add_density_lines(self, **kwargs):
        for i, d in enumerate(self.data):
            self.kde_bandwidth = self.kde_base / d.std(ddof=1)
            density = stats.gaussian_kde(d, bw_method=self.kde_bandwidth)
            x = np.arange(self.lowest, self.highest,
                          self.highest / len(self.hist[0][i]))
            y = np.array(density(x))
            limit = np.percentile(x, self.kde_perc)
            y = y[np.where(x < limit)]
            x = x[np.where(x < limit)]
            #y2 = mpl.mlab.normpdf(x, np.mean(d), np.std(d))
            ylim = self.ax.get_ylim()
            xlim = self.ax.get_xlim()
            self.ax.plot(
                x,
                y,
                ls='--',
                lw=.5,
                c=self.palette[i][0],
                label='%s, density' % self.labels[i])
            #self.ax.plot(x, y2, ls = ':', lw = .5, c = self.palette[i][0])
            self.ax.set_ylim(ylim)
            self.ax.set_xlim(xlim) 
Example #9
Source File: mv_measures.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def mutualinfo_kde_2sample(y, x, normed=True):
    '''mutual information of two random variables estimated with kde

    '''
    nobs = len(x)
    x = np.asarray(x, float)
    y = np.asarray(y, float)
    #yx = np.vstack((y,x))
    kde_x = gaussian_kde(x.T)(x.T)
    kde_y = gaussian_kde(y.T)(x.T)
    #kde_yx = gaussian_kde(yx)(yx)

    mi_obs = np.log(kde_x) - np.log(kde_y)
    if len(mi_obs) != nobs:
        raise ValueError("Wrong number of observations")
    mi = mi_obs.mean()
    if normed:
        mi_normed = np.sqrt(1. - np.exp(-2 * mi))
        return mi_normed
    else:
        return mi 
Example #10
Source File: density.py    From plotnine with GNU General Public License v2.0 6 votes vote down vote up
def kde_scipy(data, grid, **kwargs):
    """
    Kernel Density Estimation with Scipy

    Parameters
    ----------
    data : numpy.array
        Data points used to compute a density estimator. It
        has `n x p` dimensions, representing n points and p
        variables.
    grid : numpy.array
        Data points at which the desity will be estimated. It
        has `m x p` dimensions, representing m points and p
        variables.

    Returns
    -------
    out : numpy.array
        Density estimate. Has `m x 1` dimensions
    """
    kde = gaussian_kde(data.T, **kwargs)
    return kde.evaluate(grid.T) 
Example #11
Source File: kde.py    From xrt with MIT License 6 votes vote down vote up
def main():
    mu = np.array([1, 10, 20])
    sigma = np.matrix([[20, 10, 10],
                       [10, 25, 1],
                       [10, 1, 50]])
    np.random.seed(100)
    data = np.random.multivariate_normal(mu, sigma, 1000)
    print(data.shape)
    values = data.T
    print(values.shape)

    kde = stats.gaussian_kde(values)
    density = kde(values)

    fig, ax = plt.subplots(subplot_kw=dict(projection='3d'))
    x, y, z = values
    #print(x.shape)
    #print(y.shape)
    #print(z.shape)
    ax.scatter(x, y, z, c=density)
    plt.show() 
Example #12
Source File: rplot.py    From Computable with MIT License 6 votes vote down vote up
def work(self, fig=None, ax=None):
        """Draw a one dimensional kernel density plot.
        You can specify either a figure or an axis to draw on.

        Parameters:
        -----------
        fig: matplotlib figure object
        ax: matplotlib axis object to draw on

        Returns:
        --------
        fig, ax: matplotlib figure and axis objects
        """
        if ax is None:
            if fig is None:
                return fig, ax
            else:
                ax = fig.gca()
        from scipy.stats import gaussian_kde
        x = self.data[self.aes['x']]
        gkde = gaussian_kde(x)
        ind = np.linspace(x.min(), x.max(), 200)
        ax.plot(ind, gkde.evaluate(ind))
        return fig, ax 
Example #13
Source File: test_kdeoth.py    From Computable with MIT License 6 votes vote down vote up
def test_gaussian_kde_monkeypatch():
    """Ugly, but people may rely on this.  See scipy pull request 123,
    specifically the linked ML thread "Width of the Gaussian in stats.kde".
    If it is necessary to break this later on, that is to be discussed on ML.
    """
    x1 = np.array([-7, -5, 1, 4, 5], dtype=np.float)
    xs = np.linspace(-10, 10, num=50)

    # The old monkeypatched version to get at Silverman's Rule.
    kde = stats.gaussian_kde(x1)
    kde.covariance_factor = kde.silverman_factor
    kde._compute_covariance()
    y1 = kde(xs)

    # The new saner version.
    kde2 = stats.gaussian_kde(x1, bw_method='silverman')
    y2 = kde2(xs)

    assert_array_almost_equal_nulp(y1, y2, nulp=10) 
Example #14
Source File: _core.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def _plot(cls, ax, y, style=None, bw_method=None, ind=None,
              column_num=None, stacking_id=None, **kwds):
        from scipy.stats import gaussian_kde
        from scipy import __version__ as spv

        y = remove_na_arraylike(y)

        if LooseVersion(spv) >= '0.11.0':
            gkde = gaussian_kde(y, bw_method=bw_method)
        else:
            gkde = gaussian_kde(y)
            if bw_method is not None:
                msg = ('bw_method was added in Scipy 0.11.0.' +
                       ' Scipy version in use is {spv}.'.format(spv=spv))
                warnings.warn(msg)

        y = gkde.evaluate(ind)
        lines = MPLPlot._plot(ax, ind, y, style=style, **kwds)
        return lines 
Example #15
Source File: kde.py    From carl with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def fit(self, X, **kwargs):
        """Fit the KDE estimator to data.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features)]:
            The samples.

        Returns
        -------
        * `self` [object]:
            `self`.
        """
        X = check_array(X).T
        self.kde_ = gaussian_kde(X, bw_method=self.bandwidth)
        return self 
Example #16
Source File: _core.py    From Splunking-Crime with GNU Affero General Public License v3.0 6 votes vote down vote up
def _plot(cls, ax, y, style=None, bw_method=None, ind=None,
              column_num=None, stacking_id=None, **kwds):
        from scipy.stats import gaussian_kde
        from scipy import __version__ as spv

        y = remove_na_arraylike(y)

        if LooseVersion(spv) >= '0.11.0':
            gkde = gaussian_kde(y, bw_method=bw_method)
        else:
            gkde = gaussian_kde(y)
            if bw_method is not None:
                msg = ('bw_method was added in Scipy 0.11.0.' +
                       ' Scipy version in use is %s.' % spv)
                warnings.warn(msg)

        y = gkde.evaluate(ind)
        lines = MPLPlot._plot(ax, ind, y, style=style, **kwds)
        return lines 
Example #17
Source File: _embedding_density.py    From scanpy with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def _calc_density(x: np.ndarray, y: np.ndarray):
    """\
    Function to calculate the density of cells in an embedding.
    """
    from scipy.stats import gaussian_kde

    # Calculate the point density
    xy = np.vstack([x, y])
    z = gaussian_kde(xy)(xy)

    min_z = np.min(z)
    max_z = np.max(z)

    # Scale between 0 and 1
    scaled_z = (z - min_z) / (max_z - min_z)

    return scaled_z 
Example #18
Source File: mv_measures.py    From Splunking-Crime with GNU Affero General Public License v3.0 6 votes vote down vote up
def mutualinfo_kde_2sample(y, x, normed=True):
    '''mutual information of two random variables estimated with kde

    '''
    nobs = len(x)
    x = np.asarray(x, float)
    y = np.asarray(y, float)
    #yx = np.vstack((y,x))
    kde_x = gaussian_kde(x.T)(x.T)
    kde_y = gaussian_kde(y.T)(x.T)
    #kde_yx = gaussian_kde(yx)(yx)

    mi_obs = np.log(kde_x) - np.log(kde_y)
    if len(mi_obs) != nobs:
        raise ValueError("Wrong number of observations")
    mi = mi_obs.mean()
    if normed:
        mi_normed = np.sqrt(1. - np.exp(-2 * mi))
        return mi_normed
    else:
        return mi 
Example #19
Source File: mv_measures.py    From Splunking-Crime with GNU Affero General Public License v3.0 6 votes vote down vote up
def mutualinfo_kde(y, x, normed=True):
    '''mutual information of two random variables estimated with kde

    '''
    nobs = len(x)
    if not len(y) == nobs:
        raise ValueError('both data arrays need to have the same size')
    x = np.asarray(x, float)
    y = np.asarray(y, float)
    yx = np.vstack((y,x))
    kde_x = gaussian_kde(x)(x)
    kde_y = gaussian_kde(y)(y)
    kde_yx = gaussian_kde(yx)(yx)

    mi_obs = np.log(kde_yx) - np.log(kde_x) - np.log(kde_y)
    mi = mi_obs.sum() / nobs
    if normed:
        mi_normed = np.sqrt(1. - np.exp(-2 * mi))
        return mi_normed
    else:
        return mi 
Example #20
Source File: _core.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def _plot(cls, ax, y, style=None, bw_method=None, ind=None,
              column_num=None, stacking_id=None, **kwds):
        from scipy.stats import gaussian_kde
        from scipy import __version__ as spv

        y = remove_na_arraylike(y)

        if LooseVersion(spv) >= '0.11.0':
            gkde = gaussian_kde(y, bw_method=bw_method)
        else:
            gkde = gaussian_kde(y)
            if bw_method is not None:
                msg = ('bw_method was added in Scipy 0.11.0.' +
                       ' Scipy version in use is {spv}.'.format(spv=spv))
                warnings.warn(msg)

        y = gkde.evaluate(ind)
        lines = MPLPlot._plot(ax, ind, y, style=style, **kwds)
        return lines 
Example #21
Source File: test_kdeoth.py    From GraphicDesignPatternByPython with MIT License 6 votes vote down vote up
def test_gaussian_kde_monkeypatch():
    """Ugly, but people may rely on this.  See scipy pull request 123,
    specifically the linked ML thread "Width of the Gaussian in stats.kde".
    If it is necessary to break this later on, that is to be discussed on ML.
    """
    x1 = np.array([-7, -5, 1, 4, 5], dtype=float)
    xs = np.linspace(-10, 10, num=50)

    # The old monkeypatched version to get at Silverman's Rule.
    kde = stats.gaussian_kde(x1)
    kde.covariance_factor = kde.silverman_factor
    kde._compute_covariance()
    y1 = kde(xs)

    # The new saner version.
    kde2 = stats.gaussian_kde(x1, bw_method='silverman')
    y2 = kde2(xs)

    assert_array_almost_equal_nulp(y1, y2, nulp=10) 
Example #22
Source File: kde_subclass.py    From rmats2sashimiplot with GNU General Public License v2.0 5 votes vote down vote up
def __init__(self, dataset, covfact = 'scotts'):
        self.covfact = covfact
        scipy.stats.gaussian_kde.__init__(self, dataset) 
Example #23
Source File: test_kdeoth.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_kde_bandwidth_method():
    def scotts_factor(kde_obj):
        """Same as default, just check that it works."""
        return np.power(kde_obj.n, -1./(kde_obj.d+4))

    np.random.seed(8765678)
    n_basesample = 50
    xn = np.random.randn(n_basesample)

    # Default
    gkde = stats.gaussian_kde(xn)
    # Supply a callable
    gkde2 = stats.gaussian_kde(xn, bw_method=scotts_factor)
    # Supply a scalar
    gkde3 = stats.gaussian_kde(xn, bw_method=gkde.factor)

    xs = np.linspace(-7,7,51)
    kdepdf = gkde.evaluate(xs)
    kdepdf2 = gkde2.evaluate(xs)
    assert_almost_equal(kdepdf, kdepdf2)
    kdepdf3 = gkde3.evaluate(xs)
    assert_almost_equal(kdepdf, kdepdf3)

    assert_raises(ValueError, stats.gaussian_kde, xn, bw_method='wrongstring')


# Subclasses that should stay working (extracted from various sources).
# Unfortunately the earlier design of gaussian_kde made it necessary for users
# to create these kinds of subclasses, or call _compute_covariance() directly. 
Example #24
Source File: render_utils.py    From pvnet-rendering with Apache License 2.0 5 votes vote down vote up
def sample_poses(self):
        eulers, translations = self.get_dataset_poses()
        num_samples = cfg.NUM_SYN
        azimuths, elevations = self.sample_sphere(num_samples)
        euler_sampler = stats.gaussian_kde(eulers.T)
        eulers = euler_sampler.resample(num_samples).T
        eulers[:, 0] = azimuths
        eulers[:, 1] = elevations
        translation_sampler = stats.gaussian_kde(translations.T)
        translations = translation_sampler.resample(num_samples).T
        np.save(self.blender_poses_path, np.concatenate([eulers, translations], axis=-1)) 
Example #25
Source File: plot.py    From lang2program with Apache License 2.0 5 votes vote down vote up
def plot_pdf(x, cov_factor=None, *args, **kwargs):
    import matplotlib.pyplot as plt
    from scipy.stats import gaussian_kde
    density = gaussian_kde(x)
    xgrid = np.linspace(min(x), max(x), 200)
    if cov_factor is not None:
        density.covariance_factor = lambda: cov_factor
        density._compute_covariance()
    y = density(xgrid)
    plt.plot(xgrid, y, *args, **kwargs) 
Example #26
Source File: kde.py    From carl with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def rvs(self, n_samples, random_state=None, **kwargs):
        # XXX gaussian_kde uses Numpy global random state...
        return self.kde_.resample(n_samples).T 
Example #27
Source File: kdecovclass.py    From Splunking-Crime with GNU Affero General Public License v3.0 5 votes vote down vote up
def __init__(self, dataset, covariance):
        self.covariance = covariance
        scipy.stats.gaussian_kde.__init__(self, dataset) 
Example #28
Source File: plot.py    From lang2program with Apache License 2.0 5 votes vote down vote up
def plot_pdf(x, cov_factor=None, *args, **kwargs):
    import matplotlib.pyplot as plt
    from scipy.stats import gaussian_kde
    density = gaussian_kde(x)
    xgrid = np.linspace(min(x), max(x), 200)
    if cov_factor is not None:
        density.covariance_factor = lambda: cov_factor
        density._compute_covariance()
    y = density(xgrid)
    plt.plot(xgrid, y, *args, **kwargs) 
Example #29
Source File: kdecovclass.py    From Splunking-Crime with GNU Affero General Public License v3.0 5 votes vote down vote up
def __init__(self, dataset, covfact = 'scotts'):
        self.covfact = covfact
        scipy.stats.gaussian_kde.__init__(self, dataset) 
Example #30
Source File: analysis.py    From reportgen with MIT License 5 votes vote down vote up
def distributions(a,hist=True,bins=None,norm_hist=True,kde=False,grid=None,gridsize=100,clip=None):
    '''数组的分布信息
    hist=True,则返回分布直方图(counts,bins)
    kde=True,则返回核密度估计数组(grid,y)

    example
    -------
    a=np.random.randint(1,50,size=(1000,1))
    '''
    a = np.asarray(a).squeeze()
    if hist:
        if bins is None:
            bins = min(_freedman_diaconis_bins(a), 50)
        counts,bins=np.histogram(a,bins=bins)
        if norm_hist:
            counts=counts/counts.sum()
    if kde:
        bw='scott'
        cut=3
        if clip is None:
            clip = (-np.inf, np.inf)
        try:
            kdemodel = stats.gaussian_kde(a, bw_method=bw)
        except TypeError:
            kdemodel = stats.gaussian_kde(a)
        bw = "scotts" if bw == "scott" else bw
        bw = getattr(kdemodel, "%s_factor" % bw)() * np.std(a)
        if grid is None:
            support_min = max(a.min() - bw * cut, clip[0])
            support_max = min(a.max() + bw * cut, clip[1])
            grid=np.linspace(support_min, support_max, gridsize)
        y = kdemodel(grid)
    if hist and not(kde):
        return counts,bins
    elif not(hist) and kde:
        return grid,y
    elif hist and kde:
        return ((counts,bins),(grid,y))
    else:
        return None