Python scipy.stats.norm.sf() Examples

The following are 18 code examples of scipy.stats.norm.sf(), drawn from open-source projects. You can go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the module scipy.stats.norm, or try the search function.
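For reference, norm.sf is the standard normal survival function, i.e. 1 - cdf: the upper-tail probability of a standard normal variate. A minimal sketch of what it computes (printed values are approximate):

from scipy.stats import norm

# norm.sf(z) == 1 - norm.cdf(z), but computed directly for accuracy.
print(norm.sf(0.0))        # 0.5
print(norm.sf(1.96))       # ~0.025, the one-sided 2.5% cutoff
print(norm.sf(1.96) * 2)   # ~0.05, the matching two-sided p-value

# sf keeps precision in the far tail, where 1 - cdf underflows:
print(1 - norm.cdf(10))    # 0.0
print(norm.sf(10))         # ~7.6e-24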
Example #1
Source File: thresholding.py    From nistats with BSD 3-Clause "New" or "Revised" License
def _true_positive_fraction(z_vals, hommel_value, alpha):
    """Given a bunch of z-avalues, return the true positive fraction

    Parameters
    ----------
    z_vals: array,
            a set of z-variates from which the true positive fraction is computed
    hommel_value: int,
           the Hommel value, used in the computations
    alpha: float,
           desired FDR control

    Returns
    -------
    proportion_true_discoveries: float,
        Estimated true positive fraction in the set of values
    """
    z_vals_ = - np.sort(- z_vals)
    p_vals = norm.sf(z_vals_)
    n_samples = len(p_vals)
    c = np.ceil((hommel_value * p_vals) / alpha)
    unique_c, counts = np.unique(c, return_counts=True)
    criterion = 1 - unique_c + np.cumsum(counts)
    proportion_true_discoveries = np.maximum(0, criterion.max() / n_samples)
    return proportion_true_discoveries 
Example #2
Source File: nonparametric.py    From hypothetical with MIT License
def _p_value(self):
        r"""
        Returns the p-value of the Friedman test.

        Returns
        -------
        pval: float
            The p-value of the Friedman test statistic given a chi-square distribution.

        """
        pval = chi2.sf(self.xr2, self.k - 1)

        return pval 
Example #3
Source File: general.py    From audit-ai with MIT License
def two_tailed_ztest(success1, success2, total1, total2):
    """
    Two-tailed z score for proportions

    Parameters
    ----------
    success1 : int
        the number of successes in `total1` trials/observations

    success2 : int
        the number of successes in `total2` trials/observations

    total1 : int
        the number of trials or observations of class 1

    total2 : int
        the number of trials or observations of class 2

    Returns
    -------
    zstat : float
        z score for two tailed z-test
    p_value : float
        p value for two tailed z-test
    """
    p1 = success1 / float(total1)
    p2 = success2 / float(total2)
    p_pooled = (success1 + success2) / float(total1 + total2)

    obs_ratio = (1. / total1 + 1. / total2)
    var = p_pooled * (1 - p_pooled) * obs_ratio

    # calculate z-score using foregoing values
    zstat = (p1 - p2) / np.sqrt(var)

    # calculate associated p-value for 2-tailed normal distribution
    p_value = norm.sf(abs(zstat)) * 2

    return zstat, p_value 
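A quick usage sketch of two_tailed_ztest, assuming the function above and its numpy/scipy imports are in scope (the counts below are made up):

zstat, p_value = two_tailed_ztest(success1=520, success2=450,
                                  total1=1000, total2=1000)
print(zstat, p_value)  # z ~ 3.13, p ~ 0.0017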
Example #4
Source File: discrete_margins.py    From Splunking-Crime with GNU Affero General Public License v3.0
def pvalues(self):
        _check_at_is_all(self.margeff_options)
        return norm.sf(np.abs(self.tvalues)) * 2 
Example #5
Source File: ar_model.py    From Splunking-Crime with GNU Affero General Public License v3.0
def pvalues(self):
        return norm.sf(np.abs(self.tvalues))*2 
Example #6
Source File: mlemodel.py    From Splunking-Crime with GNU Affero General Public License v3.0
def pvalues(self):
        """
        (array) The p-values associated with the z-statistics of the
        coefficients. Note that the coefficients are assumed to have a Normal
        distribution.
        """
        return norm.sf(np.abs(self.zvalues)) * 2 
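Examples #4 through #6 all rely on the same idiom for converting a z-statistic into a two-sided p-value; a minimal standalone sketch:

import numpy as np
from scipy.stats import norm

z = np.array([-2.5, -0.3, 1.1, 3.0])
p_two_sided = norm.sf(np.abs(z)) * 2    # mass in both tails
p_alt = 2 * (1 - norm.cdf(np.abs(z)))   # equivalent, less accurate for large |z|
print(np.allclose(p_two_sided, p_alt))  # True for moderate z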
Example #7
Source File: test_utils.py    From nistats with BSD 3-Clause "New" or "Revised" License
def test_z_score():
    p = np.random.rand(10)
    assert_array_almost_equal(norm.sf(z_score(p)), p)
    # check the numerical precision
    for p in [1.e-250, 1 - 1.e-16]:
        assert_array_almost_equal(z_score(p), norm.isf(p))
    assert_array_almost_equal(z_score(np.float32(1.e-100)), norm.isf(1.e-300)) 
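The test implies that z_score inverts norm.sf. A minimal sketch of an equivalent function; this is an assumption based on the assertions above, not the actual nistats implementation:

import numpy as np
from scipy.stats import norm

def z_score_sketch(p):
    # Hypothetical stand-in for nistats' z_score: the z-value whose
    # upper-tail probability is p, clipped to keep the result finite.
    p = np.clip(p, 1.e-300, 1. - 1.e-16)
    return norm.isf(p)

print(norm.sf(z_score_sketch(0.025)))  # ~0.025: isf and sf round-trip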
Example #8
Source File: thresholding.py    From nistats with BSD 3-Clause "New" or "Revised" License
def fdr_threshold(z_vals, alpha):
    """ return the Benjamini-Hochberg FDR threshold for the input z_vals

    Parameters
    ----------
    z_vals: array,
            a set of z-variates from which the FDR is computed
    alpha: float,
           desired FDR control

    Returns
    -------
    threshold: float,
               FDR-controlling threshold from the Benjamini-Hochberg procedure
    """
    if alpha < 0 or alpha > 1:
        raise ValueError(
            'alpha should be between 0 and 1. {} was provided'.format(alpha))
    z_vals_ = - np.sort(- z_vals)
    p_vals = norm.sf(z_vals_)
    n_samples = len(p_vals)
    pos = p_vals < alpha * np.linspace(
        .5 / n_samples, 1 - .5 / n_samples, n_samples)
    if pos.any():
        return (z_vals_[pos][-1] - 1.e-12)

    return np.inf
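A usage sketch of fdr_threshold with simulated data; the seed, sample sizes, and effect size are made up:

import numpy as np

rng = np.random.default_rng(0)
z_vals = np.concatenate([rng.standard_normal(1000),  # null variates
                         rng.normal(4., 1., 20)])    # true signals
print(fdr_threshold(z_vals, alpha=0.05))  # z cutoff; np.inf if nothing passes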
Example #9
Source File: thresholding.py    From nistats with BSD 3-Clause "New" or "Revised" License
def _compute_hommel_value(z_vals, alpha, verbose=False):
    """Compute the All-Resolution Inference hommel-value"""
    if alpha < 0 or alpha > 1:
        raise ValueError('alpha should be between 0 and 1')
    z_vals_ = - np.sort(- z_vals)
    p_vals = norm.sf(z_vals_)
    n_samples = len(p_vals)

    if len(p_vals) == 1:
        return p_vals[0] > alpha
    if p_vals[0] > alpha:
        return n_samples
    slopes = (alpha - p_vals[: - 1]) / np.arange(n_samples, 1, -1)
    slope = np.max(slopes)
    hommel_value = np.trunc(n_samples + (alpha - slope * n_samples) / slope)
    if verbose:
        try:
            from matplotlib import pyplot as plt
        except ImportError:
            warnings.warn('"verbose" option requires the package Matplotlib.'
                          'Please install it using `pip install matplotlib`.')
        else:
            plt.figure()
            plt.plot(p_vals, 'o')
            plt.plot([n_samples - hommel_value, n_samples], [0, alpha])
            plt.plot([0, n_samples], [0, 0], 'k')
            plt.show(block=False)
    return np.minimum(hommel_value, n_samples) 
Example #10
Source File: discrete_margins.py    From vnpy_crypto with MIT License
def pvalues(self):
        _check_at_is_all(self.margeff_options)
        return norm.sf(np.abs(self.tvalues)) * 2 
Example #11
Source File: ar_model.py    From vnpy_crypto with MIT License
def pvalues(self):
        return norm.sf(np.abs(self.tvalues))*2 
Example #12
Source File: mlemodel.py    From vnpy_crypto with MIT License
def pvalues(self):
        """
        (array) The p-values associated with the z-statistics of the
        coefficients. Note that the coefficients are assumed to have a Normal
        distribution.
        """
        return norm.sf(np.abs(self.zvalues)) * 2 
Example #13
Source File: nonparametric.py    From hypothetical with MIT License
def _test_statistic(self):
        r"""
        Returns the Van der Waerden test statistic, :math:`T_1` and the associated p-value.

        Returns
        -------
        t1 : float
            The Van der Waerden test statistic
        p_value : float
            The computed p-value

        Notes
        -----
        The Van der Waerden test statistic, :math:`T_1` is defined as:

        .. math::

            T_1 = \frac{1}{s^2} \sum^k_{i=1} n_i (\bar{A}_i)^2

        References
        ----------
        Conover, W. J. (1999). Practical Nonparametric Statistics (Third ed.). Wiley.

        Wikipedia contributors. "Van der Waerden test." Wikipedia, The Free Encyclopedia.
            Wikipedia, The Free Encyclopedia, 8 Feb. 2017. Web. 8 Mar. 2020.

        """
        average_scores = np.array([i for _, i in self.average_scores])
        t1 = np.sum(self._group_obs * average_scores ** 2) / self.score_variance

        p_value = chi2.sf(t1, self.k - 1)

        return t1, p_value

    # def _min_significant_difference(self):
    #     mse = self.score_variance * ((self.n - 1 - self.test_statistic) / (self.n - self.k))
    #
    #     msd = t.ppf(1 - self.alpha / 2, self.n - self.k) * np.sqrt(2 * mse / self.k)
    #
    #     return msd 
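For context, here is a self-contained sketch of the T_1 computation from the formula above, using toy data in place of the class attributes (self._group_obs, self.average_scores, self.score_variance):

import numpy as np
from scipy.stats import chi2, norm

samples = [np.array([8., 10., 9.5]),
           np.array([12., 15., 13.]),
           np.array([7., 6., 9.])]   # three hypothetical groups, no ties
n = sum(len(s) for s in samples)
k = len(samples)

# Normal scores A_ij = Phi^{-1}(R_ij / (n + 1)) from the pooled ranks.
pooled = np.concatenate(samples)
ranks = pooled.argsort().argsort() + 1.
scores = norm.ppf(ranks / (n + 1.))
s2 = np.sum(scores ** 2) / (n - 1.)  # score variance

t1, idx = 0., 0
for s in samples:
    a_bar = scores[idx: idx + len(s)].mean()  # group mean score
    t1 += len(s) * a_bar ** 2
    idx += len(s)
t1 /= s2
print(t1, chi2.sf(t1, k - 1))  # statistic and its chi-square p-value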
Example #14
Source File: circular.py    From pingouin with GNU General Public License v3.0
def circ_corrcl(x, y, tail='two-sided'):
    """Correlation coefficient between one circular and one linear variable
    random variables.

    Parameters
    ----------
    x : 1-D array_like
        First circular variable (expressed in radians).
        The range of ``x`` must be either :math:`[0, 2\\pi]` or
        :math:`[-\\pi, \\pi]`. If ``x`` is not
        expressed in radians (e.g. degrees or 24-hours), please use the
        :py:func:`pingouin.convert_angles` function prior to using the present
        function.
    y : 1-D array_like
        Second variable (linear).
    tail : string
        Specify whether to return 'one-sided' or 'two-sided' p-value.

    Returns
    -------
    r : float
        Correlation coefficient
    pval : float
        Uncorrected p-value

    Notes
    -----
    Please note that NaN values are automatically removed.

    Examples
    --------
    Compute the correlation coefficient and p-value between one circular and one linear variable.

    >>> from pingouin import circ_corrcl
    >>> x = [0.785, 1.570, 3.141, 0.839, 5.934]
    >>> y = [1.593, 1.291, -0.248, -2.892, 0.102]
    >>> r, pval = circ_corrcl(x, y)
    >>> print(round(r, 3), round(pval, 3))
    0.109 0.971
    """
    from scipy.stats import pearsonr, chi2
    x = np.asarray(x)
    y = np.asarray(y)
    assert x.size == y.size, 'x and y must have the same length.'

    # Remove NA
    x, y = remove_na(x, y, paired=True)
    n = x.size

    # Compute correlation coefficient for sin and cos independently
    rxs = pearsonr(y, np.sin(x))[0]
    rxc = pearsonr(y, np.cos(x))[0]
    rcs = pearsonr(np.sin(x), np.cos(x))[0]

    # Compute angular-linear correlation (equ. 27.47)
    r = np.sqrt((rxc**2 + rxs**2 - 2 * rxc * rxs * rcs) / (1 - rcs**2))

    # Compute p-value
    pval = chi2.sf(n * r**2, 2)
    pval = pval / 2 if tail == 'one-sided' else pval
    return r, pval 
Example #15
Source File: utils.py    From scvelo with BSD 3-Clause "New" or "Revised" License
def test_bimodality(x, bins=30, kde=True, plot=False):
    """Test for bimodal distribution.
    """
    from scipy.stats import gaussian_kde, norm

    lb, ub = np.min(x), np.percentile(x, 99.9)
    grid = np.linspace(lb, ub if ub <= lb else np.max(x), bins)
    kde_grid = (
        gaussian_kde(x)(grid) if kde else np.histogram(x, bins=grid, density=True)[0]
    )

    idx = int(bins / 2) - 2
    idx += np.argmin(kde_grid[idx : idx + 4])

    peak_0 = kde_grid[:idx].argmax()
    peak_1 = kde_grid[idx:].argmax()
    kde_peak = kde_grid[idx:][
        peak_1
    ]  # min(kde_grid[:idx][peak_0], kde_grid[idx:][peak_1])
    kde_mid = kde_grid[idx:].mean()  # kde_grid[idx]

    t_stat = (kde_peak - kde_mid) / np.clip(np.std(kde_grid) / np.sqrt(bins), 1, None)
    p_val = norm.sf(t_stat)

    grid_0 = grid[:idx]
    grid_1 = grid[idx:]
    means = [
        (grid_0[peak_0] + grid_0[min(peak_0 + 1, len(grid_0) - 1)]) / 2,
        (grid_1[peak_1] + grid_1[min(peak_1 + 1, len(grid_1) - 1)]) / 2,
    ]

    if plot:
        color = "grey"
        if kde:
            pl.plot(grid, kde_grid, color=color)
            pl.fill_between(grid, 0, kde_grid, alpha=0.4, color=color)
        else:
            pl.hist(x, bins=grid, alpha=0.4, density=True, color=color)
        pl.axvline(means[0], color=color)
        pl.axvline(means[1], color=color)
        pl.axhline(kde_mid, alpha=0.2, linestyle="--", color=color)
        pl.show()

    return t_stat, p_val, means  # ~ t_test (reject unimodality if t_stat > 3) 
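A usage sketch of test_bimodality with clearly bimodal simulated data (all parameters are made up):

import numpy as np

rng = np.random.default_rng(0)
x = np.concatenate([rng.normal(0., 1., 500), rng.normal(6., 1., 500)])
t_stat, p_val, means = test_bimodality(x, bins=30)
print(t_stat, p_val, means)  # expect t_stat > 3, i.e. reject unimodality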
Example #16
Source File: nonparametric.py    From hypothetical with MIT License
def _runs_test(self):
        r"""
        Primary method for performing the one-sample runs test.

        Returns
        -------
        dict
            Dictionary containing relevant test statistics of the one-sample runs test.

        """
        n1, n2 = find_repeats(pd.factorize(self.x)[0]).counts

        r_range = np.arange(2, self.r + 1)
        evens = r_range[r_range % 2 == 0]
        odds = r_range[r_range % 2 != 0]

        p_even = 1 / comb(n1 + n2, n1) * np.sum(2 * comb(n1 - 1, evens / 2 - 1) * comb(n2 - 1, evens / 2 - 1))

        p_odd = 1 / comb(n1 + n2, n1) * np.sum(comb(n1 - 1, odds - 1) * comb(n2 - 1, odds - 2) +
                                               comb(n1 - 1, odds - 2) * comb(n2 - 1, odds - 1))

        p = p_even + p_odd

        if all(np.array([n1, n2]) <= 20):
            r_crit_1, r_crit_2 = r_critical_value(n1, n2)

            test_summary = {
                'probability': p,
                'r critical value 1': r_crit_1,
                'r critical value 2': r_crit_2
            }
            return test_summary

        else:
            mean = (2 * n1 * n2) / (n1 + n2) + 1
            sd = np.sqrt((2 * n1 * n2 * (2 * n1 * n2 - n1 - n2)) / ((n1 + n2) ** 2 * (n1 + n2 - 1)))
            z = (np.abs(self.r - mean) - self.continuity * 0.5) / sd
            p_val = norm.sf(z) * 2

            test_summary = {
                'probability': p,
                'mean of runs': mean,
                'standard deviation of runs': sd,
                'z-value': z,
                'p-value': p_val,
                'continuity': self.continuity
            }

            return test_summary 
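The large-sample branch above is self-contained enough to sketch on its own (the counts are made up):

import numpy as np
from scipy.stats import norm

n1, n2, r = 30, 25, 18  # symbol counts and observed number of runs
mean = (2 * n1 * n2) / (n1 + n2) + 1
sd = np.sqrt((2 * n1 * n2 * (2 * n1 * n2 - n1 - n2))
             / ((n1 + n2) ** 2 * (n1 + n2 - 1)))
z = (np.abs(r - mean) - 0.5) / sd  # 0.5 is the continuity correction
print(z, norm.sf(z) * 2)  # z ~ 2.68, two-sided p ~ 0.007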
Example #17
Source File: CohensDCalculator.py    From scattertext with Apache License 2.0
def get_cohens_d_df(self, cat_X, ncat_X, orig_cat_X, orig_ncat_X, correction_method=None):
        empty_cat_X_smoothing_doc = np.zeros((1, cat_X.shape[1]))
        empty_ncat_X_smoothing_doc = np.zeros((1, ncat_X.shape[1]))
        smoothed_cat_X = np.vstack([empty_cat_X_smoothing_doc, cat_X])
        smoothed_ncat_X = np.vstack([empty_ncat_X_smoothing_doc, ncat_X])
        n1, n2 = float(smoothed_cat_X.shape[0]), float(smoothed_ncat_X.shape[0])
        n = n1 + n2
        #print(cat_X.shape, type(cat_X))
        m1 = cat_X.mean(axis=0).A1 if type(cat_X) == np.matrix else cat_X.mean(axis=0)
        m2 = ncat_X.mean(axis=0).A1 if type(ncat_X) == np.matrix else ncat_X.mean(axis=0)
        v1 = smoothed_cat_X.var(axis=0).A1 if type(smoothed_cat_X) == np.matrix else smoothed_cat_X.var(axis=0)
        v2 = smoothed_ncat_X.var(axis=0).A1 if type(smoothed_ncat_X) == np.matrix else smoothed_ncat_X.var(axis=0)
        s_pooled = np.sqrt(((n2 - 1) * v2 + (n1 - 1) * v1) / (n - 2.))
        cohens_d = (m1 - m2) / s_pooled
        cohens_d_se = np.sqrt(((n - 1.) / (n - 3)) * (4. / n) * (1 + np.square(cohens_d) / 8.))
        cohens_d_z = cohens_d / cohens_d_se
        cohens_d_p = norm.sf(cohens_d_z)
        hedges_r = cohens_d * (1 - 3. / ((4. * (n - 2)) - 1))
        hedges_r_se = np.sqrt(n / (n1 * n2) + np.square(hedges_r) / (n - 2.))
        hedges_r_z = hedges_r / hedges_r_se
        hedges_r_p = norm.sf(hedges_r_z)
        score_df = pd.DataFrame({
            'cohens_d': cohens_d,
            'cohens_d_se': cohens_d_se,
            'cohens_d_z': cohens_d_z,
            'cohens_d_p': cohens_d_p,
            'hedges_r': hedges_r,
            'hedges_r_se': hedges_r_se,
            'hedges_r_z': hedges_r_z,
            'hedges_r_p': hedges_r_p,
            'm1': m1,
            'm2': m2,
            'count1': orig_cat_X.sum(axis=0).A1,
            'count2': orig_ncat_X.sum(axis=0).A1,
            'docs1': (orig_cat_X > 0).sum(axis=0).A1,
            'docs2': (orig_ncat_X > 0).sum(axis=0).A1,
        }).fillna(0)
        if correction_method is not None:
            from statsmodels.stats.multitest import multipletests
            score_df['hedges_r_p_corr'] = 0.5
            for method in ['cohens_d', 'hedges_r']:
                score_df[method + '_p_corr'] = 0.5
                pvals = score_df.loc[(score_df['m1'] != 0) | (score_df['m2'] != 0), method + '_p']
                # convert the one-sided p-values to two-sided, elementwise
                pvals = np.min(np.array([pvals, 1. - pvals]), axis=0) * 2.
                score_df.loc[(score_df['m1'] != 0) | (score_df['m2'] != 0), method + '_p_corr'] = (
                    multipletests(pvals, method=correction_method)[1]
                )
        return score_df 
Example #18
Source File: tm_massunivariatemodels.py    From TFCE_mediation with GNU General Public License v3.0
def full_glm_results(endog_arr, exog_vars, return_resids = False, only_tvals = False, PCA_whiten = False, ZCA_whiten = False,  orthogonalize = True, orthogNear = False, orthog_GramSchmidt = False):
	if np.mean(exog_vars[:,0])!=1:
		print("Warning: the intercept is not included as the first column in your exogenous variable array")
	n, num_depv = endog_arr.shape
	k = exog_vars.shape[1]

	if orthogonalize:
		exog_vars = sm.add_constant(orthog_columns(exog_vars[:,1:]))
	elif orthogNear:
		exog_vars = sm.add_constant(ortho_neareast(exog_vars[:,1:]))
	elif orthog_GramSchmidt: # for when order matters AKA type 2 sum of squares
		exog_vars = sm.add_constant(gram_schmidt_orthonorm(exog_vars[:,1:]))
	else:
		pass

	invXX = np.linalg.inv(np.dot(exog_vars.T, exog_vars))

	DFbetween = k - 1 # aka df model
	DFwithin = n - k # aka df residuals
	DFtotal = n - 1
	if PCA_whiten:
		endog_arr = PCAwhiten(endog_arr)
	if ZCA_whiten:
		endog_arr = ZCAwhiten(endog_arr)

	a = cy_lin_lstsqr_mat(exog_vars, endog_arr)
	sigma2 = np.sum((endog_arr - np.dot(exog_vars,a))**2,axis=0) / (n - k)
	se = se_of_slope(num_depv,invXX,sigma2,k)

	if only_tvals:
		return a / se
	else:
		resids = endog_arr - np.dot(exog_vars,a)
		RSS = np.sum(resids**2,axis=0)
		TSS = np.sum((endog_arr - np.mean(endog_arr, axis =0))**2, axis = 0)
		R2 = 1 - (RSS/TSS)

		std_y = np.sqrt(TSS/DFtotal)
		R2_adj = 1 - ((1-R2)*DFtotal/(DFwithin))
		Fvalues = ((TSS-RSS)/(DFbetween))/(RSS/DFwithin)
		Tvalues = a / se
		Pvalues = t.sf(np.abs(Tvalues), DFtotal)*2
		if return_resids:
			fitted = np.dot(exog_vars, a)
			return (Fvalues, Tvalues, Pvalues, R2, R2_adj, np.array(resids), np.array(fitted))
		else:
			return (Fvalues, Tvalues, Pvalues, R2, R2_adj)
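The Pvalues line mirrors the norm.sf idiom from the earlier examples, with Student's t in place of the normal; a minimal standalone sketch:

import numpy as np
from scipy.stats import t

tvals = np.array([-2.2, 0.4, 3.1])
df = 57  # illustrative degrees of freedom
print(t.sf(np.abs(tvals), df) * 2)  # smaller |t| -> larger two-sided p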