Python scipy.stats.norm.sf() Examples
The following are 18
code examples of scipy.stats.norm.sf().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
scipy.stats.norm
, or try the search function
.
Example #1
Source File: thresholding.py From nistats with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _true_positive_fraction(z_vals, hommel_value, alpha): """Given a bunch of z-avalues, return the true positive fraction Parameters ---------- z_vals: array, a set of z-variates from which the FDR is computed hommel_value: int, the Hommel value, used in the computations alpha: float, desired FDR control Returns ------- threshold: float, Estimated true positive fraction in the set of values """ z_vals_ = - np.sort(- z_vals) p_vals = norm.sf(z_vals_) n_samples = len(p_vals) c = np.ceil((hommel_value * p_vals) / alpha) unique_c, counts = np.unique(c, return_counts=True) criterion = 1 - unique_c + np.cumsum(counts) proportion_true_discoveries = np.maximum(0, criterion.max() / n_samples) return proportion_true_discoveries
Example #2
Source File: nonparametric.py From hypothetical with MIT License | 5 votes |
def _p_value(self): r""" Returns the p-value of the Freidman test. Returns ------- pval: float The p-value of the Friedman test statistic given a chi-square distribution. """ pval = chi2.sf(self.xr2, self.k - 1) return pval
Example #3
Source File: general.py From audit-ai with MIT License | 5 votes |
def two_tailed_ztest(success1, success2, total1, total2): """ Two-tailed z score for proportions Parameters ------- success1 : int the number of success in `total1` trials/observations success2 : int the number of success in `total2` trials/observations total1 : int the number of trials or observations of class 1 total2 : int the number of trials or observations of class 2 Returns ------- zstat : float z score for two tailed z-test p_value : float p value for two tailed z-test """ p1 = success1 / float(total1) p2 = success2 / float(total2) p_pooled = (success1 + success2) / float(total1 + total2) obs_ratio = (1. / total1 + 1. / total2) var = p_pooled * (1 - p_pooled) * obs_ratio # calculate z-score using foregoing values zstat = (p1 - p2) / np.sqrt(var) # calculate associated p-value for 2-tailed normal distribution p_value = norm.sf(abs(zstat)) * 2 return zstat, p_value
Example #4
Source File: discrete_margins.py From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes |
def pvalues(self): _check_at_is_all(self.margeff_options) return norm.sf(np.abs(self.tvalues)) * 2
Example #5
Source File: ar_model.py From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes |
def pvalues(self): return norm.sf(np.abs(self.tvalues))*2
Example #6
Source File: mlemodel.py From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes |
def pvalues(self): """ (array) The p-values associated with the z-statistics of the coefficients. Note that the coefficients are assumed to have a Normal distribution. """ return norm.sf(np.abs(self.zvalues)) * 2
Example #7
Source File: test_utils.py From nistats with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_z_score(): p = np.random.rand(10) assert_array_almost_equal(norm.sf(z_score(p)), p) # check the numerical precision for p in [1.e-250, 1 - 1.e-16]: assert_array_almost_equal(z_score(p), norm.isf(p)) assert_array_almost_equal(z_score(np.float32(1.e-100)), norm.isf(1.e-300))
Example #8
Source File: thresholding.py From nistats with BSD 3-Clause "New" or "Revised" License | 5 votes |
def fdr_threshold(z_vals, alpha): """ return the Benjamini-Hochberg FDR threshold for the input z_vals Parameters ---------- z_vals: array, a set of z-variates from which the FDR is computed alpha: float, desired FDR control Returns ------- threshold: float, FDR-controling threshold from the Benjamini-Hochberg procedure """ if alpha < 0 or alpha > 1: raise ValueError( 'alpha should be between 0 and 1. {} was provided'.format(alpha)) z_vals_ = - np.sort(- z_vals) p_vals = norm.sf(z_vals_) n_samples = len(p_vals) pos = p_vals < alpha * np.linspace( .5 / n_samples, 1 - .5 / n_samples, n_samples) if pos.any(): return (z_vals_[pos][-1] - 1.e-12) return np.infty
Example #9
Source File: thresholding.py From nistats with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _compute_hommel_value(z_vals, alpha, verbose=False): """Compute the All-Resolution Inference hommel-value""" if alpha < 0 or alpha > 1: raise ValueError('alpha should be between 0 and 1') z_vals_ = - np.sort(- z_vals) p_vals = norm.sf(z_vals_) n_samples = len(p_vals) if len(p_vals) == 1: return p_vals[0] > alpha if p_vals[0] > alpha: return n_samples slopes = (alpha - p_vals[: - 1]) / np.arange(n_samples, 1, -1) slope = np.max(slopes) hommel_value = np.trunc(n_samples + (alpha - slope * n_samples) / slope) if verbose: try: from matplotlib import pyplot as plt except ImportError: warnings.warn('"verbose" option requires the package Matplotlib.' 'Please install it using `pip install matplotlib`.') else: plt.figure() plt.plot(p_vals, 'o') plt.plot([n_samples - hommel_value, n_samples], [0, alpha]) plt.plot([0, n_samples], [0, 0], 'k') plt.show(block=False) return np.minimum(hommel_value, n_samples)
Example #10
Source File: discrete_margins.py From vnpy_crypto with MIT License | 5 votes |
def pvalues(self): _check_at_is_all(self.margeff_options) return norm.sf(np.abs(self.tvalues)) * 2
Example #11
Source File: ar_model.py From vnpy_crypto with MIT License | 5 votes |
def pvalues(self): return norm.sf(np.abs(self.tvalues))*2
Example #12
Source File: mlemodel.py From vnpy_crypto with MIT License | 5 votes |
def pvalues(self): """ (array) The p-values associated with the z-statistics of the coefficients. Note that the coefficients are assumed to have a Normal distribution. """ return norm.sf(np.abs(self.zvalues)) * 2
Example #13
Source File: nonparametric.py From hypothetical with MIT License | 5 votes |
def _test_statistic(self): r""" Returns the Van der Waerden test statistic, :math:`T_1` and the associated p-value. Returns ------- t1 : float The Van der Waerden test statistic p_value : float The computed p-value Notes ----- The Van der Waerden test statistic, :math:`T_1` is defined as: .. math:: T_1 = \frac{1}{s^2} \sum^k_{i=1} n_i (\bar{A}_i)^2 References ---------- Conover, W. J. (1999). Practical Nonparameteric Statistics (Third ed.). Wiley. Wikipedia contributors. "Van der Waerden test." Wikipedia, The Free Encyclopedia. Wikipedia, The Free Encyclopedia, 8 Feb. 2017. Web. 8 Mar. 2020. """ average_scores = np.array([i for _, i in self.average_scores]) t1 = np.sum(self._group_obs * average_scores ** 2) / self.score_variance p_value = chi2.sf(t1, self.k - 1) return t1, p_value # def _min_significant_difference(self): # mse = self.score_variance * ((self.n - 1 - self.test_statistic) / (self.n - self.k)) # # msd = t.ppf(1 - self.alpha / 2, self.n - self.k) * np.sqrt(2 * mse / self.k) # # return msd
Example #14
Source File: circular.py From pingouin with GNU General Public License v3.0 | 4 votes |
def circ_corrcl(x, y, tail='two-sided'): """Correlation coefficient between one circular and one linear variable random variables. Parameters ---------- x : 1-D array_like First circular variable (expressed in radians). The range of ``x`` must be either :math:`[0, 2\\pi]` or :math:`[-\\pi, \\pi]`. If ``angles`` is not expressed in radians (e.g. degrees or 24-hours), please use the :py:func:`pingouin.convert_angles` function prior to using the present function. y : 1-D array_like Second circular variable (linear) tail : string Specify whether to return 'one-sided' or 'two-sided' p-value. Returns ------- r : float Correlation coefficient pval : float Uncorrected p-value Notes ----- Please note that NaN are automatically removed from datasets. Examples -------- Compute the r and p-value between one circular and one linear variables. >>> from pingouin import circ_corrcl >>> x = [0.785, 1.570, 3.141, 0.839, 5.934] >>> y = [1.593, 1.291, -0.248, -2.892, 0.102] >>> r, pval = circ_corrcl(x, y) >>> print(round(r, 3), round(pval, 3)) 0.109 0.971 """ from scipy.stats import pearsonr, chi2 x = np.asarray(x) y = np.asarray(y) assert x.size == y.size, 'x and y must have the same length.' # Remove NA x, y = remove_na(x, y, paired=True) n = x.size # Compute correlation coefficent for sin and cos independently rxs = pearsonr(y, np.sin(x))[0] rxc = pearsonr(y, np.cos(x))[0] rcs = pearsonr(np.sin(x), np.cos(x))[0] # Compute angular-linear correlation (equ. 27.47) r = np.sqrt((rxc**2 + rxs**2 - 2 * rxc * rxs * rcs) / (1 - rcs**2)) # Compute p-value pval = chi2.sf(n * r**2, 2) pval = pval / 2 if tail == 'one-sided' else pval return r, pval
Example #15
Source File: utils.py From scvelo with BSD 3-Clause "New" or "Revised" License | 4 votes |
def test_bimodality(x, bins=30, kde=True, plot=False): """Test for bimodal distribution. """ from scipy.stats import gaussian_kde, norm lb, ub = np.min(x), np.percentile(x, 99.9) grid = np.linspace(lb, ub if ub <= lb else np.max(x), bins) kde_grid = ( gaussian_kde(x)(grid) if kde else np.histogram(x, bins=grid, density=True)[0] ) idx = int(bins / 2) - 2 idx += np.argmin(kde_grid[idx : idx + 4]) peak_0 = kde_grid[:idx].argmax() peak_1 = kde_grid[idx:].argmax() kde_peak = kde_grid[idx:][ peak_1 ] # min(kde_grid[:idx][peak_0], kde_grid[idx:][peak_1]) kde_mid = kde_grid[idx:].mean() # kde_grid[idx] t_stat = (kde_peak - kde_mid) / np.clip(np.std(kde_grid) / np.sqrt(bins), 1, None) p_val = norm.sf(t_stat) grid_0 = grid[:idx] grid_1 = grid[idx:] means = [ (grid_0[peak_0] + grid_0[min(peak_0 + 1, len(grid_0) - 1)]) / 2, (grid_1[peak_1] + grid_1[min(peak_1 + 1, len(grid_1) - 1)]) / 2, ] if plot: color = "grey" if kde: pl.plot(grid, kde_grid, color=color) pl.fill_between(grid, 0, kde_grid, alpha=0.4, color=color) else: pl.hist(x, bins=grid, alpha=0.4, density=True, color=color) pl.axvline(means[0], color=color) pl.axvline(means[1], color=color) pl.axhline(kde_mid, alpha=0.2, linestyle="--", color=color) pl.show() return t_stat, p_val, means # ~ t_test (reject unimodality if t_stat > 3)
Example #16
Source File: nonparametric.py From hypothetical with MIT License | 4 votes |
def _runs_test(self): r""" Primary method for performing the one-sample runs test. Returns ------- dict Dictionary containing relevant test statistics of the one-sample runs test. """ n1, n2 = find_repeats(pd.factorize(self.x)[0]).counts r_range = np.arange(2, self.r + 1) evens = r_range[r_range % 2 == 0] odds = r_range[r_range % 2 != 0] p_even = 1 / comb(n1 + n2, n1) * np.sum(2 * comb(n1 - 1, evens / 2 - 1) * comb(n2 - 1, evens / 2 - 1)) p_odd = 1 / comb(n1 + n2, n1) * np.sum(comb(n1 - 1, odds - 1) * comb(n2 - 1, odds - 2) + comb(n1 - 1, odds - 2) * comb(n2 - 1, odds - 1)) p = p_even + p_odd if all(np.array([n1, n2]) <= 20): r_crit_1, r_crit_2 = r_critical_value(n1, n2) test_summary = { 'probability': p, 'r critical value 1': r_crit_1, 'r critical value 2': r_crit_2 } return test_summary else: mean = (2 * n1 * n2) / (n1 + n2) + 1 sd = np.sqrt((2 * n1 * n2 * (2 * n1 * n2 - n1 - n2)) / ((n1 + n2) ** 2 * (n1 + n2 - 1))) z = (np.abs(self.r - mean) - self.continuity * 0.5) / sd p_val = norm.sf(z) * 2 test_summary = { 'probability': p, 'mean of runs': mean, 'standard deviation of runs': sd, 'z-value': z, 'p-value': p_val, 'continuity': self.continuity } return test_summary
Example #17
Source File: CohensDCalculator.py From scattertext with Apache License 2.0 | 4 votes |
def get_cohens_d_df(self, cat_X, ncat_X, orig_cat_X, orig_ncat_X, correction_method=None): empty_cat_X_smoothing_doc = np.zeros((1, cat_X.shape[1])) empty_ncat_X_smoothing_doc = np.zeros((1, ncat_X.shape[1])) smoothed_cat_X = np.vstack([empty_cat_X_smoothing_doc, cat_X]) smoothed_ncat_X = np.vstack([empty_ncat_X_smoothing_doc, ncat_X]) n1, n2 = float(smoothed_cat_X.shape[0]), float(smoothed_ncat_X.shape[0]) n = n1 + n2 #print(cat_X.shape, type(cat_X)) m1 = cat_X.mean(axis=0).A1 if type(cat_X) == np.matrix else cat_X.mean(axis=0) m2 = ncat_X.mean(axis=0).A1 if type(ncat_X) == np.matrix else ncat_X.mean(axis=0) v1 = smoothed_cat_X.var(axis=0).A1 if type(smoothed_cat_X) == np.matrix else smoothed_cat_X.mean(axis=0) v2 = smoothed_ncat_X.var(axis=0).A1 if type(smoothed_ncat_X) == np.matrix else smoothed_ncat_X.mean(axis=0) s_pooled = np.sqrt(((n2 - 1) * v2 + (n1 - 1) * v1) / (n - 2.)) cohens_d = (m1 - m2) / s_pooled cohens_d_se = np.sqrt(((n - 1.) / (n - 3)) * (4. / n) * (1 + np.square(cohens_d) / 8.)) cohens_d_z = cohens_d / cohens_d_se cohens_d_p = norm.sf(cohens_d_z) hedges_r = cohens_d * (1 - 3. / ((4. * (n - 2)) - 1)) hedges_r_se = np.sqrt(n / (n1 * n2) + np.square(hedges_r) / (n - 2.)) hedges_r_z = hedges_r / hedges_r_se hedges_r_p = norm.sf(hedges_r_z) score_df = pd.DataFrame({ 'cohens_d': cohens_d, 'cohens_d_se': cohens_d_se, 'cohens_d_z': cohens_d_z, 'cohens_d_p': cohens_d_p, 'hedges_r': hedges_r, 'hedges_r_se': hedges_r_se, 'hedges_r_z': hedges_r_z, 'hedges_r_p': hedges_r_p, 'm1': m1, 'm2': m2, 'count1': orig_cat_X.sum(axis=0).A1, 'count2': orig_ncat_X.sum(axis=0).A1, 'docs1': (orig_cat_X > 0).sum(axis=0).A1, 'docs2': (orig_ncat_X > 0).sum(axis=0).A1, }).fillna(0) if correction_method is not None: from statsmodels.stats.multitest import multipletests score_df['hedges_r_p_corr'] = 0.5 for method in ['cohens_d', 'hedges_r']: score_df[method + '_p_corr'] = 0.5 pvals = score_df.loc[(score_df['m1'] != 0) | (score_df['m2'] != 0), method + '_p'] pvals = np.min(np.array([pvals, 1. - pvals])) * 2. score_df.loc[(score_df['m1'] != 0) | (score_df['m2'] != 0), method + '_p_corr'] = ( multipletests(pvals, method=correction_method)[1] ) return score_df
Example #18
Source File: tm_massunivariatemodels.py From TFCE_mediation with GNU General Public License v3.0 | 4 votes |
def full_glm_results(endog_arr, exog_vars, return_resids = False, only_tvals = False, PCA_whiten = False, ZCA_whiten = False, orthogonalize = True, orthogNear = False, orthog_GramSchmidt = False): if np.mean(exog_vars[:,0])!=1: print("Warning: the intercept is not included as the first column in your exogenous variable array") n, num_depv = endog_arr.shape k = exog_vars.shape[1] if orthogonalize: exog_vars = sm.add_constant(orthog_columns(exog_vars[:,1:])) elif orthogNear: exog_vars = sm.add_constant(ortho_neareast(exog_vars[:,1:])) elif orthog_GramSchmidt: # for when order matters AKA type 2 sum of squares exog_vars = sm.add_constant(gram_schmidt_orthonorm(exog_vars[:,1:])) else: pass invXX = np.linalg.inv(np.dot(exog_vars.T, exog_vars)) DFbetween = k - 1 # aka df model DFwithin = n - k # aka df residuals DFtotal = n - 1 if PCA_whiten: endog_arr = PCAwhiten(endog_arr) if ZCA_whiten: endog_arr = ZCAwhiten(endog_arr) a = cy_lin_lstsqr_mat(exog_vars, endog_arr) sigma2 = np.sum((endog_arr - np.dot(exog_vars,a))**2,axis=0) / (n - k) se = se_of_slope(num_depv,invXX,sigma2,k) if only_tvals: return a / se else: resids = endog_arr - np.dot(exog_vars,a) RSS = np.sum(resids**2,axis=0) TSS = np.sum((endog_arr - np.mean(endog_arr, axis =0))**2, axis = 0) R2 = 1 - (RSS/TSS) std_y = np.sqrt(TSS/DFtotal) R2_adj = 1 - ((1-R2)*DFtotal/(DFwithin)) Fvalues = ((TSS-RSS)/(DFbetween))/(RSS/DFwithin) Tvalues = a / se Pvalues = t.sf(np.abs(Tvalues), DFtotal)*2 if return_resids: fitted = np.dot(exog_vars, a) return (Fvalues, Tvalues, Pvalues, R2, R2_adj, np.array(resids), np.array(fitted)) else: return (Fvalues, Tvalues, Pvalues, R2, R2_adj)