Python scipy.stats.ttest_1samp() Examples

The following are 30 code examples of scipy.stats.ttest_1samp(), taken from open-source projects; the source file and project are noted above each example. You may also want to check out all available functions and classes of the module scipy.stats, or try the search function.
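Before the project examples, here is a minimal sketch of the call itself (the data below is synthetic, for illustration only): ttest_1samp takes a sample and the population mean under the null hypothesis, and returns the t statistic together with the two-sided p-value.

import numpy as np
from scipy import stats

rng = np.random.RandomState(0)
sample = rng.normal(loc=0.5, scale=1.0, size=100)  # synthetic sample whose true mean is 0.5

# H0: the sample comes from a population with mean 0.
# A small p-value is evidence against H0.
t_stat, p_value = stats.ttest_1samp(sample, popmean=0.0)
print(t_stat, p_value)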
Example #1
Source File: test_cond_indep.py    From causal-text-embeddings with MIT License
def likelihood_ratio_hypothesis_test(df, term_counts, word_index):
	treated = df[df.treatment==1]
	control = df[df.treatment==0]
	null_model = train_classifier(df, term_counts, word_index, treat_index=None)
	model_st_treated = train_classifier(df, term_counts, word_index)
	model_st_not_treated = train_classifier(df, term_counts, word_index, treat_index=0)

	null_st_treated = compute_word_occur_prob(null_model, treated)
	null_st_not_treated = compute_word_occur_prob(null_model, control)
	prob_st_treated = compute_word_occur_prob(model_st_treated, treated)
	prob_st_not_treated = compute_word_occur_prob(model_st_not_treated, control)

	ratio_treated = np.log(null_st_treated / prob_st_treated)
	ratio_control = np.log(null_st_not_treated / prob_st_not_treated)
	ratios = np.hstack([ratio_treated,ratio_control])

	print("Mean and std. of log likelihood ratios:", ratios.mean(), ratios.std())
	return ttest_1samp(ratios, 0.0)
	
	# statistic = -2 * ratios.sum()
	# p_value = chi2.pdf(statistic, df=2)
	# return statistic, p_value 
Example #2
Source File: compare.py    From ccs-calendarserver with Apache License 2.0
def main():
    [(_ignore_stat, first), (_ignore_stat, second)] = load_stats(sys.argv[1:])

    # Attempt to increase robustness by dropping the outlying 10% of values.
    first = trim(first, 0.1)
    second = trim(second, 0.1)

    fmean = stats.mean(first)
    smean = stats.mean(second)
    p = ttest_1samp(second, fmean)[1]
    if p >= 0.95:
        # rejected the null hypothesis
        print(sys.argv[1], 'mean of', fmean, 'differs from', sys.argv[2], 'mean of', smean, '(%2.0f%%)' % (p * 100,))
    else:
        # failed to reject the null hypothesis
        print('cannot prove means (%s, %s) differ (%2.0f%%)' % (fmean, smean, p * 100,)) 
Example #3
Source File: test_mstats_basic.py    From GraphicDesignPatternByPython with MIT License
def test_vs_nonmasked(self):
        np.random.seed(1234567)
        outcome = np.random.randn(20, 4) + [0, 0, 1, 2]

        # 1-D inputs
        res1 = stats.ttest_1samp(outcome[:, 0], 1)
        res2 = mstats.ttest_1samp(outcome[:, 0], 1)
        assert_allclose(res1, res2)

        # 2-D inputs
        res1 = stats.ttest_1samp(outcome[:, 0], outcome[:, 1], axis=None)
        res2 = mstats.ttest_1samp(outcome[:, 0], outcome[:, 1], axis=None)
        assert_allclose(res1, res2)
        res1 = stats.ttest_1samp(outcome[:, :2], outcome[:, 2:], axis=0)
        res2 = mstats.ttest_1samp(outcome[:, :2], outcome[:, 2:], axis=0)
        assert_allclose(res1, res2)

        # Check default is axis=0
        res3 = mstats.ttest_1samp(outcome[:, :2], outcome[:, 2:])
        assert_allclose(res2, res3) 
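As a follow-up to the parity checks above, a small sketch (made-up data) of why the mstats variant exists: on a masked array it simply ignores masked entries, so it agrees with stats.ttest_1samp run on the unmasked values.

import numpy.ma as ma
from scipy import stats
from scipy.stats import mstats

x = ma.masked_array([1.2, 2.0, 2.9, 99.0], mask=[0, 0, 0, 1])  # 99.0 is masked out
print(mstats.ttest_1samp(x, 2.0))              # uses only the three unmasked values
print(stats.ttest_1samp(x.compressed(), 2.0))  # same test on the unmasked data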
Example #4
Source File: multicomp.py    From Splunking-Crime with GNU Affero General Public License v3.0
def mcfdr(nrepl=100, nobs=50, ntests=10, ntrue=6, mu=0.5, alpha=0.05, rho=0.):
    '''MonteCarlo to test fdrcorrection
    '''
    nfalse = ntests - ntrue
    locs = np.array([0.]*ntrue + [mu]*(ntests - ntrue))
    results = []
    for i in range(nrepl):
        #rvs = locs + stats.norm.rvs(size=(nobs, ntests))
        rvs = locs + randmvn(rho, size=(nobs, ntests))
        tt, tpval = stats.ttest_1samp(rvs, 0)
        res = fdrcorrection_bak(np.abs(tpval), alpha=alpha, method='i')
        res0 = fdrcorrection0(np.abs(tpval), alpha=alpha)
        #res and res0 give the same results
        results.append([np.sum(res[:ntrue]), np.sum(res[ntrue:])] +
                       [np.sum(res0[:ntrue]), np.sum(res0[ntrue:])] +
                       res.tolist() +
                       np.sort(tpval).tolist() +
                       [np.sum(tpval[:ntrue]<alpha),
                        np.sum(tpval[ntrue:]<alpha)] +
                       [np.sum(tpval[:ntrue]<alpha/ntests),
                        np.sum(tpval[ntrue:]<alpha/ntests)])
    return np.array(results) 
Example #5
Source File: plotting.py    From QUANTAXIS with MIT License
def plot_information_table(ic_data):
    """
    IC 统计量
    """
    ic_summary_table = pd.DataFrame()
    ic_summary_table["IC Mean"] = ic_data.mean()
    ic_summary_table["IC std."] = ic_data.std()
    ic_summary_table["Risk-Adjusted IC (IR)"] = ic_data.mean() / ic_data.std()
    t_stat, p_value = stats.ttest_1samp(ic_data, 0)
    ic_summary_table["t-stat (IC)"] = t_stat
    ic_summary_table["p-value (IC)"] = p_value
    ic_summary_table["IC Skew"] = stats.skew(ic_data)
    ic_summary_table["IC Kurtosis"] = stats.kurtosis(ic_data)

    print("Information Analysis")
    plotting_utils.print_table(ic_summary_table.apply(lambda x: x.round(3)).T) 
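A hypothetical call for the function above, where ic_data is assumed to be a pandas DataFrame of daily ICs with one column per factor. ttest_1samp applied to 2-D input works column-wise by default (axis=0), so t_stat and p_value come back with one entry per factor.

import numpy as np
import pandas as pd
from scipy import stats

rng = np.random.RandomState(42)
ic_data = pd.DataFrame({                         # made-up daily ICs, two factors
    "factor_a": rng.normal(0.02, 0.10, 250),
    "factor_b": rng.normal(0.00, 0.10, 250),
})
t_stat, p_value = stats.ttest_1samp(ic_data, 0)  # one (t, p) pair per column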
Example #6
Source File: test_cond_indep.py    From causal-text-embeddings with MIT License
def compute_treatment_likelihood_ratio(df, term_counts, word_index):
	null_model = train_treatment_classifier(df, term_counts, word_index, occurence=None)
	model_st_word = train_treatment_classifier(df, term_counts, word_index)
	model_st_no_word = train_treatment_classifier(df, term_counts, word_index, occurence=0)

	word_occurence = term_counts[:,word_index].toarray()
	mask_word = (word_occurence == 1).flatten()
	mask_no_word = (word_occurence == 0).flatten()
	p_ind = np.arange(term_counts.shape[0])
	word_occurs = df[df.post_index.isin(p_ind[mask_word])]
	word_no_occurs = df[df.post_index.isin(p_ind[mask_no_word])]

	null_given_word = predict_treatment_prob_from_model(null_model, word_occurs)
	null_given_no_word = predict_treatment_prob_from_model(null_model, word_no_occurs)
	p_t_given_word = predict_treatment_prob_from_model(model_st_word, word_occurs)
	p_t_given_no_word = predict_treatment_prob_from_model(model_st_no_word, word_no_occurs)

	ratio_word = np.log(null_given_word / p_t_given_word)
	ratio_no_word = np.log(null_given_no_word / p_t_given_no_word)
	ratio = np.hstack([ratio_word, ratio_no_word])
	print("Mean and std of log likelihood ratios", ratio.mean(), ratio.std())
	# return -2*ratio.sum(), chi2.pdf(-2*ratio.sum(), df=2)
	return ttest_1samp(ratio, 0.0) 
Example #7
Source File: test_cond_indep.py    From causal-text-embeddings with MIT License
def compute_expected_treatment_likelihood_ratio(df, term_counts, word_index, do_shuffle=False):
	word_occurence = term_counts[:,word_index].toarray()
	mask_word = (word_occurence == 1).flatten()
	p_ind = np.arange(term_counts.shape[0])
	n = p_ind[mask_word].shape[0]
	if do_shuffle:
		np.random.shuffle(p_ind)
		rand_indices = p_ind[:n]  # random subset, same size as the word-occurrence set
		df_word_occurs = df[df.post_index.isin(rand_indices)]
	else:
		df_word_occurs = df[df.post_index.isin(p_ind[mask_word])]
	
	model_p_z = train_treatment_classifier(df, term_counts, word_index, occurence=None)
	prob_t_given_z = predict_treatment_prob_from_model(model_p_z, df_word_occurs)
	model_st_word = train_treatment_classifier(df_word_occurs, term_counts, word_index,occurence=None)
	prob_st_word = predict_treatment_prob_from_model(model_st_word, df_word_occurs)
	log_ratio = np.log(prob_st_word/prob_t_given_z)
	print("Mean (and std err.) of ratio:", log_ratio.mean(), log_ratio.std()/np.sqrt(n))
	print("N = ", n)
	return ttest_1samp(log_ratio, 0.0) 
Example #8
Source File: test_stats.py    From Computable with MIT License
def test_onesample(self):
        t, p = stats.ttest_1samp(self.X1, 0)

        assert_array_almost_equal(t, self.T1_0)
        assert_array_almost_equal(p, self.P1_0)

        t, p = stats.ttest_1samp(self.X2, 0)

        assert_array_almost_equal(t, self.T2_0)
        assert_array_almost_equal(p, self.P2_0)

        t, p = stats.ttest_1samp(self.X1, 1)

        assert_array_almost_equal(t, self.T1_1)
        assert_array_almost_equal(p, self.P1_1)

        t, p = stats.ttest_1samp(self.X1, 2)

        assert_array_almost_equal(t, self.T1_2)
        assert_array_almost_equal(p, self.P1_2) 
Example #9
Source File: multicomp.py    From vnpy_crypto with MIT License
def mcfdr(nrepl=100, nobs=50, ntests=10, ntrue=6, mu=0.5, alpha=0.05, rho=0.):
    '''MonteCarlo to test fdrcorrection
    '''
    nfalse = ntests - ntrue
    locs = np.array([0.]*ntrue + [mu]*(ntests - ntrue))
    results = []
    for i in range(nrepl):
        #rvs = locs + stats.norm.rvs(size=(nobs, ntests))
        rvs = locs + randmvn(rho, size=(nobs, ntests))
        tt, tpval = stats.ttest_1samp(rvs, 0)
        res = fdrcorrection_bak(np.abs(tpval), alpha=alpha, method='i')
        res0 = fdrcorrection0(np.abs(tpval), alpha=alpha)
        #res and res0 give the same results
        results.append([np.sum(res[:ntrue]), np.sum(res[ntrue:])] +
                       [np.sum(res0[:ntrue]), np.sum(res0[ntrue:])] +
                       res.tolist() +
                       np.sort(tpval).tolist() +
                       [np.sum(tpval[:ntrue]<alpha),
                        np.sum(tpval[ntrue:]<alpha)] +
                       [np.sum(tpval[:ntrue]<alpha/ntests),
                        np.sum(tpval[ntrue:]<alpha/ntests)])
    return np.array(results) 
Example #10
Source File: plotting.py    From jqfactor_analyzer with MIT License
def plot_information_table(ic_data):
    ic_summary_table = pd.DataFrame()
    ic_summary_table["IC Mean"] = ic_data.mean()
    ic_summary_table["IC Std."] = ic_data.std()
    ic_summary_table["IR"] = ic_data.mean() / ic_data.std()
    t_stat, p_value = stats.ttest_1samp(ic_data, 0)
    ic_summary_table["t-stat(IC)"] = t_stat
    ic_summary_table["p-value(IC)"] = p_value
    ic_summary_table["IC Skew"] = stats.skew(ic_data)
    ic_summary_table["IC Kurtosis"] = stats.kurtosis(ic_data)

    print("IC 分析")
    print_table(ic_summary_table.apply(lambda x: x.round(3)).T) 
Example #11
Source File: _dp_verification.py    From whitenoise-system with MIT License
def bias_test(self, actual, fD, sig_level = 0.05):
        """
        Given actual response, calculates mean signed deviation of noisy responses
        Also, performs 1-sample two tailed t-test to find whether 
        the difference between actual response and repeated noisy responses 
        is statistically significant i.e. biased result
        """
        n = len(fD)
        actual = [actual] * n
        diff = fD - actual
        msd = (np.sum(diff) / n) / actual[0]
        tset, pval = stats.ttest_1samp(diff, 0.0)
        return (pval >= sig_level), msd 
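A standalone sketch of the same check with simulated inputs (all values made up): fD holds repeated noisy responses to a query whose true answer is actual, and the t-test asks whether their mean deviates from it.

import numpy as np
from scipy import stats

actual = 100.0
rng = np.random.RandomState(0)
fD = actual + rng.laplace(loc=0.0, scale=2.0, size=500)  # simulated noisy responses

diff = fD - actual
msd = (np.sum(diff) / len(fD)) / actual   # mean signed deviation, relative to actual
_, pval = stats.ttest_1samp(diff, 0.0)
unbiased = pval >= 0.05                   # fail to reject H0: mean(diff) == 0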
Example #12
Source File: BATSModel_test.py    From tbats with MIT License
def test_fit_alpha_only(self):
        alpha = 0.7
        np.random.seed(345)
        T = 200

        l = l0 = 0.2
        y = [0] * T
        for t in range(0, T):
            d = np.random.normal()
            y[t] = l + d
            l = l + alpha * d

        c = Components(use_arma_errors=False)
        p = ModelParams(c, alpha=alpha, x0=np.array([l0]))
        model = self.create_model(p)
        fitted_model = model.fit(y)
        resid = fitted_model.resid

        # Residuals should form a normal distribution
        _, pvalue = stats.normaltest(resid)
        assert 0.05 < pvalue  # large p-value; we cannot reject the null hypothesis of normality

        # Mean of residuals should be close to 0
        _, pvalue = stats.ttest_1samp(resid, popmean=0.0)
        assert 0.05 < pvalue  # large p-value; we cannot reject the null hypothesis that the mean is 0

        # Roughly 95% of residuals should lie within the [-2, 2] interval; require at least 90%
        assert len(resid[np.where(np.abs(resid) < 2)]) / len(resid) > 0.90 
Example #13
Source File: test_mstats_basic.py    From GraphicDesignPatternByPython with MIT License
def test_empty(self):
        res1 = mstats.ttest_1samp([], 1)
        assert_(np.all(np.isnan(res1))) 
Example #14
Source File: diagnostic.py    From Splunking-Crime with GNU Affero General Public License v3.0
def linear_harvey_collier(res):
    '''Harvey Collier test for linearity

    The Null hypothesis is that the regression is correctly modeled as linear.

    Parameters
    ----------
    res : Result instance

    Returns
    -------
    tvalue : float
        test statistic, based on ttest_1samp
    pvalue : float
        pvalue of the test

    Notes
    -----
    TODO: add sort_by option

    This test is a t-test that the mean of the recursive ols residuals is zero.
    Calculating the recursive residuals might take some time for large samples.

    '''
    #I think this has different ddof than
    #B.H. Baltagi, Econometrics, 2011, chapter 8
    #but it matches Gretl and R:lmtest, pvalue at decimal=13
    rr = recursive_olsresiduals(res, skip=3, alpha=0.95)
    from scipy import stats

    return stats.ttest_1samp(rr[3][3:], 0) 
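This function also ships in statsmodels as statsmodels.stats.diagnostic.linear_harvey_collier; a minimal usage sketch with made-up data, where the true relationship is linear and the test should therefore not reject:

import numpy as np
import statsmodels.api as sm
from statsmodels.stats.diagnostic import linear_harvey_collier

rng = np.random.RandomState(0)
x = rng.uniform(0, 10, 100)
y = 2.0 + 0.5 * x + rng.normal(size=100)     # genuinely linear relationship

res = sm.OLS(y, sm.add_constant(x)).fit()
tvalue, pvalue = linear_harvey_collier(res)  # expect a large p-value here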
Example #15
Source File: BATSParamsOptimizer_test.py    From tbats with MIT License
def test_fit_alpha_only(self):
        alpha = 0.7
        np.random.seed(345)
        T = 200

        l = 0.2
        y = [0] * T
        for t in range(0, T):
            d = np.random.normal()
            y[t] = l + d
            l = l + alpha * d

        c = Components(use_arma_errors=False)
        p = ModelParams(c, alpha=0.09)  # default starting value for alpha
        optimizer = ParamsOptimizer(self.context)
        optimizer.optimize(y, p)
        fitted_model = optimizer.optimal_model()
        resid = fitted_model.resid

        # TODO: x0 estimation is poor; the fitted level l ends up far from the true 0.2

        # Residuals should form a normal distribution
        _, pvalue = stats.normaltest(resid)
        assert 0.05 < pvalue  # large p-value; we cannot reject the null hypothesis of normality

        # Mean of residuals should be close to 0
        _, pvalue = stats.ttest_1samp(resid, popmean=0.0)
        assert 0.05 < pvalue  # large p-value; we cannot reject the null hypothesis that the mean is 0

        # Roughly 95% of residuals should lie within the [-2, 2] interval; require at least 90%
        assert len(resid[np.where(np.abs(resid) < 2)]) / len(resid) > 0.90 
Example #16
Source File: TBATSModel_test.py    From tbats with MIT License
def test_fit_alpha_only(self):
        alpha = 0.7
        np.random.seed(345)
        T = 200

        l = l0 = 0.2
        y = [0] * T
        for t in range(0, T):
            d = np.random.normal()
            y[t] = l + d
            l = l + alpha * d

        c = Components(use_arma_errors=False)
        p = ModelParams(c, alpha=alpha, x0=np.array([l0]))
        model = self.create_model(p)
        fitted_model = model.fit(y)
        resid = fitted_model.resid

        # Residuals should form a normal distribution
        _, pvalue = stats.normaltest(resid)
        assert 0.05 < pvalue  # large p-value; we cannot reject the null hypothesis of normality

        # Mean of residuals should be close to 0
        _, pvalue = stats.ttest_1samp(resid, popmean=0.0)
        assert 0.05 < pvalue  # large p-value; we cannot reject the null hypothesis that the mean is 0

        # Roughly 95% of residuals should lie within the [-2, 2] interval; require at least 90%
        assert len(resid[np.where(np.abs(resid) < 2)]) / len(resid) > 0.90 
Example #17
Source File: accounting.py    From pysystemtrade with GNU General Public License v3.0
def t_test(self):
        return ttest_1samp(self.vals(), 0.0) 
Example #18
Source File: stats_test.py    From bayesmark with Apache License 2.0
def t_test_(x):
    """Perform a standard t-test to test if the values in `x` are sampled from
    a distribution with a zero mean.

    Parameters
    ----------
    x : array-like, shape (n_samples,)
        array of data points to test.

    Returns
    -------
    pval : float
        p-value (in [0,1]) from t-test on `x`.
    """
    assert np.ndim(x) == 1 and (not np.any(np.isnan(x)))

    if (len(x) <= 1) or (not np.all(np.isfinite(x))):
        return 1.0  # Can't say anything about scale => p=1

    _, pval = sst.ttest_1samp(x, 0.0)
    if np.isnan(pval):
        # Should only be possible if scale underflowed to zero:
        assert np.var(x, ddof=1) <= 1e-100
        # It is debatable if the condition should be ``np.mean(x) == 0.0`` or
        # ``np.all(x == 0.0)``. Should not matter in practice.
        pval = float(np.mean(x) == 0.0)
    assert 0.0 <= pval and pval <= 1.0
    return pval 
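A hypothetical call to the helper above, e.g. on per-run score differences between two configurations (numbers made up):

import numpy as np

deltas = np.array([0.02, -0.01, 0.03, 0.00, 0.05])  # made-up paired differences
print(t_test_(deltas))  # p-value for H0: the differences have zero mean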
Example #19
Source File: simulator.py    From nltools with MIT License
def _run_ttest(self, data):
        '''Helper function to run ttest on data'''
        flattened = data.reshape(self.grid_width*self.grid_width, self.n_subjects)
        t, p = ttest_1samp(flattened.T, 0)
        t = np.reshape(t, (self.grid_width, self.grid_width))
        p = np.reshape(p, (self.grid_width, self.grid_width))
        return (t, p) 
Example #20
Source File: adjacency.py    From nltools with MIT License
def ttest(self, permutation=False, **kwargs):
        ''' Calculate ttest across samples.

        Args:
            permutation: (bool) Run ttest as permutation. Note this can be very slow.

        Returns:
            out: (dict) contains Adjacency instances of t values (or mean if
                 running permutation) and Adjacency instance of p values.

        '''
        if self.is_single_matrix:
            raise ValueError('t-test cannot be run on single matrices.')

        if permutation:
            t = []
            p = []
            for i in range(self.data.shape[1]):
                stats = one_sample_permutation(self.data[:, i], **kwargs)
                t.append(stats['mean'])
                p.append(stats['p'])
            t = Adjacency(np.array(t))
            p = Adjacency(np.array(p))
        else:
            t = self.mean().copy()
            p = deepcopy(t)
            t.data, p.data = ttest_1samp(self.data, 0, 0)

        return {'t': t, 'p': p} 
Example #21
Source File: sdn_detect.py    From sdnpwn with MIT License
def testForSDN(testMethod, dstIP, count, interval):
  global verbose
  rtt = []
  sentMS = 0
  
  if(testMethod == "icmp"):
    sdnpwn.message("Testing with ICMP", sdnpwn.NORMAL)
    icmp = (IP(dst=dstIP)/ICMP())
    for i in range(0,count):
      sentMS = int(round(time.time() * 1000))
      resp = sr1(icmp)
      rtt.append((int(round(time.time() * 1000))) - sentMS)
      time.sleep(interval)
      
  elif(testMethod == "arp"):
    sdnpwn.message("Testing with ARP", sdnpwn.NORMAL)
    for i in range(0,count):
      sentMS = int(round(time.time() * 1000))
      resp = arping(dstIP)
      rtt.append((int(round(time.time() * 1000))) - sentMS)
      time.sleep(interval)
  
  initValue = rtt[0]
  rtt.pop(0)
  #Perform T-Test to check if first latency value is significantly different from others in our sample
  res = stats.ttest_1samp(rtt, initValue)
  if(verbose == True):
    sdnpwn.message("Initial RTT: " + str(initValue), sdnpwn.VERBOSE)
    sdnpwn.message("RTTs for other traffic: " + str(rtt), sdnpwn.VERBOSE)
    sdnpwn.message("Calculated p-value for inital RTT is " + str(res[1]), sdnpwn.VERBOSE)
  if(res[1] < .05 and all(i < initValue for i in rtt)): #If the p-value is less that 5% we can say that initValue is significant
    return True
  else:
    return False 
Example #22
Source File: TBATSModel_test.py    From tbats with MIT License
def test_fit_alpha_and_trigonometric_series(self):
        alpha = 0.7
        gamma1 = 0.1
        gamma2 = 0.05
        period_length = 4
        np.random.seed(2342)
        T = 300

        l = l0 = 0.2
        s = s0 = 1
        s_star = s0_star = 0
        y = [0] * T
        lam = 2 * np.pi / period_length
        for t in range(0, T):
            d = np.random.normal()
            y[t] = l + s + d
            l = l + alpha * d
            s_prev = s
            s = s_prev * np.cos(lam) + s_star * np.sin(lam) + gamma1 * d
            s_star = - s_prev * np.sin(lam) + s_star * np.cos(lam) + gamma2 * d

        c = Components(use_arma_errors=False, seasonal_periods=[period_length], seasonal_harmonics=[1])
        p = ModelParams(c, alpha=alpha,  # gamma_params=[gamma1, gamma2],
                        x0=np.array([l0, s0, s0_star]))
        model = self.create_model(p)
        fitted_model = model.fit(y)
        resid = fitted_model.resid

        # Residuals should form a normal distribution
        _, pvalue = stats.normaltest(resid)
        assert 0.05 < pvalue  # large p-value; we cannot reject the null hypothesis of normality

        # Mean of residuals should be close to 0
        _, pvalue = stats.ttest_1samp(resid, popmean=0.0)
        assert 0.05 < pvalue  # large p-value; we cannot reject the null hypothesis that the mean is 0

        # Roughly 95% of residuals should lie within the [-2, 2] interval;
        # require at least 80% as a loose bound the expected ~95% should comfortably meet
        assert len(resid[np.where(np.abs(resid) < 2)]) / len(resid) > 0.80 
Example #23
Source File: compare.py    From ccs-calendarserver with Apache License 2.0
def ttest_1samp(a, popmean):
        # T statistic - http://mathworld.wolfram.com/Studentst-Distribution.html
        t = (stats.mean(a) - popmean) / (stats.stddev(a) / len(a) ** 0.5)
        v = len(a) - 1.0
        p = gamma((v + 1) / 2) / ((v * pi) ** 0.5 * gamma(v / 2)) * (1 + t ** 2 / v) ** (-(v + 1) / 2)
        return (t, p) 
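Note that the closed-form p above is the Student-t probability density evaluated at t, not a two-sided tail probability, so only the t statistic is directly comparable with scipy's. A quick sketch with made-up data (assuming the project's stats.mean and stats.stddev are the sample mean and sample standard deviation):

import numpy as np
from scipy import stats

a = np.array([2.1, 2.5, 1.9, 2.8, 2.3])  # made-up sample
t_scipy, p_scipy = stats.ttest_1samp(a, 2.0)

t_manual = (a.mean() - 2.0) / (a.std(ddof=1) / len(a) ** 0.5)
# t_manual matches t_scipy up to floating point; the second values differ because
# the snippet above returns the density at t rather than a tail probability.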
Example #24
Source File: test_stats.py    From GraphicDesignPatternByPython with MIT License
def test_ttest_1samp_new():
    n1, n2, n3 = (10,15,20)
    rvn1 = stats.norm.rvs(loc=5,scale=10,size=(n1,n2,n3))

    # check multidimensional array and correct axis handling
    # deterministic rvn1 and rvn2 would be better as in test_ttest_rel
    t1,p1 = stats.ttest_1samp(rvn1[:,:,:], np.ones((n2,n3)),axis=0)
    t2,p2 = stats.ttest_1samp(rvn1[:,:,:], 1,axis=0)
    t3,p3 = stats.ttest_1samp(rvn1[:,0,0], 1)
    assert_array_almost_equal(t1,t2, decimal=14)
    assert_almost_equal(t1[0,0],t3, decimal=14)
    assert_equal(t1.shape, (n2,n3))

    t1,p1 = stats.ttest_1samp(rvn1[:,:,:], np.ones((n1,n3)),axis=1)
    t2,p2 = stats.ttest_1samp(rvn1[:,:,:], 1,axis=1)
    t3,p3 = stats.ttest_1samp(rvn1[0,:,0], 1)
    assert_array_almost_equal(t1,t2, decimal=14)
    assert_almost_equal(t1[0,0],t3, decimal=14)
    assert_equal(t1.shape, (n1,n3))

    t1,p1 = stats.ttest_1samp(rvn1[:,:,:], np.ones((n1,n2)),axis=2)
    t2,p2 = stats.ttest_1samp(rvn1[:,:,:], 1,axis=2)
    t3,p3 = stats.ttest_1samp(rvn1[0,0,:], 1)
    assert_array_almost_equal(t1,t2, decimal=14)
    assert_almost_equal(t1[0,0],t3, decimal=14)
    assert_equal(t1.shape, (n1,n2))

    # test zero division problem
    t, p = stats.ttest_1samp([0, 0, 0], 1)
    assert_equal((np.abs(t), p), (np.inf, 0))

    with np.errstate(all='ignore'):
        assert_equal(stats.ttest_1samp([0, 0, 0], 0), (np.nan, np.nan))

        # check that nan in input array result in nan output
        anan = np.array([[1, np.nan],[-1, 1]])
        assert_equal(stats.ttest_1samp(anan, 0), ([0, np.nan], [1, np.nan])) 
Example #25
Source File: test_mstats_basic.py    From GraphicDesignPatternByPython with MIT License
def test_zero_division(self):
        t, p = mstats.ttest_1samp([0, 0, 0], 1)
        assert_equal((np.abs(t), p), (np.inf, 0))

        with suppress_warnings() as sup:
            sup.filter(RuntimeWarning, "invalid value encountered in absolute")
            t, p = mstats.ttest_1samp([0, 0, 0], 0)
            assert_(np.isnan(t))
            assert_array_equal(p, (np.nan, np.nan)) 
Example #26
Source File: test_weightstats.py    From vnpy_crypto with MIT License
def test_weightstats_2(self):
        x1, x2 = self.x1, self.x2
        w1, w2 = self.w1, self.w2

        d1 = DescrStatsW(x1)
        d1w = DescrStatsW(x1, weights=w1)
        d2w = DescrStatsW(x2, weights=w2)
        x1r = d1w.asrepeats()
        x2r = d2w.asrepeats()
#        print 'random weights'
#        print ttest_ind(x1, x2, weights=(w1, w2))
#        print stats.ttest_ind(x1r, x2r)
        assert_almost_equal(ttest_ind(x1, x2, weights=(w1, w2))[:2],
                            stats.ttest_ind(x1r, x2r), 14)
        # not the same as new version with random weights/replication
#        assert x1r.shape[0] == d1w.sum_weights
#        assert x2r.shape[0] == d2w.sum_weights

        assert_almost_equal(x2r.mean(0), d2w.mean, 14)
        assert_almost_equal(x2r.var(), d2w.var, 14)
        assert_almost_equal(x2r.std(), d2w.std, 14)
        # note: the following is for 1d
        assert_almost_equal(np.cov(x2r, bias=1), d2w.cov, 14)
        # assert_almost_equal(np.corrcoef(np.x2r), d2w.corrcoef, 19)
        # TODO: exception in corrcoef (scalar case)

        # one-sample tests
#        print d1.ttest_mean(3)
#        print stats.ttest_1samp(x1, 3)
#        print d1w.ttest_mean(3)
#        print stats.ttest_1samp(x1r, 3)
        assert_almost_equal(d1.ttest_mean(3)[:2], stats.ttest_1samp(x1, 3), 11)
        assert_almost_equal(d1w.ttest_mean(3)[:2],
                            stats.ttest_1samp(x1r, 3), 11) 
Example #27
Source File: test_mstats_basic.py    From GraphicDesignPatternByPython with MIT License
def test_result_attributes(self):
        np.random.seed(1234567)
        outcome = np.random.randn(20, 4) + [0, 0, 1, 2]

        res = mstats.ttest_1samp(outcome[:, 0], 1)
        attributes = ('statistic', 'pvalue')
        check_named_results(res, attributes, ma=True) 
Example #28
Source File: test_mstats_basic.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_fully_masked(self):
        np.random.seed(1234567)
        outcome = ma.masked_array(np.random.randn(3), mask=[1, 1, 1])
        expected = (np.nan, np.nan)
        with suppress_warnings() as sup:
            sup.filter(RuntimeWarning, "invalid value encountered in absolute")
            for pair in [((np.nan, np.nan), 0.0), (outcome, 0.0)]:
                t, p = mstats.ttest_1samp(*pair)
                assert_array_equal(p, expected)
                assert_array_equal(t, expected) 
Example #29
Source File: test_cond_indep.py    From causal-text-embeddings with MIT License
def compute_conditional_likelihood_ratios(df, term_counts, word_index):
	model_st_treated = train_classifier(df, term_counts, word_index)
	model_st_not_treated = train_classifier(df, term_counts, word_index, treat_index=0)
	probability_st_treated = compute_word_occur_prob(model_st_treated, df)
	probability_st_not_treated = compute_word_occur_prob(model_st_not_treated, df)
	log_likelihood_ratio = np.log(probability_st_treated / probability_st_not_treated)
	print("Mean and std. of log ll ratio", log_likelihood_ratio.mean(), ";", log_likelihood_ratio.std())
	print("sanity check:", (log_likelihood_ratio.mean()/(log_likelihood_ratio.std()/np.sqrt(df.shape[0]))))
	return ttest_1samp(log_likelihood_ratio, 0.0) 
Example #30
Source File: rafpc.py    From fylearn with MIT License
def agreement_t_test(a, b):
    """ Check agreement based on means of two samples, using the t-statistic. """
    means1 = np.nanmean(a, 0)

    t1, p1 = stats.ttest_1samp(b, means1)
    # t2, p2 = stats.ttest_1samp(a, means2)

    # features where the null of equal means is rejected (p < 0.05)
    return p1 < 0.05
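A hypothetical usage of the helper above (arrays made up). Note that, as written, the returned mask is True where the column means of b differ significantly from those of a (p < 0.05) and False where the two samples agree:

import numpy as np

rng = np.random.RandomState(0)
a = rng.normal(0.0, 1.0, size=(50, 3))
b = a + rng.normal(0.0, 0.1, size=(50, 3))  # nearly identical samples
print(agreement_t_test(a, b))               # expect mostly [False False False]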