Python scipy.stats.ttest_ind() Examples

The following code examples show how to use scipy.stats.ttest_ind(). They are taken from open-source Python projects.
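As a quick orientation before the project examples, here is a minimal, self-contained sketch of calling scipy.stats.ttest_ind() directly; the two samples below are synthetic and only for illustration, and equal_var switches between Student's and Welch's variants.

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
a = rng.normal(loc=0.0, scale=1.0, size=30)  # first independent sample
b = rng.normal(loc=0.5, scale=1.0, size=30)  # second independent sample

# Student's t-test assumes equal variances; pass equal_var=False for Welch's test.
result = stats.ttest_ind(a, b)
print(result.statistic, result.pvalue)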

Example 1
Project: scanorama   Author: brianhie   File: param_sensitivity.py    MIT License
def test_knn(datasets_dimred, genes, labels, idx, distr, xlabels):
    knns = [ 5, 10, 50, 100 ]
    len_distr = len(distr)
    for knn in knns:
        integrated = assemble(datasets_dimred[:], knn=knn, sigma=150)
        X = np.concatenate(integrated)
        distr.append(sil(X[idx, :], labels[idx]))
        for d in distr[:len_distr]:
            print(ttest_ind(np.ravel(X[idx, :]), np.ravel(d)))
        xlabels.append(str(knn))
    print('')
    
    plt.figure()
    plt.boxplot(distr, showmeans=True, whis='range')
    plt.xticks(range(1, len(xlabels) + 1), xlabels)
    plt.ylabel('Silhouette Coefficient')
    plt.ylim((-1, 1))
    plt.savefig('param_sensitivity_{}.svg'.format('knn')) 
Example 2
Project: scanorama   Author: brianhie   File: param_sensitivity.py    MIT License
def test_sigma(datasets_dimred, genes, labels, idx, distr, xlabels):
    sigmas = [ 10, 50, 100, 200 ]
    len_distr = len(distr)
    for sigma in sigmas:
        integrated = assemble(datasets_dimred[:], sigma=sigma)
        X = np.concatenate(integrated)
        distr.append(sil(X[idx, :], labels[idx]))
        for d in distr[:len_distr]:
            print(ttest_ind(np.ravel(X[idx, :]), np.ravel(d)))
        xlabels.append(str(sigma))
    print('')
    
    plt.figure()
    plt.boxplot(distr, showmeans=True, whis='range')
    plt.xticks(range(1, len(xlabels) + 1), xlabels)
    plt.ylabel('Silhouette Coefficient')
    plt.ylim((-1, 1))
    plt.savefig('param_sensitivity_{}.svg'.format('sigma')) 
Example 3
Project: scanorama   Author: brianhie   File: param_sensitivity.py    MIT License
def test_alpha(datasets_dimred, genes, labels, idx, distr, xlabels):
    alphas = [ 0, 0.05, 0.20, 0.50 ]
    len_distr = len(distr)
    for alpha in alphas:
        integrated = assemble(datasets_dimred[:], alpha=alpha, sigma=150)
        X = np.concatenate(integrated)
        distr.append(sil(X[idx, :], labels[idx]))
        for d in distr[:len_distr]:
            print(ttest_ind(np.ravel(X[idx, :]), np.ravel(d)))
        xlabels.append(str(alpha))
    print('')
    
    plt.figure()
    plt.boxplot(distr, showmeans=True, whis='range')
    plt.xticks(range(1, len(xlabels) + 1), xlabels)
    plt.ylabel('Silhouette Coefficient')
    plt.ylim((-1, 1))
    plt.savefig('param_sensitivity_{}.svg'.format('alpha')) 
Example 4
Project: scanorama   Author: brianhie   File: param_sensitivity.py    MIT License
def test_approx(datasets_dimred, genes, labels, idx, distr, xlabels):
    integrated = assemble(datasets_dimred[:], approx=False, sigma=150)
    X = np.concatenate(integrated)
    distr.append(sil(X[idx, :], labels[idx]))
    len_distr = len(distr)
    for d in distr[:len_distr]:
        print(ttest_ind(np.ravel(X[idx, :]), np.ravel(d)))
    xlabels.append('Exact NN')
    print('')
    
    plt.figure()
    plt.boxplot(distr, showmeans=True, whis='range')
    plt.xticks(range(1, len(xlabels) + 1), xlabels)
    plt.ylabel('Silhouette Coefficient')
    plt.ylim((-1, 1))
    plt.savefig('param_sensitivity_{}.svg'.format('approx')) 
Example 5
Project: scanorama   Author: brianhie   File: param_sensitivity.py    MIT License
def test_perplexity(datasets_dimred, genes, labels, idx,
                    distr, xlabels):
    X = np.concatenate(datasets_dimred)

    perplexities = [ 10, 100, 500, 2000 ]
    len_distr = len(distr)
    for perplexity in perplexities:
        embedding = fit_tsne(X, perplexity=perplexity)
        distr.append(sil(embedding[idx, :], labels[idx]))
        for d in distr[:len_distr]:
            print(ttest_ind(np.ravel(X[idx, :]), np.ravel(d)))
        xlabels.append(str(perplexity))
    print('')
    
    plt.figure()
    plt.boxplot(distr, showmeans=True, whis='range')
    plt.xticks(range(1, len(xlabels) + 1), xlabels)
    plt.ylabel('Silhouette Coefficient')
    plt.ylim((-1, 1))
    plt.savefig('param_sensitivity_{}.svg'.format('perplexity')) 
Example 6
Project: GeoPy   Author: aerler   File: stats.py    GNU General Public License v3.0
def ttest_ind_wrapper(data, size1=None, axis=None, ignoreNaN=True, equal_var=True):
  ''' Apply Student's t-test for two independent samples, to test whether the samples
      are drawn from the same underlying (continuous) distribution. This is a wrapper for the SciPy
      function that removes NaN's and only returns the p-value (the t-test is already vectorized). '''
  if axis is None and ignoreNaN:
    data1 = data[:size1]; data2 = data[size1:]
    nonans1 = np.invert(np.isnan(data1)) # test for NaN's
    nonans2 = np.invert(np.isnan(data2))
    if np.sum(nonans1) < 3 or np.sum(nonans2) < 3: return np.NaN # return, if less than 3 non-NaN's
    data1 = data1[nonans1]; data2 = data2[nonans2] # remove NaN's
  elif axis is None:
    data1 = data[:size1]; data2 = data[size1:]
  else:
    data1, data2 = np.split(data, [size1], axis=axis)
  # apply test
  D, pval = ss.ttest_ind(data1, data2, axis=axis, equal_var=equal_var); del D
  return pval  

# Mann-Whitney Rank Test on 2 samples 
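As an aside to the wrapper above: recent SciPy versions can drop NaNs themselves via nan_policy='omit' (also used in several examples below). A minimal sketch with made-up data, assuming scipy.stats is imported as ss as in this file:

import numpy as np
import scipy.stats as ss

data1 = np.array([1.2, np.nan, 0.8, 1.1, 0.9])
data2 = np.array([0.4, 0.6, np.nan, 0.5, 0.7])
# nan_policy='omit' discards NaNs before computing the statistic,
# similar to the manual masking in ttest_ind_wrapper above.
t, pval = ss.ttest_ind(data1, data2, equal_var=True, nan_policy='omit')
print(pval)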
Example 7
Project: LaserTOF   Author: kyleuckert   File: test_stats.py    MIT License
def test_ttest_ind_nan_2nd_arg():
    # regression test for gh-6134: nans in the second arg were not handled
    x = [np.nan, 2.0, 3.0, 4.0]
    y = [1.0, 2.0, 1.0, 2.0]

    r1 = stats.ttest_ind(x, y, nan_policy='omit')
    r2 = stats.ttest_ind(y, x, nan_policy='omit')
    assert_allclose(r2.statistic, -r1.statistic, atol=1e-15)
    assert_allclose(r2.pvalue, r1.pvalue, atol=1e-15)

    # NB: arguments are not paired when NaNs are dropped
    r3 = stats.ttest_ind(y, x[1:])
    assert_allclose(r2, r3, atol=1e-15)

    # .. and this is consistent with R. R code:
    # x = c(NA, 2.0, 3.0, 4.0)
    # y = c(1.0, 2.0, 1.0, 2.0)
    # t.test(x, y, var.equal=TRUE)
    assert_allclose(r2, (-2.5354627641855498, 0.052181400457057901), atol=1e-15) 
Example 8
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_mstats_basic.py    GNU General Public License v3.0
def test_vs_nonmasked(self):
        np.random.seed(1234567)
        outcome = np.random.randn(20, 4) + [0, 0, 1, 2]

        # 1-D inputs
        res1 = stats.ttest_ind(outcome[:, 0], outcome[:, 1])
        res2 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1])
        assert_allclose(res1, res2)

        # 2-D inputs
        res1 = stats.ttest_ind(outcome[:, 0], outcome[:, 1], axis=None)
        res2 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1], axis=None)
        assert_allclose(res1, res2)
        res1 = stats.ttest_ind(outcome[:, :2], outcome[:, 2:], axis=0)
        res2 = mstats.ttest_ind(outcome[:, :2], outcome[:, 2:], axis=0)
        assert_allclose(res1, res2)

        # Check default is axis=0
        res3 = mstats.ttest_ind(outcome[:, :2], outcome[:, 2:])
        assert_allclose(res2, res3) 
Example 9
Project: vnpy_crypto   Author: birforce   File: test_weightstats.py    MIT License
def test_weightstats_1(self):
        x1, x2 = self.x1, self.x2
        w1, w2 = self.w1, self.w2
        w1_ = 2. * np.ones(len(x1))
        w2_ = 2. * np.ones(len(x2))

        d1 = DescrStatsW(x1)
#        print ttest_ind(x1, x2)
#        print ttest_ind(x1, x2, usevar='unequal')
#        #print ttest_ind(x1, x2, usevar='unequal')
#        print stats.ttest_ind(x1, x2)
#        print ttest_ind(x1, x2, usevar='unequal', alternative='larger')
#        print ttest_ind(x1, x2, usevar='unequal', alternative='smaller')
#        print ttest_ind(x1, x2, usevar='unequal', weights=(w1_, w2_))
#        print stats.ttest_ind(np.r_[x1, x1], np.r_[x2,x2])
        assert_almost_equal(ttest_ind(x1, x2, weights=(w1_, w2_))[:2],
                            stats.ttest_ind(np.r_[x1, x1], np.r_[x2, x2])) 
Example 10
Project: ble5-nrf52-mac   Author: tomasero   File: test_stats.py    MIT License
def test_ttest_ind_nan_2nd_arg():
    # regression test for gh-6134: nans in the second arg were not handled
    x = [np.nan, 2.0, 3.0, 4.0]
    y = [1.0, 2.0, 1.0, 2.0]

    r1 = stats.ttest_ind(x, y, nan_policy='omit')
    r2 = stats.ttest_ind(y, x, nan_policy='omit')
    assert_allclose(r2.statistic, -r1.statistic, atol=1e-15)
    assert_allclose(r2.pvalue, r1.pvalue, atol=1e-15)

    # NB: arguments are not paired when NaNs are dropped
    r3 = stats.ttest_ind(y, x[1:])
    assert_allclose(r2, r3, atol=1e-15)

    # .. and this is consistent with R. R code:
    # x = c(NA, 2.0, 3.0, 4.0)
    # y = c(1.0, 2.0, 1.0, 2.0)
    # t.test(x, y, var.equal=TRUE)
    assert_allclose(r2, (-2.5354627641855498, 0.052181400457057901), atol=1e-15) 
Example 11
Project: FAE   Author: salan668   File: statistcs.py    GNU General Public License v3.0
def _statistic_continuous(array_0, array_1):

        # Test for homogeneity of variance (Levene's test)
        sta_value, p_value_h = levene(array_0, array_1)
        # Test for normality (Kolmogorov-Smirnov test against the normal distribution)
        w_train, p_value_train_n = kstest(array_0, 'norm')
        w_test, p_value_test_n = kstest(array_1, 'norm')
        # If the variances are homogeneous and both samples are normally distributed, use the t-test;
        # otherwise fall back to the Mann-Whitney U test
        if p_value_h >= 0.05 and p_value_train_n >= 0.05 and p_value_test_n >= 0.05:
            stat_num, p_value = ttest_ind(array_0, array_1)
            statistic_method = 'T-test'
        else:
            stat_num, p_value = mannwhitneyu(array_0, array_1)
            statistic_method = 'U'

        data_description = [str('%.2f' % np.mean(array_0)) + '±' + str('%.2f' % np.std(array_0)),
                            str('%.2f' % np.mean(array_1)) + '±' + str('%.2f' % np.std(array_1))]
        return data_description, statistic_method,  p_value 
Example 12
Project: musical-onset-efficient   Author: ronggong   File: ttest_experiment.py    GNU Affero General Public License v3.0
def pValueAll(f1_jan, f1_no_dense, f1_relu, f1_temporal, f1_deep, f1_less_deep, f1_bidi_400, f1_bidi_200, f1_bidi_100):

    _, p_jan_no_dense = ttest_ind(f1_jan, f1_no_dense, equal_var=False)

    _, p_jan_relu = ttest_ind(f1_jan, f1_relu, equal_var=False)

    _, p_jan_temporal = ttest_ind(f1_jan, f1_temporal, equal_var=False)

    _, p_jan_deep = ttest_ind(f1_jan, f1_deep, equal_var=False)

    _, p_jan_less_deep = ttest_ind(f1_jan, f1_less_deep, equal_var=False)

    _, p_jan_bidi_400 = ttest_ind(f1_jan, f1_bidi_400, equal_var=False)

    _, p_jan_bidi_200 = ttest_ind(f1_jan, f1_bidi_200, equal_var=False)

    _, p_jan_bidi_100 = ttest_ind(f1_jan, f1_bidi_100, equal_var=False)


    return p_jan_no_dense, p_jan_relu, p_jan_temporal, p_jan_deep, \
           p_jan_less_deep, p_jan_bidi_400, p_jan_bidi_200, p_jan_bidi_100 
Example 13
Project: musical-onset-efficient   Author: ronggong   File: ttest_experiment_transfer.py    GNU Affero General Public License v3.0
def pValueAll(f1_jan,
              f1_pretrained,
              f1_weight_init,
              f1_feature_extraction,
              f1_deep_feature_extraction):

    _, p_jan_pretrained = ttest_ind(f1_jan, f1_pretrained, equal_var=False)

    _, p_jan_weight_init = ttest_ind(f1_jan, f1_weight_init, equal_var=False)

    _, p_jan_feature_extraction = ttest_ind(f1_jan, f1_feature_extraction, equal_var=False)

    _, p_jan_deep_feature_extraction = ttest_ind(f1_jan, f1_deep_feature_extraction, equal_var=False)

    print(p_jan_pretrained)
    print(p_jan_weight_init)
    print(p_jan_feature_extraction)
    print(p_jan_deep_feature_extraction)

    return p_jan_pretrained, p_jan_weight_init, p_jan_feature_extraction, p_jan_deep_feature_extraction 
Example 14
Project: poker   Author: surgebiswas   File: test_stats.py    MIT License
def test_ttest_ind_nan_2nd_arg():
    # regression test for gh-6134: nans in the second arg were not handled
    x = [np.nan, 2.0, 3.0, 4.0]
    y = [1.0, 2.0, 1.0, 2.0]

    r1 = stats.ttest_ind(x, y, nan_policy='omit')
    r2 = stats.ttest_ind(y, x, nan_policy='omit')
    assert_allclose(r2.statistic, -r1.statistic, atol=1e-15)
    assert_allclose(r2.pvalue, r1.pvalue, atol=1e-15)

    # NB: arguments are not paired when NaNs are dropped
    r3 = stats.ttest_ind(y, x[1:])
    assert_allclose(r2, r3, atol=1e-15)

    # .. and this is consistent with R. R code:
    # x = c(NA, 2.0, 3.0, 4.0)
    # y = c(1.0, 2.0, 1.0, 2.0)
    # t.test(x, y, var.equal=TRUE)
    assert_allclose(r2, (-2.5354627641855498, 0.052181400457057901), atol=1e-15) 
Example 15
Project: P3_image_processing   Author: latedude2   File: test_stats.py    MIT License
def test_ttest_ind_nan_2nd_arg():
    # regression test for gh-6134: nans in the second arg were not handled
    x = [np.nan, 2.0, 3.0, 4.0]
    y = [1.0, 2.0, 1.0, 2.0]

    r1 = stats.ttest_ind(x, y, nan_policy='omit')
    r2 = stats.ttest_ind(y, x, nan_policy='omit')
    assert_allclose(r2.statistic, -r1.statistic, atol=1e-15)
    assert_allclose(r2.pvalue, r1.pvalue, atol=1e-15)

    # NB: arguments are not paired when NaNs are dropped
    r3 = stats.ttest_ind(y, x[1:])
    assert_allclose(r2, r3, atol=1e-15)

    # .. and this is consistent with R. R code:
    # x = c(NA, 2.0, 3.0, 4.0)
    # y = c(1.0, 2.0, 1.0, 2.0)
    # t.test(x, y, var.equal=TRUE)
    assert_allclose(r2, (-2.5354627641855498, 0.052181400457057901), atol=1e-15) 
Example 16
Project: pysciencedock   Author: Kitware   File: ttest.py    Apache License 2.0
def ttest(data):
    if len(data.index.levels[1]) != 2:
        raise Exception('T-test requires secondary index with two values')

    indexA, indexB = data.index.levels[1]

    dataA = data.xs(indexA, level=1)
    dataB = data.xs(indexB, level=1)

    statistic, pvalues = ttest_ind(dataA, dataB)

    pvalues = pd.DataFrame(
        [statistic, pvalues, -np.log10(pvalues)],
        columns=data.columns,
        index=['t', 'p', '-log10(p)']).transpose()

    return pvalues 
Example 17
Project: pysciencedock   Author: Kitware   File: volcano.py    Apache License 2.0
def volcano(data):
    if len(data.index.levels[1]) != 2:
        raise Exception('Volcano requires secondary index with two values')

    indexA, indexB = data.index.levels[1]

    dataA = data.xs(indexA, level=1)
    dataB = data.xs(indexB, level=1)

    meanA = dataA.mean(axis=0)
    meanB = dataB.mean(axis=0)

    change = meanB.div(meanA)

    statistic, pvalues = ttest_ind(dataA, dataB)

    pvalues = pd.DataFrame(
        [statistic, pvalues, -np.log10(pvalues), change, np.log2(change)],
        columns=data.columns,
        index=['t', 'p', '-log10(p)', 'foldchange', 'log2(foldchange)']).transpose()

    return pvalues 
Example 18
Project: spotpy   Author: thouska   File: analyser.py    MIT License
def compare_different_objectivefunctions(like1,like2):
    """
    Performs Welch's t-test (a.k.a. the unequal-variances t-test)

    :like1: objectivefunction values
    :type: list

    :like2: Other objectivefunction values
    :type: list

    :return: p Value
    :rtype: list
    """
    from scipy import stats
    out = stats.ttest_ind(like1, like2, equal_var=False)
    print(out)
    if out[1]>0.05:
        print('like1 is NOT significantly different from like2: p>0.05')
    else:
        print('like1 is significantly different from like2: p<0.05')
    return out 
Example 19
Project: PyMAP   Author: AminMahpour   File: Stats.py    GNU General Public License v2.0
def run(self):
        sample1_betas = []
        sample2_betas = []

        for sam1 in self.sample1:
            sample1_betas.append(sam1.probes[self.probe])
        for sam2 in self.sample2:
            sample2_betas.append(sam2.probes[self.probe])
        #print(sample1_betas)
        #print(sample2_betas)

        (t, pval) = statsx.ttest_ind(sample1_betas, sample2_betas)
        #print(float(pval))

        if float(pval) < float(0.05):
            self.diff_id_list.append(self.probe)
            print ("DONE...") 
Example 20
Project: PyMAP   Author: AminMahpour   File: Stats.py    GNU General Public License v2.0
def ttest(self, samples1, samples2, probe):
        sample1_betas = []
        sample2_betas = []

        for sam1 in samples1:
            sample1_betas.append(sam1.probes[probe])
        for sam2 in samples2:
            sample2_betas.append(sam2.probes[probe])
        #print(sample1_betas)
        #print(sample2_betas)

        (t, pval) = statsx.ttest_ind(sample1_betas, sample2_betas)
        #print(float(pval))

        if float(pval) < float(0.05):
            self.diff_probe_id.append(probe) 
Example 21
Project: pscore_match   Author: kellieotto   File: match.py    BSD 2-Clause "Simplified" License
def t_test(covariates, groups):
    """ 
    Two sample t test for the distribution of treatment and control covariates
    
    Parameters
    ----------
    covariates : DataFrame 
        Dataframe with one covariate per column.
        If matches are with replacement, then duplicates should be 
        included as additional rows.
    groups : array-like
        treatment assignments, must be 2 groups
    
    Returns
    -------
    A list of p-values, one for each column in covariates
    """
    colnames = list(covariates.columns)
    J = len(colnames)
    pvalues = np.zeros(J)
    for j in range(J):
        var = covariates[colnames[j]]
        res = ttest_ind(var[groups == 1], var[groups == 0])
        pvalues[j] = res.pvalue
    return pvalues 
Example 22
Project: SyntheticStatistics   Author: BlissChapman   File: multivariate_power_analysis.py    MIT License
def fdr_t_test_power(d1, d2, n_1, n_2, alpha=0.05, k=10**1):
    fdr_rejections = []

    for br in range(k):
        d1_idx = np.random.randint(low=0, high=d1.shape[0], size=n_1)
        d2_idx = np.random.randint(low=0, high=d2.shape[0], size=n_2)

        d1_replicate = d1[d1_idx].squeeze()
        d2_replicate = d2[d2_idx].squeeze()

        # FDR corrected univariate tests
        two_sample_t_test_p_vals_by_dim = np.zeros(d1.shape[1:])
        for i in range(two_sample_t_test_p_vals_by_dim.shape[0]):
            d1_vals = d1[:, i]
            d2_vals = d2[:, i]
            two_sample_t_test_p_vals_by_dim[i] = ttest_ind(d1_vals, d2_vals, equal_var=True).pvalue

        fdr_reject_by_dim = fdr_correction(two_sample_t_test_p_vals_by_dim, alpha=alpha)[0]
        fdr_reject = sum(fdr_reject_by_dim) > 0  # reject if any dim rejects
        fdr_rejections.append(fdr_reject)

    fdr_power = np.mean(fdr_rejections)
    return fdr_power 
Example 23
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_stats.py    MIT License
def test_ttest_ind_nan_2nd_arg():
    # regression test for gh-6134: nans in the second arg were not handled
    x = [np.nan, 2.0, 3.0, 4.0]
    y = [1.0, 2.0, 1.0, 2.0]

    r1 = stats.ttest_ind(x, y, nan_policy='omit')
    r2 = stats.ttest_ind(y, x, nan_policy='omit')
    assert_allclose(r2.statistic, -r1.statistic, atol=1e-15)
    assert_allclose(r2.pvalue, r1.pvalue, atol=1e-15)

    # NB: arguments are not paired when NaNs are dropped
    r3 = stats.ttest_ind(y, x[1:])
    assert_allclose(r2, r3, atol=1e-15)

    # .. and this is consistent with R. R code:
    # x = c(NA, 2.0, 3.0, 4.0)
    # y = c(1.0, 2.0, 1.0, 2.0)
    # t.test(x, y, var.equal=TRUE)
    assert_allclose(r2, (-2.5354627641855498, 0.052181400457057901), atol=1e-15) 
Example 24
Project: DIVE-backend   Author: MacroConnections   File: numerical_comparison.py    GNU General Public License v3.0
def ttest(df, fields, indep, dep):
    # Ensure single field
    dep_field_name = dep[0]
    indep_field_name = indep[0]
    unique_indep_values = get_unique(df[indep_field_name])

    subsets = {}
    for v in unique_indep_values:
        subsets[v] = np.array(df[df[indep_field_name] == v][dep_field_name])

    result = {}
    for (x, y) in combinations(unique_indep_values, 2):
        (statistic, pvalue) = ttest_ind(subsets[x], subsets[y])
        result[str([x, y])] = {
            'statistic': statistic,
            'pvalue': pvalue
        }

    return result



##################
#Functions to determine which tests could be run
################## 
Example 25
Project: GeoPy   Author: aerler   File: stats.py    GNU General Public License v3.0
def ttest_ind(sample1, sample2, equal_var=True, lstatistic=False, ignoreNaN=True, **kwargs):
  ''' Apply Student's t-test for two independent samples, to test whether the samples
      are drawn from the same underlying (continuous) distribution; a high p-value is
      consistent with the two samples being drawn from the same distribution.
      The t-test implementation is vectorized (unlike all other tests). '''
  if lstatistic: raise NotImplementedError("Return of test statistic is not yet implemented; only p-values are returned.")
  testfct = functools.partial(ttest_ind_wrapper, ignoreNaN=ignoreNaN, equal_var=equal_var)
  pvar = apply_stat_test_2samp(sample1, sample2, fct=testfct, laax=False, 
                               lpval=True, lrho=False, **kwargs)
  return pvar 
Example 26
Project: me-ica   Author: ME-ICA   File: select_model_fft20e.py    GNU Lesser General Public License v2.1
def fft_variance(fproj_arr,fproj_arr_val,A,B):
	fproj_sel_T = stats.ttest_ind(fproj_arr[:,A].T,fproj_arr[:,B].T)
	fproj_sel_A = (andb([fproj_sel_T[0]>0,fproj_sel_T[1]<0.05])==2).reshape(mask.shape[0:2])
	fproj_sel_B = (andb([fproj_sel_T[0]<0,fproj_sel_T[1]<0.05])==2).reshape(mask.shape[0:2])
	return fproj_arr_val[fproj_sel_A.flatten()].sum(0),fproj_arr_val[fproj_sel_B.flatten()].sum(0) 
Example 27
Project: me-ica   Author: ME-ICA   File: select_model_fft20d.py    GNU Lesser General Public License v2.1
def fft_variance(fproj_arr,fproj_arr_val,A,B):
	fproj_sel_T = stats.ttest_ind(fproj_arr[:,A].T,fproj_arr[:,B].T)
	fproj_sel_A = (andb([fproj_sel_T[0]>0,fproj_sel_T[1]<0.05])==2).reshape(mask.shape[0:2])
	fproj_sel_B = (andb([fproj_sel_T[0]<0,fproj_sel_T[1]<0.05])==2).reshape(mask.shape[0:2])
	return fproj_arr_val[fproj_sel_A.flatten()].sum(0),fproj_arr_val[fproj_sel_B.flatten()].sum(0) 
Example 28
Project: TPs   Author: DataMiningP7   File: tp2_solutions.py    MIT License
def test_word_means(X, y, word_index):
    """ Performs a two-means t-test on the tf-idf values of a given word
     represented by its index in the matrix X. The test checks whether the word
     is over-represented in spammy messages and returns its p-value. The
     smaller the p-value, the more over-represented the word is within spams
     compared to hams.


    Args:
        X: the TF-IDF matrix where each line represents a document and each
           column represents a word, typically obtained by running
           transform_text().
        y: a binary vector where the i-th value indicates whether the i-th
           document is a spam, typically obtained by running transform_text().
        word_index: an int representing a column number in X.
    Returns:
        A double that corresponds to the p-value of the test (the probability
        that the word is NOT over-represented in the spams).
   """
    # get a dense matrix instead of a sparse one
    X = X.todense()

    x0 = X[ y == 0, word_index ]
    x1 = X[ y == 1, word_index ]

    #  t < 0 means x0 < x1
    t, p = ttest_ind(x0, x1)
    return p 
Example 29
Project: LaserTOF   Author: kyleuckert   File: test_mstats_basic.py    MIT License
def test_zero_division(self):
        t, p = mstats.ttest_ind([0, 0, 0], [1, 1, 1])
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore')
            assert_equal((np.abs(t), p), (np.inf, 0))
            assert_array_equal(mstats.ttest_ind([0, 0, 0], [0, 0, 0]),
                               (np.nan, np.nan)) 
Example 30
Project: LaserTOF   Author: kyleuckert   File: test_mstats_basic.py    MIT License
def test_vs_nonmasked(self):
        np.random.seed(1234567)
        outcome = np.random.randn(20, 4) + [0, 0, 1, 2]

        # 1-D inputs
        res1 = stats.ttest_ind(outcome[:, 0], outcome[:, 1])
        res2 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1])
        assert_allclose(res1, res2)

        # 2-D inputs
        res1 = stats.ttest_ind(outcome[:, 0], outcome[:, 1], axis=None)
        res2 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1], axis=None)
        assert_allclose(res1, res2)
        res1 = stats.ttest_ind(outcome[:, :2], outcome[:, 2:], axis=0)
        res2 = mstats.ttest_ind(outcome[:, :2], outcome[:, 2:], axis=0)
        assert_allclose(res1, res2)

        # Check default is axis=0
        res3 = mstats.ttest_ind(outcome[:, :2], outcome[:, 2:])
        assert_allclose(res2, res3)

        # Check equal_var
        res4 = stats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=True)
        res5 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=True)
        assert_allclose(res4, res5)
        res4 = stats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=False)
        res5 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=False)
        assert_allclose(res4, res5) 
Example 31
Project: LaserTOF   Author: kyleuckert   File: test_mstats_basic.py    MIT License
def test_fully_masked(self):
        np.random.seed(1234567)
        outcome = ma.masked_array(np.random.randn(3, 2), mask=[[1, 1, 1], [0, 0, 0]])
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore')
            assert_array_equal(mstats.ttest_ind(outcome[:, 0], outcome[:, 1]),
                               (np.nan, np.nan))
            assert_array_equal(mstats.ttest_ind([np.nan, np.nan], [1.0, 2.0]),
                               (np.nan, np.nan)) 
Example 32
Project: LaserTOF   Author: kyleuckert   File: test_mstats_basic.py    MIT License
def test_result_attributes(self):
        np.random.seed(1234567)
        outcome = np.random.randn(20, 4) + [0, 0, 1, 2]

        res = mstats.ttest_ind(outcome[:, 0], outcome[:, 1])
        attributes = ('statistic', 'pvalue')
        check_named_results(res, attributes, ma=True) 
Example 33
Project: LaserTOF   Author: kyleuckert   File: test_mstats_basic.py    MIT License
def test_empty(self):
        res1 = mstats.ttest_ind([], [])
        assert_(np.all(np.isnan(res1))) 
Example 34
Project: QuantStudio   Author: Scorpi000   File: Difference.py    GNU General Public License v3.0
def __QS_move__(self, idt, **kwargs):
        if self._iDT==idt: return 0
        self._iDT = idt
        if self.CalcDTs:
            if idt not in self.CalcDTs[self._CurCalcInd:]: return 0
            self._CurCalcInd = self.CalcDTs[self._CurCalcInd:].index(idt) + self._CurCalcInd
            PreInd = self._CurCalcInd - self.ForecastPeriod - self.Lag
            LastInd = self._CurCalcInd - self.ForecastPeriod
            PreDateTime = self.CalcDTs[PreInd]
            LastDateTime = self.CalcDTs[LastInd]
        else:
            self._CurCalcInd = self._Model.DateTimeIndex
            PreInd = self._CurCalcInd - self.ForecastPeriod - self.Lag
            LastInd = self._CurCalcInd - self.ForecastPeriod
            PreDateTime = self._Model.DateTimeSeries[PreInd]
            LastDateTime = self._Model.DateTimeSeries[LastInd]
        if (PreInd<0) or (LastInd<0): return 0
        Price = self._PriceTable.readData(dts=[LastDateTime, idt], ids=self._Output["证券ID"], factor_names=[self.PriceFactor]).iloc[0, :, :].values
        self._Output["收益率"] = np.r_[self._Output["收益率"], _calcReturn(Price, return_type=self.ReturnType)]
        FactorData = self._FactorTable.readData(dts=[PreDateTime], ids=[self.FactorID], factor_names=[self.TestFactor]).iloc[0, 0, 0]
        self._Output["因子值"] = np.r_[self._Output["因子值"], FactorData]
        if self._Output["收益率"].shape[0]<self.MinSummaryWindow: return 0
        StartInd = int(max(0, self._Output["收益率"].shape[0] - self.SummaryWindow))
        FactorData, Return = self._Output["因子值"][StartInd:], self._Output["收益率"][StartInd:, :]
        Mask = np.full(shape=(FactorData.shape[0], self.GroupNum), fill_value=False)
        for j in range(self.GroupNum):
            if j==0: Mask[FactorData<=np.percentile(FactorData, (j+1)/self.GroupNum*100), j] = True
            else: Mask[((FactorData>np.percentile(FactorData, j/self.GroupNum*100)) & (FactorData<=np.percentile(FactorData, (j+1)/self.GroupNum*100))), j] = True
        for i, iID in enumerate(self._Output["证券ID"]):
            itStat, ipValue = np.full(shape=(self.GroupNum, self.GroupNum), fill_value=np.nan), np.full(shape=(self.GroupNum, self.GroupNum), fill_value=np.nan)
            for j in range(self.GroupNum):
                for k in range(j+1, self.GroupNum):
                    jkResult = stats.ttest_ind(Return[Mask[:, j], i], Return[Mask[:, k], i], equal_var=False, nan_policy="omit")
                    itStat[j, k], ipValue[j, k] = jkResult.statistic, jkResult.pvalue
                    itStat[k, j], ipValue[k, j] = -itStat[j, k], ipValue[j, k]
            self._Output["滚动t统计量"][iID][idt], self._Output["滚动p值"][iID][idt] = itStat, ipValue
        return 0 
Example 35
Project: QuantStudio   Author: Scorpi000   File: Difference.py    GNU General Public License v3.0
def __QS_end__(self):
        if not self._isStarted: return 0
        super().__QS_end__()
        FactorData, Return, PriceIDs = self._Output.pop("因子值"), self._Output.pop("收益率"), self._Output.pop("证券ID")
        self._Output["全样本t统计量"], self._Output["全样本p值"] = {}, {}
        Mask = np.full(shape=(FactorData.shape[0], self.GroupNum), fill_value=False)
        for j in range(self.GroupNum):
            if j==0: Mask[FactorData<=np.percentile(FactorData, (j+1)/self.GroupNum*100), j] = True
            else: Mask[((FactorData>np.percentile(FactorData, j/self.GroupNum*100)) & (FactorData<=np.percentile(FactorData, (j+1)/self.GroupNum*100))), j] = True
        for i, iID in enumerate(PriceIDs):
            itStat, ipValue = np.full(shape=(self.GroupNum, self.GroupNum), fill_value=np.nan), np.full(shape=(self.GroupNum, self.GroupNum), fill_value=np.nan)
            for j in range(self.GroupNum):
                for k in range(j+1, self.GroupNum):
                    jkResult = stats.ttest_ind(Return[Mask[:, j], i], Return[Mask[:, k], i], equal_var=False, nan_policy="omit")
                    itStat[j, k], ipValue[j, k] = jkResult.statistic, jkResult.pvalue
                    itStat[k, j], ipValue[k, j] = -itStat[j, k], ipValue[j, k]
            self._Output["全样本t统计量"][iID], self._Output["全样本p值"][iID] = itStat, ipValue
        DTs = sorted(self._Output["滚动t统计量"][PriceIDs[0]])
        for iID in PriceIDs:
            self._Output["滚动t统计量"][iID] = pd.Panel(self._Output["滚动t统计量"][iID]).to_frame(filter_observations=False)
            self._Output["滚动p值"][iID] = pd.Panel(self._Output["滚动p值"][iID]).to_frame(filter_observations=False)
            self._Output["全样本t统计量"][iID] = pd.DataFrame(self._Output["全样本t统计量"][iID]).stack(dropna=False)
            self._Output["全样本p值"][iID] = pd.DataFrame(self._Output["全样本p值"][iID]).stack(dropna=False)
        self._Output["滚动t统计量"] = pd.Panel(self._Output["滚动t统计量"]).to_frame(filter_observations=False)
        self._Output["滚动t统计量"].index.names = ["分位数组1", "分位数组2", "时点"]
        self._Output["滚动t统计量"] = self._Output["滚动t统计量"].reset_index()
        self._Output["滚动t统计量"] = self._Output["滚动t统计量"][self._Output["滚动t统计量"]["分位数组1"]!=self._Output["滚动t统计量"]["分位数组2"]]
        self._Output["滚动p值"] = pd.Panel(self._Output["滚动p值"]).to_frame(filter_observations=False)
        self._Output["滚动p值"].index.names = ["分位数组1", "分位数组2", "时点"]
        self._Output["滚动p值"] = self._Output["滚动p值"].reset_index()
        self._Output["滚动p值"] = self._Output["滚动p值"][self._Output["滚动p值"]["分位数组1"]!=self._Output["滚动p值"]["分位数组2"]]
        self._Output["全样本t统计量"] = pd.DataFrame(self._Output["全样本t统计量"]).reset_index()
        self._Output["全样本p值"] = pd.DataFrame(self._Output["全样本p值"]).reset_index()
        self._Output["全样本t统计量"].columns = self._Output["全样本p值"].columns = ["分位数组1", "分位数组2"]+PriceIDs
        self._Output["全样本t统计量"] = self._Output["全样本t统计量"][self._Output["全样本t统计量"]["分位数组1"]!=self._Output["全样本t统计量"]["分位数组2"]]
        self._Output["全样本p值"] = self._Output["全样本p值"][self._Output["全样本p值"]["分位数组1"]!=self._Output["全样本p值"]["分位数组2"]]
        self._Output["最后一期t统计量"] = self._Output["滚动t统计量"][self._Output["滚动t统计量"]["时点"]==DTs[-1]]
        self._Output["最后一期p值"] = self._Output["滚动p值"][self._Output["滚动p值"]["时点"]==DTs[-1]]
        self._Output["最后一期t统计量"].pop("时点")
        self._Output["最后一期p值"].pop("时点")
        return 0 
Example 36
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_mstats_basic.py    GNU General Public License v3.0
def test_empty(self):
        res1 = mstats.ttest_ind([], [])
        assert_(np.all(np.isnan(res1))) 
Example 37
Project: StratoVari   Author: zmlabe   File: calc_Utilities.py    MIT License
def calc_indttest(varx,vary):
    """
    Function calculates statistical difference for 2 independent
    sample t-test

    Parameters
    ----------
    varx : 3d array
    vary : 3d array
    
    Returns
    -------
    stat = calculated t-statistic
    pvalue = two-tailed p-value

    Usage
    -----
    stat,pvalue = calc_ttest(varx,vary)
    """
    print('\n>>> Using calc_ttest function!')
    
    ### Import modules
    import numpy as np
    import scipy.stats as sts
    
    ### 2-independent sample t-test
    stat,pvalue = sts.ttest_ind(varx,vary,nan_policy='omit')
    
    ### Mask significance at the 90% confidence level (p < 0.1)
    pvalue[np.where(pvalue >= 0.1)] = np.nan
    pvalue[np.where(pvalue < 0.1)] = 1.
    
    print('*Completed: Finished calc_ttest function!')
    return stat,pvalue

###############################################################################
###############################################################################
############################################################################### 
Example 38
Project: StratoVari   Author: zmlabe   File: plot_Maps_FDR_Monthly_Variables.py    MIT License
def calc_indttestfdr(varx,vary):
    """
    Function calculates statistical difference for 2 independent
    sample t-test

    Parameters
    ----------
    varx : 3d array
    vary : 3d array
    
    Returns
    -------
    stat = calculated t-statistic
    pvalue = two-tailed p-value

    Usage
    -----
    stat,pvalue = calc_ttest(varx,vary)
    """
    print('\n>>> Using calc_ttest function!')
    
    ### Import modules
    import scipy.stats as sts
    
    ### 2-independent sample t-test
    stat,pvalue = sts.ttest_ind(varx,vary,nan_policy='omit')
    
    print('*Completed: Finished calc_ttest function!')
    return stat,pvalue

###################### 
Example 39
Project: StratoVari   Author: zmlabe   File: plot_ProfileVar_Monthly_FDR.py    MIT License
def calc_indttestfdr(varx,vary):
    """
    Function calculates statistical difference for 2 independent
    sample t-test

    Parameters
    ----------
    varx : 3d array
    vary : 3d array
    
    Returns
    -------
    stat = calculated t-statistic
    pvalue = two-tailed p-value

    Usage
    -----
    stat,pvalue = calc_ttest(varx,vary)
    """
    print('\n>>> Using calc_ttest function!')
    
    ### Import modules
    import scipy.stats as sts
    
    ### 2-independent sample t-test
    stat,pvalue = sts.ttest_ind(varx,vary,nan_policy='omit')
    
    print('*Completed: Finished calc_ttest function!')
    return stat,pvalue

###################### 
Example 40
Project: StratoVari   Author: zmlabe   File: plot_Maps_FDR_Monthly_Variables_SIT.py    MIT License
def calc_indttestfdr(varx,vary):
    """
    Function calculates statistical difference for 2 independent
    sample t-test

    Parameters
    ----------
    varx : 3d array
    vary : 3d array
    
    Returns
    -------
    stat = calculated t-statistic
    pvalue = two-tailed p-value

    Usage
    -----
    stat,pvalue = calc_ttest(varx,vary)
    """
    print('\n>>> Using calc_ttest function!')
    
    ### Import modules
    import scipy.stats as sts
    
    ### 2-independent sample t-test
    stat,pvalue = sts.ttest_ind(varx,vary,nan_policy='omit')
    
    print('*Completed: Finished calc_ttest function!')
    return stat,pvalue

###################### 
Example 41
Project: spnss   Author: awd4   File: compare.py    MIT License
def t_test(name):
    #if name == 'c20ng':
    #    return
    dresults = experiment.read_csv( experiment.csv_path(name, 'dag', 'compare') )
    tresults = experiment.read_csv( experiment.csv_path(name, 'tree', 'compare') )
    dtst = [float(r['tst_llh']) for r in dresults]
    ttst = [float(r['tst_llh']) for r in tresults]
    dsize = [int(r['num_nodes']) for r in dresults]
    tsize = [int(r['num_nodes']) for r in tresults]
    dtime = [float(r['time']) for r in dresults]
    ttime = [float(r['time']) for r in tresults]
    from scipy import stats
    print((name + '       ')[:8], '\t%.4f' % np.mean(dtst), '\t%.4f' % np.mean(ttst), '\t', np.mean(dtst) > np.mean(ttst),
          '\t\t', '%6d' % int(np.mean(dsize)), '%7d' % int(np.mean(tsize)), '\t', '%.4f' % (np.mean(dsize) / np.mean(tsize)),
          '\t%8.2f' % np.mean(dtime), '  %8.2f' % np.mean(ttime), np.mean(dtime) / np.mean(ttime),
          '\t', stats.ttest_ind(dtst, ttst, equal_var=False)[1] < 0.05)
Example 42
Project: vnpy_crypto   Author: birforce   File: test_weightstats.py    MIT License
def test_weightstats_2(self):
        x1, x2 = self.x1, self.x2
        w1, w2 = self.w1, self.w2

        d1 = DescrStatsW(x1)
        d1w = DescrStatsW(x1, weights=w1)
        d2w = DescrStatsW(x2, weights=w2)
        x1r = d1w.asrepeats()
        x2r = d2w.asrepeats()
#        print 'random weights'
#        print ttest_ind(x1, x2, weights=(w1, w2))
#        print stats.ttest_ind(x1r, x2r)
        assert_almost_equal(ttest_ind(x1, x2, weights=(w1, w2))[:2],
                            stats.ttest_ind(x1r, x2r), 14)
        # not the same as new version with random weights/replication
#        assert x1r.shape[0] == d1w.sum_weights
#        assert x2r.shape[0] == d2w.sum_weights

        assert_almost_equal(x2r.mean(0), d2w.mean, 14)
        assert_almost_equal(x2r.var(), d2w.var, 14)
        assert_almost_equal(x2r.std(), d2w.std, 14)
        # note: the following is for 1d
        assert_almost_equal(np.cov(x2r, bias=1), d2w.cov, 14)
        # assert_almost_equal(np.corrcoef(np.x2r), d2w.corrcoef, 19)
        # TODO: exception in corrcoef (scalar case)

        # one-sample tests
#        print d1.ttest_mean(3)
#        print stats.ttest_1samp(x1, 3)
#        print d1w.ttest_mean(3)
#        print stats.ttest_1samp(x1r, 3)
        assert_almost_equal(d1.ttest_mean(3)[:2], stats.ttest_1samp(x1, 3), 11)
        assert_almost_equal(d1w.ttest_mean(3)[:2],
                            stats.ttest_1samp(x1r, 3), 11) 
Example 43
Project: vnpy_crypto   Author: birforce   File: test_weightstats.py    MIT License
def test_weightstats_3(self):
        x1_2d, x2_2d = self.x1_2d, self.x2_2d
        w1, w2 = self.w1, self.w2

        d1w_2d = DescrStatsW(x1_2d, weights=w1)
        d2w_2d = DescrStatsW(x2_2d, weights=w2)
        x1r_2d = d1w_2d.asrepeats()
        x2r_2d = d2w_2d.asrepeats()

        assert_almost_equal(x2r_2d.mean(0), d2w_2d.mean, 14)
        assert_almost_equal(x2r_2d.var(0), d2w_2d.var, 14)
        assert_almost_equal(x2r_2d.std(0), d2w_2d.std, 14)
        assert_almost_equal(np.cov(x2r_2d.T, bias=1), d2w_2d.cov, 14)
        assert_almost_equal(np.corrcoef(x2r_2d.T), d2w_2d.corrcoef, 14)

#        print d1w_2d.ttest_mean(3)
#        #scipy.stats.ttest is also vectorized
#        print stats.ttest_1samp(x1r_2d, 3)
        t, p, d = d1w_2d.ttest_mean(3)
        assert_almost_equal([t, p], stats.ttest_1samp(x1r_2d, 3), 11)
        # print [stats.ttest_1samp(xi, 3) for xi in x1r_2d.T]
        cm = CompareMeans(d1w_2d, d2w_2d)
        ressm = cm.ttest_ind()
        resss = stats.ttest_ind(x1r_2d, x2r_2d)
        assert_almost_equal(ressm[:2], resss, 14)

#        doesn't work for 2d, levene doesn't use weights
#        cm = CompareMeans(d1w_2d, d2w_2d)
#        ressm = cm.test_equal_var()
#        resss = stats.levene(x1r_2d, x2r_2d)
#        assert_almost_equal(ressm[:2], resss, 14) 
Example 44
Project: vnpy_crypto   Author: birforce   File: test_weightstats.py    MIT License
def test_ttest(self):
        x1r = self.x1r
        d1w = self.d1w
        assert_almost_equal(d1w.ttest_mean(3)[:2],
                            stats.ttest_1samp(x1r, 3), 11)

#    def
#        assert_almost_equal(ttest_ind(x1, x2, weights=(w1, w2))[:2],
#                            stats.ttest_ind(x1r, x2r), 14) 
Example 45
Project: TabPy   Author: tableau   File: tTest.py    MIT License
def ttest(_arg1, _arg2):
    """
    T-Test is a statistical hypothesis test that is used to compare
    two sample means or a sample’s mean against a known population mean.
    For more information on the function and how to use it please refer
    to tabpy-tools.md
    """
    # one sample test with mean
    if len(_arg2) == 1:
        test_stat, p_value = stats.ttest_1samp(_arg1, _arg2)
        return p_value
    # two sample t-test where _arg1 is numeric and _arg2 is a binary factor
    elif len(set(_arg2)) == 2:
        # each sample in _arg1 needs to have a corresponding classification
        # in _arg2
        if not (len(_arg1) == len(_arg2)):
            raise ValueError
        class1, class2 = set(_arg2)
        sample1 = []
        sample2 = []
        for i in range(len(_arg1)):
            if _arg2[i] == class1:
                sample1.append(_arg1[i])
            else:
                sample2.append(_arg1[i])
        test_stat, p_value = stats.ttest_ind(sample1, sample2, equal_var=False)
        return p_value
    # arg1 is a sample and arg2 is a sample
    else:
        test_stat, p_value = stats.ttest_ind(_arg1, _arg2, equal_var=False)
        return p_value 
Example 46
Project: ble5-nrf52-mac   Author: tomasero   File: test_mstats_basic.py    MIT License
def test_zero_division(self):
        t, p = mstats.ttest_ind([0, 0, 0], [1, 1, 1])
        assert_equal((np.abs(t), p), (np.inf, 0))

        with suppress_warnings() as sup:
            sup.filter(RuntimeWarning, "invalid value encountered in absolute")
            t, p = mstats.ttest_ind([0, 0, 0], [0, 0, 0])
            assert_array_equal(t, np.array([np.nan, np.nan]))
            assert_array_equal(p, np.array([np.nan, np.nan])) 
Example 47
Project: ble5-nrf52-mac   Author: tomasero   File: test_mstats_basic.py    MIT License
def test_vs_nonmasked(self):
        np.random.seed(1234567)
        outcome = np.random.randn(20, 4) + [0, 0, 1, 2]

        # 1-D inputs
        res1 = stats.ttest_ind(outcome[:, 0], outcome[:, 1])
        res2 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1])
        assert_allclose(res1, res2)

        # 2-D inputs
        res1 = stats.ttest_ind(outcome[:, 0], outcome[:, 1], axis=None)
        res2 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1], axis=None)
        assert_allclose(res1, res2)
        res1 = stats.ttest_ind(outcome[:, :2], outcome[:, 2:], axis=0)
        res2 = mstats.ttest_ind(outcome[:, :2], outcome[:, 2:], axis=0)
        assert_allclose(res1, res2)

        # Check default is axis=0
        res3 = mstats.ttest_ind(outcome[:, :2], outcome[:, 2:])
        assert_allclose(res2, res3)

        # Check equal_var
        res4 = stats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=True)
        res5 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=True)
        assert_allclose(res4, res5)
        res4 = stats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=False)
        res5 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=False)
        assert_allclose(res4, res5) 
Example 48
Project: ble5-nrf52-mac   Author: tomasero   File: test_mstats_basic.py    MIT License
def test_fully_masked(self):
        np.random.seed(1234567)
        outcome = ma.masked_array(np.random.randn(3, 2), mask=[[1, 1, 1], [0, 0, 0]])
        with suppress_warnings() as sup:
            sup.filter(RuntimeWarning, "invalid value encountered in absolute")
            for pair in [(outcome[:, 0], outcome[:, 1]), ([np.nan, np.nan], [1.0, 2.0])]:
                t, p = mstats.ttest_ind(*pair)
                assert_array_equal(t, (np.nan, np.nan))
                assert_array_equal(p, (np.nan, np.nan)) 
Example 49
Project: ble5-nrf52-mac   Author: tomasero   File: test_mstats_basic.py    MIT License
def test_result_attributes(self):
        np.random.seed(1234567)
        outcome = np.random.randn(20, 4) + [0, 0, 1, 2]

        res = mstats.ttest_ind(outcome[:, 0], outcome[:, 1])
        attributes = ('statistic', 'pvalue')
        check_named_results(res, attributes, ma=True) 
Example 50
Project: ble5-nrf52-mac   Author: tomasero   File: test_mstats_basic.py    MIT License
def test_empty(self):
        res1 = mstats.ttest_ind([], [])
        assert_(np.all(np.isnan(res1))) 
Example 51
Project: gender_analysis   Author: dhmit   File: statistical.py    BSD 3-Clause "New" or "Revised" License
def get_p_and_ttest_value(list_a, list_b):

    """
    Takes in two lists and returns the t-statistic and p-value.
    Can be used to establish a relationship between author gender and word usage,
    and also for null hypothesis testing.

    :param list_a: List of things to compare to list_b
    :param list_b: List of things to compare to list_a
    :return: (ttest value , p value)
    """

    ttest_p_value = stats.ttest_ind(list_a, list_b, equal_var=False)
    return ttest_p_value 
Example 52
Project: gender_analysis   Author: dhmit   File: statistical.py    BSD 3-Clause "New" or "Revised" License
def ind_ttest(array1, array2, pvalue_target=0.05):
    """
    Independent t-test for two independent variables

    :param array1: array-like, data for one category. e.g. he/she distance in novels authored by
        women
    :param array2: array-like, data for second category. e.g. he/she distance in novels authored
        by men
    :param pvalue_target: largest p-value for which we consider the test statistically significant
    :return: True if the difference in the means of the two arrays are significant, False otherwise

    >>> a1 = np.array([1, 2, 3, 4, 5])
    >>> a2 = np.array([1, 2, 3, 4, 5])
    >>> ind_ttest(a1, a2)
    False

    >>> a3 = np.array([3, 4, 8, 6, 2])
    >>> a4 = np.array([14, 8, 17, 9, 16])
    >>> ind_ttest(a3, a4)
    True

    """

    # don't assume that the two variables have equal standard deviation
    pvalue = stats.ttest_ind(array1, array2, equal_var=False)[1]

    return pvalue < pvalue_target 
Example 53
Project: poker   Author: surgebiswas   File: test_mstats_basic.py    MIT License
def test_zero_division(self):
        t, p = mstats.ttest_ind([0, 0, 0], [1, 1, 1])
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore')
            assert_equal((np.abs(t), p), (np.inf, 0))
            assert_array_equal(mstats.ttest_ind([0, 0, 0], [0, 0, 0]),
                               (np.nan, np.nan)) 
Example 54
Project: poker   Author: surgebiswas   File: test_mstats_basic.py    MIT License
def test_vs_nonmasked(self):
        np.random.seed(1234567)
        outcome = np.random.randn(20, 4) + [0, 0, 1, 2]

        # 1-D inputs
        res1 = stats.ttest_ind(outcome[:, 0], outcome[:, 1])
        res2 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1])
        assert_allclose(res1, res2)

        # 2-D inputs
        res1 = stats.ttest_ind(outcome[:, 0], outcome[:, 1], axis=None)
        res2 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1], axis=None)
        assert_allclose(res1, res2)
        res1 = stats.ttest_ind(outcome[:, :2], outcome[:, 2:], axis=0)
        res2 = mstats.ttest_ind(outcome[:, :2], outcome[:, 2:], axis=0)
        assert_allclose(res1, res2)

        # Check default is axis=0
        res3 = mstats.ttest_ind(outcome[:, :2], outcome[:, 2:])
        assert_allclose(res2, res3)

        # Check equal_var
        res4 = stats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=True)
        res5 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=True)
        assert_allclose(res4, res5)
        res4 = stats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=False)
        res5 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=False)
        assert_allclose(res4, res5) 
Example 55
Project: poker   Author: surgebiswas   File: test_mstats_basic.py    MIT License
def test_fully_masked(self):
        np.random.seed(1234567)
        outcome = ma.masked_array(np.random.randn(3, 2), mask=[[1, 1, 1], [0, 0, 0]])
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore')
            assert_array_equal(mstats.ttest_ind(outcome[:, 0], outcome[:, 1]),
                               (np.nan, np.nan))
            assert_array_equal(mstats.ttest_ind([np.nan, np.nan], [1.0, 2.0]),
                               (np.nan, np.nan)) 
Example 56
Project: poker   Author: surgebiswas   File: test_mstats_basic.py    MIT License
def test_result_attributes(self):
        np.random.seed(1234567)
        outcome = np.random.randn(20, 4) + [0, 0, 1, 2]

        res = mstats.ttest_ind(outcome[:, 0], outcome[:, 1])
        attributes = ('statistic', 'pvalue')
        check_named_results(res, attributes, ma=True) 
Example 57
Project: poker   Author: surgebiswas   File: test_mstats_basic.py    MIT License
def test_empty(self):
        res1 = mstats.ttest_ind([], [])
        assert_(np.all(np.isnan(res1))) 
Example 58
Project: geo-tweet   Author: YixuanLi   File: ttest.py    GNU General Public License v2.0
def cal_pearson(self):
		if os.path.exists(self.outputDir):
			os.remove(self.outputDir)
		with open(self.outputDir,'a') as fout:
			fout.write("LIWC_attribute"+'\t'+"t-statistic"+'\t'+"p_value\n")
		for i in range(self.LIWC_attribute):
			vec1 = self.data_geo[:,i]
			vec2 = self.data_non_geo[:,i]
			#print vec1
			#print vec2
			t,pval = ttest_ind(vec1,vec2)
			with open(self.outputDir,'a') as fout:
				string = self.LIWC[i] +'\t'+str(t)+'\t'+str(pval)+'\n'
				fout.write(string) 
Example 59
Project: pl2mind   Author: MRN-Code   File: sz_analysis.py    GNU General Public License v2.0
def get_sz_info(dataset, activations):
    """
    Get schizophrenia classification experiment related info from activations.
    Info is a 2-sided t test for each latent variable of healthy vs control.

    Parameters
    ----------
    dataset: pylearn2.datasets.DenseDesignMatrix
        Dataset must be in dataset_info.sz_datasets.
        Labels must be in {0, 1}. Singleton labels not tested ({0}) and will
        likely not work.
    activations: numpy array_like
        Activations from which to ttest sz statistics.

    Returns
    -------
    ttests: list of tuples
        The 2-sided ttest (t, p) for each latent variable.
    """

    if dataset.dataset_name not in dataset_info.sz_datasets:
        raise ValueError("Dataset %s not designated as sz classification,"
                         "please edit \"datasets/dataset_info.py\""
                         "if you are sure this is an sz classification related"
                         "dataset" % dataset.dataset_name)
    logger.info("t testing features for relevance to Sz.")
    labels = dataset.y
    assert labels is not None
    for label in labels:
        assert label == 0 or label == 1
    sz_idx = [i for i in range(len(labels)) if labels[i] == 1]
    h_idx = [i for i in range(len(labels)) if labels[i] == 0]

    sz_acts = activations[sz_idx]
    h_acts = activations[h_idx]

    ttests = []
    for sz_act, h_act in zip(sz_acts.T, h_acts.T):
        ttests.append(ttest_ind(h_act, sz_act))

    return ttests 
Example 60
Project: P3_image_processing   Author: latedude2   File: test_mstats_basic.py    MIT License
def test_zero_division(self):
        t, p = mstats.ttest_ind([0, 0, 0], [1, 1, 1])
        assert_equal((np.abs(t), p), (np.inf, 0))

        with suppress_warnings() as sup:
            sup.filter(RuntimeWarning, "invalid value encountered in absolute")
            t, p = mstats.ttest_ind([0, 0, 0], [0, 0, 0])
            assert_array_equal(t, np.array([np.nan, np.nan]))
            assert_array_equal(p, np.array([np.nan, np.nan])) 
Example 61
Project: P3_image_processing   Author: latedude2   File: test_mstats_basic.py    MIT License
def test_vs_nonmasked(self):
        np.random.seed(1234567)
        outcome = np.random.randn(20, 4) + [0, 0, 1, 2]

        # 1-D inputs
        res1 = stats.ttest_ind(outcome[:, 0], outcome[:, 1])
        res2 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1])
        assert_allclose(res1, res2)

        # 2-D inputs
        res1 = stats.ttest_ind(outcome[:, 0], outcome[:, 1], axis=None)
        res2 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1], axis=None)
        assert_allclose(res1, res2)
        res1 = stats.ttest_ind(outcome[:, :2], outcome[:, 2:], axis=0)
        res2 = mstats.ttest_ind(outcome[:, :2], outcome[:, 2:], axis=0)
        assert_allclose(res1, res2)

        # Check default is axis=0
        res3 = mstats.ttest_ind(outcome[:, :2], outcome[:, 2:])
        assert_allclose(res2, res3)

        # Check equal_var
        res4 = stats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=True)
        res5 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=True)
        assert_allclose(res4, res5)
        res4 = stats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=False)
        res5 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=False)
        assert_allclose(res4, res5) 
Example 62
Project: P3_image_processing   Author: latedude2   File: test_mstats_basic.py    MIT License
def test_fully_masked(self):
        np.random.seed(1234567)
        outcome = ma.masked_array(np.random.randn(3, 2), mask=[[1, 1, 1], [0, 0, 0]])
        with suppress_warnings() as sup:
            sup.filter(RuntimeWarning, "invalid value encountered in absolute")
            for pair in [(outcome[:, 0], outcome[:, 1]), ([np.nan, np.nan], [1.0, 2.0])]:
                t, p = mstats.ttest_ind(*pair)
                assert_array_equal(t, (np.nan, np.nan))
                assert_array_equal(p, (np.nan, np.nan)) 
Example 63
Project: P3_image_processing   Author: latedude2   File: test_mstats_basic.py    MIT License
def test_result_attributes(self):
        np.random.seed(1234567)
        outcome = np.random.randn(20, 4) + [0, 0, 1, 2]

        res = mstats.ttest_ind(outcome[:, 0], outcome[:, 1])
        attributes = ('statistic', 'pvalue')
        check_named_results(res, attributes, ma=True) 
Example 64
Project: P3_image_processing   Author: latedude2   File: test_mstats_basic.py    MIT License
def test_empty(self):
        res1 = mstats.ttest_ind([], [])
        assert_(np.all(np.isnan(res1))) 
Example 65
Project: SyntheticStatistics   Author: BlissChapman   File: compute_univariate_power.py    MIT License
def power_calculations(d1, d2, n_1, n_2, alpha=0.05, k=10**3):
    # Use a bootstrap technique to estimate the distribution of the
    #  p-value statistic
    two_sample_t_test_p_value_dist = []
    #mmd_test_stat_dist = []
    for br in range(k):
        d1_replicate = np.random.choice(d1, size=n_1, replace=True)
        d2_replicate = np.random.choice(d2, size=n_2, replace=True)

        # Classical two sample t test
        two_sample_t_test = ttest_ind(d1_replicate, d2_replicate, equal_var=True)
        two_sample_t_test_p_value_dist.append(two_sample_t_test.pvalue)

        # MMD statistic
        # d1_replicate = np.expand_dims(d1_replicate, 1)
        # d2_replicate = np.expand_dims(d2_replicate, 1)
        # mmd_stat = mmd(d1_replicate, d2_replicate)[1]
        # mmd_test_stat_dist.append(mmd_stat)

    # Use Monte Carlo to estimate the power of a test with significance level alpha
    #    => fraction of p-values less than alpha
    #    => fraction of MMD statistics greater than alpha
    two_sample_t_test_power = np.mean([p < alpha for p in two_sample_t_test_p_value_dist])
    # mmd_test_power = np.mean([mmd_stat > alpha for mmd_stat in mmd_test_stat_dist])

    return two_sample_t_test_power  # , mmd_test_power


# Compute power for a test between the real and syn distributions respectively 
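
A hypothetical driver for power_calculations as defined above (the data, sample sizes, and variable names are illustrative, not from the project):

import numpy as np

np.random.seed(0)
real = np.random.normal(loc=0.0, scale=1.0, size=500)   # stand-in for the real data
syn = np.random.normal(loc=0.5, scale=1.0, size=500)    # stand-in for the synthetic data
power = power_calculations(real, syn, n_1=50, n_2=50, alpha=0.05, k=1000)
print("estimated power:", power)   # approaches 1 as the mean shift or the sample size grows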
Example 66
Project: SyntheticStatistics   Author: BlissChapman   File: multiple_comparison.py    MIT License 5 votes vote down vote up
def rejecting_voxels(d1, d2, alpha=0.05):
    two_sample_t_test_p_vals_by_voxel = np.zeros(d1.shape[1:])

    for i in range(two_sample_t_test_p_vals_by_voxel.shape[0]):
        for j in range(two_sample_t_test_p_vals_by_voxel.shape[1]):
            for k in range(two_sample_t_test_p_vals_by_voxel.shape[2]):
                d1_voxels = d1[:, i, j, k]
                d2_voxels = d2[:, i, j, k]
                two_sample_t_test_p_vals_by_voxel[i][j][k] = ttest_ind(d1_voxels, d2_voxels, equal_var=True).pvalue

    return fdr_correction(two_sample_t_test_p_vals_by_voxel, alpha=alpha) 
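
Because scipy.stats.ttest_ind accepts an axis argument, the voxel-wise triple loop above could also be expressed as a single vectorized call. A sketch (not part of the original project), assuming d1 and d2 have shape (n_subjects, X, Y, Z):

import numpy as np
from scipy.stats import ttest_ind

d1 = np.random.randn(12, 4, 4, 3)            # (n_subjects, X, Y, Z), illustrative shapes
d2 = np.random.randn(15, 4, 4, 3) + 0.3
# One test per voxel along axis 0; .pvalue has shape (X, Y, Z), matching the loop above.
p_vals_by_voxel = ttest_ind(d1, d2, axis=0, equal_var=True).pvalue
print(p_vals_by_voxel.shape)                 # (4, 4, 3)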
Example 67
Project: SyntheticStatistics   Author: BlissChapman   File: univariate_power_analysis.py    MIT License 5 votes vote down vote up
def power_calculations(d1, d2, n_1, n_2, alpha=0.05, k=50):
    # Use the bootstrap technique to estimate the distribution of the
    # p-value statistic
    two_sample_t_test_p_value_dist = []
    mmd_rejections = []
    for br in range(k):
        d1_replicate = np.random.choice(d1, size=n_1, replace=True)
        d2_replicate = np.random.choice(d2, size=n_2, replace=True)

        # Classical two sample t test
        two_sample_t_test = ttest_ind(d1_replicate, d2_replicate, equal_var=True)
        two_sample_t_test_p_value_dist.append(two_sample_t_test.pvalue)

        # MMD statistic
        d1_replicate = np.expand_dims(d1_replicate, 1)
        d2_replicate = np.expand_dims(d2_replicate, 1)
        mmd_reject = mmd(d1_replicate, d2_replicate, sigma=None, alpha=alpha, k=100)
        mmd_rejections.append(mmd_reject)

    # Use Monte Carlo to estimate the power of a test with significance level alpha
    #    => fraction of p-values less than alpha
    #    => fraction of MMD rejections
    two_sample_t_test_power = np.mean([p < alpha for p in two_sample_t_test_p_value_dist])
    mmd_test_power = np.mean(mmd_rejections)
    return two_sample_t_test_power, mmd_test_power


# Compute power for various n 
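
Matching the trailing comment, a hypothetical loop over per-group sample sizes (the data is illustrative; it assumes this version of power_calculations and its mmd helper are importable):

import numpy as np

np.random.seed(0)
d1 = np.random.normal(0.0, 1.0, size=1000)
d2 = np.random.normal(0.3, 1.0, size=1000)
for n in [10, 25, 50, 100, 200]:
    t_power, mmd_power = power_calculations(d1, d2, n_1=n, n_2=n, alpha=0.05, k=50)
    print(n, t_power, mmd_power)   # both powers should rise towards 1 as n grows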
Example 68
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_mstats_basic.py    MIT License 5 votes vote down vote up
def test_zero_division(self):
        t, p = mstats.ttest_ind([0, 0, 0], [1, 1, 1])
        assert_equal((np.abs(t), p), (np.inf, 0))

        with suppress_warnings() as sup:
            sup.filter(RuntimeWarning, "invalid value encountered in absolute")
            t, p = mstats.ttest_ind([0, 0, 0], [0, 0, 0])
            assert_array_equal(t, np.array([np.nan, np.nan]))
            assert_array_equal(p, np.array([np.nan, np.nan])) 
Example 69
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_mstats_basic.py    MIT License 5 votes vote down vote up
def test_vs_nonmasked(self):
        np.random.seed(1234567)
        outcome = np.random.randn(20, 4) + [0, 0, 1, 2]

        # 1-D inputs
        res1 = stats.ttest_ind(outcome[:, 0], outcome[:, 1])
        res2 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1])
        assert_allclose(res1, res2)

        # 2-D inputs
        res1 = stats.ttest_ind(outcome[:, 0], outcome[:, 1], axis=None)
        res2 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1], axis=None)
        assert_allclose(res1, res2)
        res1 = stats.ttest_ind(outcome[:, :2], outcome[:, 2:], axis=0)
        res2 = mstats.ttest_ind(outcome[:, :2], outcome[:, 2:], axis=0)
        assert_allclose(res1, res2)

        # Check default is axis=0
        res3 = mstats.ttest_ind(outcome[:, :2], outcome[:, 2:])
        assert_allclose(res2, res3)

        # Check equal_var
        res4 = stats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=True)
        res5 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=True)
        assert_allclose(res4, res5)
        res4 = stats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=False)
        res5 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=False)
        assert_allclose(res4, res5) 
Example 70
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_mstats_basic.py    MIT License 5 votes vote down vote up
def test_fully_masked(self):
        np.random.seed(1234567)
        outcome = ma.masked_array(np.random.randn(3, 2), mask=[[1, 1, 1], [0, 0, 0]])
        with suppress_warnings() as sup:
            sup.filter(RuntimeWarning, "invalid value encountered in absolute")
            for pair in [(outcome[:, 0], outcome[:, 1]), ([np.nan, np.nan], [1.0, 2.0])]:
                t, p = mstats.ttest_ind(*pair)
                assert_array_equal(t, (np.nan, np.nan))
                assert_array_equal(p, (np.nan, np.nan)) 
Example 71
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_mstats_basic.py    MIT License 5 votes vote down vote up
def test_result_attributes(self):
        np.random.seed(1234567)
        outcome = np.random.randn(20, 4) + [0, 0, 1, 2]

        res = mstats.ttest_ind(outcome[:, 0], outcome[:, 1])
        attributes = ('statistic', 'pvalue')
        check_named_results(res, attributes, ma=True) 
Example 72
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_mstats_basic.py    MIT License 5 votes vote down vote up
def test_empty(self):
        res1 = mstats.ttest_ind([], [])
        assert_(np.all(np.isnan(res1))) 
Example 73
Project: DataComp   Author: Cojabi   File: stats.py    Apache License 2.0 4 votes vote down vote up
def test_num_feats(zipper, feat_subset=None, method=None):
    """
    Performs a hypothesis test to check whether the value distributions of numerical features deviate significantly
    between the datasets. Currently a t-test (parametric) and a U-test (non-parametric) are supported.

    :param zipper: Dictionary storing the feature values of the datasets in a list. Feature name is used as the key.
    :param feat_subset: A list containing feature names. If given, analysis will only be performed for the contained \
    features. If not given all features will be considered.
    :param method: Specify which statistical test should be used. "u" for Mann-Whitney-U-test, "t" for t-test and \
    "wilcoxon" for a Wilcoxon signed rank test.
    :return: dictionary storing the p_values of the analysis. Feature names are used as keys.
    """

    # if no method is specified, use the Mann-Whitney U test as the default
    if method is None:
        method = "u"

    # initialize dictionary which stores the p_values
    p_values = dict()

    if feat_subset is None:
        feat_subset = zipper.keys()

    for feat in feat_subset:  # run through all variables

        # initiate dict in dict for d1 vs d2, d2 vs d3 etc. per feature
        p_values[feat] = dict()

        for i in range(len(zipper[feat]) - 1):  # select dataset1
            for j in range(i + 1, len(zipper[feat])):  # select dataset2

                # handle the case that all values of current feature are equal across current datasets
                if _test_if_all_vals_equal(zipper[feat][i], zipper[feat][j]):
                    warnings.warn(
                        "Values of \"{}\" are the identical across the two datasets. It will be skipped.".format(feat),
                        UserWarning)
                    # delete already created dict for i, j in p_values and continue with next feature
                    del p_values[feat]
                    continue

                # only calculate score if there are values in each dataset
                if zipper[feat][i] and zipper[feat][j]:
                    # calculate u-test and return p-value
                    if method == "u":
                        stat_test_result = mannwhitneyu(zipper[feat][i], zipper[feat][j], alternative="two-sided")
                    # calculate t-test and return p-value
                    elif method == "t":
                        stat_test_result = ttest_ind(zipper[feat][i], zipper[feat][j])
                    elif method == "wilcoxon":
                        stat_test_result = wilcoxon(zipper[feat][i], zipper[feat][j])

                    p_values[feat][i + 1, j + 1] = stat_test_result.pvalue

                # if one or both sets are empty
                else:
                    p_values[feat][i + 1, j + 1] = np.nan

    return p_values 
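
A minimal, hypothetical call to test_num_feats: two datasets per feature, keyed by feature name (the values are illustrative, and it assumes the DataComp stats module and its helpers are importable):

zipper = {
    "age":    [[34, 45, 52, 61, 48], [30, 41, 58, 66, 39]],
    "weight": [[70, 82, 91, 65, 77], [68, 77, 95, 72, 80]],
}
p_values = test_num_feats(zipper, method="t")
# p_values maps feature name -> {(dataset_i, dataset_j): p-value}, e.g. p_values["age"][(1, 2)]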
Example 74
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_stats.py    GNU General Public License v3.0 4 votes vote down vote up
def test_ttest_rel():
    # regression test
    tr,pr = 0.81248591389165692, 0.41846234511362157
    tpr = ([tr,-tr],[pr,pr])

    rvs1 = np.linspace(1,100,100)
    rvs2 = np.linspace(1.01,99.989,100)
    rvs1_2D = np.array([np.linspace(1,100,100), np.linspace(1.01,99.989,100)])
    rvs2_2D = np.array([np.linspace(1.01,99.989,100), np.linspace(1,100,100)])

    t,p = stats.ttest_rel(rvs1, rvs2, axis=0)
    assert_array_almost_equal([t,p],(tr,pr))
    t,p = stats.ttest_rel(rvs1_2D.T, rvs2_2D.T, axis=0)
    assert_array_almost_equal([t,p],tpr)
    t,p = stats.ttest_rel(rvs1_2D, rvs2_2D, axis=1)
    assert_array_almost_equal([t,p],tpr)

    # test on 3 dimensions
    rvs1_3D = np.dstack([rvs1_2D,rvs1_2D,rvs1_2D])
    rvs2_3D = np.dstack([rvs2_2D,rvs2_2D,rvs2_2D])
    t,p = stats.ttest_rel(rvs1_3D, rvs2_3D, axis=1)
    assert_array_almost_equal(np.abs(t), tr)
    assert_array_almost_equal(np.abs(p), pr)
    assert_equal(t.shape, (2, 3))

    t,p = stats.ttest_rel(np.rollaxis(rvs1_3D,2), np.rollaxis(rvs2_3D,2), axis=2)
    assert_array_almost_equal(np.abs(t), tr)
    assert_array_almost_equal(np.abs(p), pr)
    assert_equal(t.shape, (3, 2))

    olderr = np.seterr(all='ignore')
    try:
        # test zero division problem
        t,p = stats.ttest_rel([0,0,0],[1,1,1])
        assert_equal((np.abs(t),p), (np.inf, 0))
        assert_equal(stats.ttest_rel([0,0,0], [0,0,0]), (np.nan, np.nan))

        # check that nan in input array result in nan output
        anan = np.array([[1,np.nan],[-1,1]])
        assert_equal(stats.ttest_ind(anan, np.zeros((2,2))),([0, np.nan], [1,np.nan]))
    finally:
        np.seterr(**olderr)

    # test incorrect input shape raise an error
    x = np.arange(24)
    assert_raises(ValueError, stats.ttest_rel, x.reshape((8, 3)),
                  x.reshape((2, 3, 4))) 
Example 75
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_stats.py    GNU General Public License v3.0 4 votes vote down vote up
def test_ttest_ind():
    # regression test
    tr = 1.0912746897927283
    pr = 0.27647818616351882
    tpr = ([tr,-tr],[pr,pr])

    rvs2 = np.linspace(1,100,100)
    rvs1 = np.linspace(5,105,100)
    rvs1_2D = np.array([rvs1, rvs2])
    rvs2_2D = np.array([rvs2, rvs1])

    t,p = stats.ttest_ind(rvs1, rvs2, axis=0)
    assert_array_almost_equal([t,p],(tr,pr))
    t,p = stats.ttest_ind(rvs1_2D.T, rvs2_2D.T, axis=0)
    assert_array_almost_equal([t,p],tpr)
    t,p = stats.ttest_ind(rvs1_2D, rvs2_2D, axis=1)
    assert_array_almost_equal([t,p],tpr)

    # test on 3 dimensions
    rvs1_3D = np.dstack([rvs1_2D,rvs1_2D,rvs1_2D])
    rvs2_3D = np.dstack([rvs2_2D,rvs2_2D,rvs2_2D])
    t,p = stats.ttest_ind(rvs1_3D, rvs2_3D, axis=1)
    assert_almost_equal(np.abs(t), np.abs(tr))
    assert_array_almost_equal(np.abs(p), pr)
    assert_equal(t.shape, (2, 3))

    t,p = stats.ttest_ind(np.rollaxis(rvs1_3D,2), np.rollaxis(rvs2_3D,2), axis=2)
    assert_array_almost_equal(np.abs(t), np.abs(tr))
    assert_array_almost_equal(np.abs(p), pr)
    assert_equal(t.shape, (3, 2))

    olderr = np.seterr(all='ignore')
    try:
        # test zero division problem
        t,p = stats.ttest_ind([0,0,0],[1,1,1])
        assert_equal((np.abs(t),p), (np.inf, 0))
        assert_equal(stats.ttest_ind([0,0,0], [0,0,0]), (np.nan, np.nan))

        # check that nan in input array result in nan output
        anan = np.array([[1,np.nan],[-1,1]])
        assert_equal(stats.ttest_ind(anan, np.zeros((2,2))),([0, np.nan], [1,np.nan]))
    finally:
        np.seterr(**olderr) 
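
The zero-division and NaN branches exercised above, sketched directly (the results shown are the ones the test asserts; SciPy may additionally emit RuntimeWarnings, and the handling of degenerate inputs can differ slightly across versions):

import numpy as np
from scipy import stats

print(stats.ttest_ind([0, 0, 0], [1, 1, 1]))       # zero variance, different means -> (+/-inf, 0)
print(stats.ttest_ind([0, 0, 0], [0, 0, 0]))       # zero variance, equal means -> (nan, nan)
print(stats.ttest_ind([1.0, np.nan], [1.0, 2.0]))  # NaN input propagates -> (nan, nan)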
Example 76
Project: avocado-vt   Author: avocado-framework   File: regression.py    GNU General Public License v2.0 4 votes vote down vote up
def getTtestPvalue(self, fs_dict1, fs_dict2, paired=None, ratio=None):
        """
        The scipy library is used to compute the p-value of the t-test
        scipy: http://www.scipy.org/
        t-test: http://en.wikipedia.org/wiki/Student's_t-test
        """
        try:
            from scipy import stats
            import numpy as np
        except ImportError:
            print("No python scipy/numpy library installed!")
            return None

        ret = []
        s1 = self._process_files(fs_dict1, self._get_list_self, merge=False)
        s2 = self._process_files(fs_dict2, self._get_list_self, merge=False)
        # s*[line][col] contains items (line*col) of all sample files

        for line in range(len(s1)):
            tmp = []
            if type(s1[line]) != list:
                tmp = s1[line]
            else:
                if len(s1[line][0]) < 2:
                    continue
                for col in range(len(s1[line])):
                    avg1 = self._get_list_avg(s1[line][col])
                    avg2 = self._get_list_avg(s2[line][col])
                    sample1 = np.array(s1[line][col])
                    sample2 = np.array(s2[line][col])
                    warnings.simplefilter("ignore", RuntimeWarning)
                    if (paired):
                        if (ratio):
                            (_, p) = stats.ttest_rel(np.log(sample1), np.log(sample2))
                        else:
                            (_, p) = stats.ttest_rel(sample1, sample2)
                    else:
                        (_, p) = stats.ttest_ind(sample1, sample2)
                    flag = "+"
                    if float(avg1) > float(avg2):
                        flag = "-"
                    tmp.append(flag + "%f" % (1 - p))
                tmp = "|".join(tmp)
            ret.append(tmp)
        return ret 
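
The core branch in getTtestPvalue, taken out of the file-processing loop (a sketch; the sample data is illustrative):

import numpy as np
from scipy import stats

before = np.array([10.1, 12.3, 9.8, 11.0, 10.6])
after = np.array([10.9, 12.8, 10.1, 11.7, 11.0])

_, p_paired = stats.ttest_rel(before, after)                  # paired samples
_, p_ratio = stats.ttest_rel(np.log(before), np.log(after))   # paired, compared on a ratio scale
_, p_indep = stats.ttest_ind(before, after)                   # independent samples
print(p_paired, p_ratio, p_indep)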
Example 77
Project: vnpy_crypto   Author: birforce   File: test_weightstats.py    MIT License 4 votes vote down vote up
def test_ttest_2sample(self):
        x1, x2 = self.x1, self.x2
        x1r, x2r = self.x1r, self.x2r
        w1, w2 = self.w1, self.w2

        # Note: stats.ttest_ind handles 2d/nd arguments
        res_sp = stats.ttest_ind(x1r, x2r)
        assert_almost_equal(ttest_ind(x1, x2, weights=(w1, w2))[:2],
                            res_sp, 14)

        # check correct ttest independent of user ddof
        cm = CompareMeans(DescrStatsW(x1, weights=w1, ddof=0),
                          DescrStatsW(x2, weights=w2, ddof=1))
        assert_almost_equal(cm.ttest_ind()[:2], res_sp, 14)

        cm = CompareMeans(DescrStatsW(x1, weights=w1, ddof=1),
                          DescrStatsW(x2, weights=w2, ddof=2))
        assert_almost_equal(cm.ttest_ind()[:2], res_sp, 14)

        cm0 = CompareMeans(DescrStatsW(x1, weights=w1, ddof=0),
                           DescrStatsW(x2, weights=w2, ddof=0))
        cm1 = CompareMeans(DescrStatsW(x1, weights=w1, ddof=0),
                           DescrStatsW(x2, weights=w2, ddof=1))
        cm2 = CompareMeans(DescrStatsW(x1, weights=w1, ddof=1),
                           DescrStatsW(x2, weights=w2, ddof=2))

        res0 = cm0.ttest_ind(usevar='unequal')
        res1 = cm1.ttest_ind(usevar='unequal')
        res2 = cm2.ttest_ind(usevar='unequal')
        assert_almost_equal(res1, res0, 14)
        assert_almost_equal(res2, res0, 14)

        # check confint independent of user ddof
        res0 = cm0.tconfint_diff(usevar='pooled')
        res1 = cm1.tconfint_diff(usevar='pooled')
        res2 = cm2.tconfint_diff(usevar='pooled')
        assert_almost_equal(res1, res0, 14)
        assert_almost_equal(res2, res0, 14)

        res0 = cm0.tconfint_diff(usevar='unequal')
        res1 = cm1.tconfint_diff(usevar='unequal')
        res2 = cm2.tconfint_diff(usevar='unequal')
        assert_almost_equal(res1, res0, 14)
        assert_almost_equal(res2, res0, 14) 
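
The relationship the first assertion above relies on, sketched in isolation (assuming integer case weights; variable names are illustrative): a weighted t-test on (x, w) should match SciPy's unweighted test on the data with each observation repeated w times.

import numpy as np
from scipy import stats
from statsmodels.stats.weightstats import ttest_ind as weighted_ttest_ind

rng = np.random.RandomState(0)
x1, x2 = rng.randn(20), rng.randn(20) + 0.3
w1 = rng.randint(1, 4, size=20)   # integer case weights
w2 = rng.randint(1, 4, size=20)

t_w, p_w, df_w = weighted_ttest_ind(x1, x2, weights=(w1, w2))
t_r, p_r = stats.ttest_ind(np.repeat(x1, w1), np.repeat(x2, w2))
print(t_w, t_r)   # the weighted test should match the expanded-data test
print(p_w, p_r)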
Example 78
Project: Computable   Author: ktraunmueller   File: test_stats.py    MIT License 4 votes vote down vote up
def test_ttest_rel():
    # regression test
    tr,pr = 0.81248591389165692, 0.41846234511362157
    tpr = ([tr,-tr],[pr,pr])

    rvs1 = np.linspace(1,100,100)
    rvs2 = np.linspace(1.01,99.989,100)
    rvs1_2D = np.array([np.linspace(1,100,100), np.linspace(1.01,99.989,100)])
    rvs2_2D = np.array([np.linspace(1.01,99.989,100), np.linspace(1,100,100)])

    t,p = stats.ttest_rel(rvs1, rvs2, axis=0)
    assert_array_almost_equal([t,p],(tr,pr))
    t,p = stats.ttest_rel(rvs1_2D.T, rvs2_2D.T, axis=0)
    assert_array_almost_equal([t,p],tpr)
    t,p = stats.ttest_rel(rvs1_2D, rvs2_2D, axis=1)
    assert_array_almost_equal([t,p],tpr)

    # test on 3 dimensions
    rvs1_3D = np.dstack([rvs1_2D,rvs1_2D,rvs1_2D])
    rvs2_3D = np.dstack([rvs2_2D,rvs2_2D,rvs2_2D])
    t,p = stats.ttest_rel(rvs1_3D, rvs2_3D, axis=1)
    assert_array_almost_equal(np.abs(t), tr)
    assert_array_almost_equal(np.abs(p), pr)
    assert_equal(t.shape, (2, 3))

    t,p = stats.ttest_rel(np.rollaxis(rvs1_3D,2), np.rollaxis(rvs2_3D,2), axis=2)
    assert_array_almost_equal(np.abs(t), tr)
    assert_array_almost_equal(np.abs(p), pr)
    assert_equal(t.shape, (3, 2))

    olderr = np.seterr(all='ignore')
    try:
        # test zero division problem
        t,p = stats.ttest_rel([0,0,0],[1,1,1])
        assert_equal((np.abs(t),p), (np.inf, 0))
        assert_equal(stats.ttest_rel([0,0,0], [0,0,0]), (np.nan, np.nan))

        # check that nan in input array result in nan output
        anan = np.array([[1,np.nan],[-1,1]])
        assert_equal(stats.ttest_ind(anan, np.zeros((2,2))),([0, np.nan], [1,np.nan]))
    finally:
        np.seterr(**olderr) 
Example 79
Project: Computable   Author: ktraunmueller   File: test_stats.py    MIT License 4 votes vote down vote up
def test_ttest_ind():
    # regression test
    tr = 1.0912746897927283
    pr = 0.27647818616351882
    tpr = ([tr,-tr],[pr,pr])

    rvs2 = np.linspace(1,100,100)
    rvs1 = np.linspace(5,105,100)
    rvs1_2D = np.array([rvs1, rvs2])
    rvs2_2D = np.array([rvs2, rvs1])

    t,p = stats.ttest_ind(rvs1, rvs2, axis=0)
    assert_array_almost_equal([t,p],(tr,pr))
    t,p = stats.ttest_ind(rvs1_2D.T, rvs2_2D.T, axis=0)
    assert_array_almost_equal([t,p],tpr)
    t,p = stats.ttest_ind(rvs1_2D, rvs2_2D, axis=1)
    assert_array_almost_equal([t,p],tpr)

    # test on 3 dimensions
    rvs1_3D = np.dstack([rvs1_2D,rvs1_2D,rvs1_2D])
    rvs2_3D = np.dstack([rvs2_2D,rvs2_2D,rvs2_2D])
    t,p = stats.ttest_ind(rvs1_3D, rvs2_3D, axis=1)
    assert_almost_equal(np.abs(t), np.abs(tr))
    assert_array_almost_equal(np.abs(p), pr)
    assert_equal(t.shape, (2, 3))

    t,p = stats.ttest_ind(np.rollaxis(rvs1_3D,2), np.rollaxis(rvs2_3D,2), axis=2)
    assert_array_almost_equal(np.abs(t), np.abs(tr))
    assert_array_almost_equal(np.abs(p), pr)
    assert_equal(t.shape, (3, 2))

    olderr = np.seterr(all='ignore')
    try:
        # test zero division problem
        t,p = stats.ttest_ind([0,0,0],[1,1,1])
        assert_equal((np.abs(t),p), (np.inf, 0))
        assert_equal(stats.ttest_ind([0,0,0], [0,0,0]), (np.nan, np.nan))

        # check that nan in input array result in nan output
        anan = np.array([[1,np.nan],[-1,1]])
        assert_equal(stats.ttest_ind(anan, np.zeros((2,2))),([0, np.nan], [1,np.nan]))
    finally:
        np.seterr(**olderr) 
Example 80
Project: DialogEntailment   Author: nouhadziri   File: visualize.py    MIT License 4 votes vote down vote up
def plot_coherence(gen_df, groundtruth_df, is_human_judgment_included, plot_prefix, plots_dir):
    coherence_metrics = ["context_label", "Utt_-1_label", "Utt_-2_label"]
    axis_labels = {
        "context_label": "Predicted Class",
        "Utt_-1_label": "Predicted Class (Utt_-1)",
        "Utt_-2_label": "Predicted Class (Utt_-2)"
    }

    for x in coherence_metrics:
        plt.figure()
        sns.countplot(x=x, data=gen_df)
        _save_or_show(plots_dir, f"{plot_prefix}_{x}_bar")

        plt.figure()
        sns.countplot(x=x, data=groundtruth_df)
        _save_or_show(plots_dir, f"{plot_prefix}_{x}_gt_bar", show=False)

        if is_human_judgment_included:
            plt.figure()
            sns.countplot(x=x, hue="human_infer_label", data=gen_df)
            _save_or_show(plots_dir, f"{plot_prefix}_{x}_human_infer_bar", show=False)

            plt.figure()
            fig = sns.boxplot(x=x, y="human_score", data=gen_df,
                              fliersize=3, linewidth=1.5,
                              order=["entailment", "neutral", "contradiction"])
            fig.set(ylabel='Human Score', xlabel=axis_labels[x])
            _save_or_show(plots_dir, f"{plot_prefix}_{x}_human_score_box", show=False)

            plt.figure()
            sns.lineplot(x=x, y="human_score", data=gen_df, err_style="bars")
            _save_or_show(plots_dir, f"{plot_prefix}_{x}_human_score_line", show=False)

            correct_df = gen_df[gen_df[x] == gen_df["human_infer_label"]]
            accuracy = 100.0 * len(correct_df) / len(gen_df)
            print("*****")
            human_score_entail = gen_df[gen_df[x] == "entailment"]["human_score"]
            human_score_contra = gen_df[gen_df[x] == "contradiction"]["human_score"]
            human_score_neutral = gen_df[gen_df[x] == "neutral"]["human_score"]
            print(f"t-test Human Score entailment vs contradiction {ttest_ind(human_score_entail, human_score_contra)}")
            print(f"t-test Human Score entailment vs neutral {ttest_ind(human_score_entail, human_score_neutral)}")
            print(f"t-test Human Score neutral vs contradiction {ttest_ind(human_score_neutral, human_score_contra)}")
            print("---")
            print(f"Inference {x} Accuracy: {accuracy:.2f}% ({len(correct_df)}/{len(gen_df)})")

            for label in entailment_labels:
                n_corrects_per_label = len(correct_df[correct_df[x] == label])
                n_instances_per_label = len(gen_df[gen_df[x] == label])
                label_accuracy = 100.0 * math.safe_div(n_corrects_per_label, n_instances_per_label)
                print(f"  {x} {label}: {label_accuracy:.2f} ({n_corrects_per_label}/{n_instances_per_label})")

            n_gt_corrects = len(groundtruth_df[groundtruth_df[x] == "entailment"])
            gt_accuracy = 100.0 * n_gt_corrects / len(gen_df)
            print(f"GroundTruth {x} Accuracy: {gt_accuracy:.2f}% ({n_gt_corrects}/{len(gen_df)})")

            for label in entailment_labels[1:]:
                n_incorrects_per_label = len(groundtruth_df[groundtruth_df[x] == label])
                print(f"  {x} {label}: {n_incorrects_per_label}")

    plt.close('all')
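
The group-comparison pattern used above for the human scores, reduced to a toy DataFrame (the column names mirror those in plot_coherence; the scores are illustrative):

import pandas as pd
from scipy.stats import ttest_ind

gen_df = pd.DataFrame({
    "context_label": ["entailment", "entailment", "entailment",
                      "neutral", "neutral", "contradiction", "contradiction"],
    "human_score": [4.5, 4.0, 4.2, 3.0, 3.2, 1.5, 2.0],
})
entail = gen_df[gen_df["context_label"] == "entailment"]["human_score"]
contra = gen_df[gen_df["context_label"] == "contradiction"]["human_score"]
print(f"t-test Human Score entailment vs contradiction {ttest_ind(entail, contra)}")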