Python scipy.stats.stats.pearsonr() Examples

The following are 26 code examples of scipy.stats.stats.pearsonr(), collected from open-source projects. The source file, project, and license for each example are noted above it. You may also want to check out the other functions and classes available in the scipy.stats.stats module.
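Before the examples, a minimal standalone sketch of the function itself: pearsonr takes two equal-length sequences and returns the correlation coefficient together with a two-sided p-value, which is why most of the examples below index the result with [0]. (In recent SciPy releases the scipy.stats.stats module path is deprecated; importing from scipy.stats directly is the supported form.)

import numpy as np
from scipy.stats import pearsonr

x = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
y = np.array([1.2, 1.9, 3.1, 4.2, 4.8])

# pearsonr returns (r, p_value); r is close to 1.0 for this nearly linear data
r, p_value = pearsonr(x, y)
print(r, p_value)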
Example #1
Source File: multicollinearity.py    From intro_ds with Apache License 2.0
def uncorrelatedVariable(data):
    """
    用不相关的x1,x2搭建回归模型
    """
    # 在Windows下运行此脚本需确保Windows下的命令提示符(cmd)能显示中文
    print("x1和x2的相关系数为:%s" % scss.pearsonr(data["x1"], data["x2"])[0])
    Y = data["y"]
    X = sm.add_constant(data["x1"])
    re = trainModel(X, Y)
    print(re.summary())
    X1 = sm.add_constant(data["x2"])
    re1 = trainModel(X1, Y)
    print(re1.summary())
    X2 = sm.add_constant(data[["x1", "x2"]])
    re2 = trainModel(X2, Y)
    print(re2.summary()) 
Example #2
Source File: run.py    From KitcheNette with Apache License 2.0
def evaluation(y_pred, y_true, th):
    corr = pearsonr(np.ravel(y_pred), y_true)[0]
    msetotal = mse_at_k(y_pred, y_true, 1.0)
    mse1 = mse_at_k(y_pred, y_true, 0.01)
    mse2 = mse_at_k(y_pred, y_true, 0.02)
    mse5 = mse_at_k(y_pred, y_true, 0.05)

    auroc = float('nan')
    if len([x for x in y_true if x > th]) > 0:
        auroc = roc_auc_score([1 if x > th else 0 for x in y_true], y_pred)
    precision1 = precision_at_k(y_pred, y_true, 0.01, th)
    precision2 = precision_at_k(y_pred, y_true, 0.02, th)
    precision5 = precision_at_k(y_pred, y_true, 0.05, th)
    precision10 = precision_at_k(y_pred, y_true, 0.1, th)
    return (corr, msetotal, mse1, mse2, mse5, auroc, precision1, precision2, precision5, precision10)

Example #3
Source File: metrics.py    From RRMPG with MIT License
def calc_r(obs, sim):
    """Calculate the pearson r coefficient.
    
    Interface to the scipy implementation of the pearson r coeffienct.
    
    Args:
        obs: Array of the observed values
        sim: Array of the simulated values

    Returns:
        The pearson r coefficient of the simulation compared to the observation.
 
    """
    # Validation check on the input arrays
    obs = validate_array_input(obs, np.float64, 'obs')
    sim = validate_array_input(sim, np.float64, 'sim')
    
    if len(obs) != len(sim):
        raise ValueError("Arrays must have the same size.")
    
    # pearsonr returns the pair (r, p-value); return only the coefficient
    return pearsonr(obs, sim)[0] 
Example #4
Source File: multicollinearity.py    From intro_ds with Apache License 2.0
def correlatedVariable(data):
    """
    用强相关的x1,x3搭建模型
    """
    print("x1和x3的相关系数为:%s" % scss.pearsonr(data["x1"], data["x3"])[0])
    Y = data["y"]
    X = sm.add_constant(data["x1"])
    re = trainModel(X, Y)
    print(re.summary())
    X1 = sm.add_constant(data["x3"])
    re1 = trainModel(X1, Y)
    print(re1.summary())
    X2 = sm.add_constant(data[["x1", "x3"]])
    re2 = trainModel(X2, Y)
    print(re2.summary())
    # Check for multicollinearity
    print("Testing the hypothesis that x1 and x3 are jointly insignificant:")
    print(re2.f_test(["x1=0", "x3=0"]))
    vif = pd.DataFrame()
    vif["VIF Factor"] = [variance_inflation_factor(X2.values, i) for i in range(X2.shape[1])]
    vif["features"] = X2.columns
    print(vif) 
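A rule-of-thumb reading of the VIF table above: values well above 10 (some practitioners use 5) indicate problematic multicollinearity, while the VIF reported for the constant term is not meaningful and is usually ignored.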
Example #5
Source File: aggregation.py    From aggregation with Apache License 2.0
def __plot_closest_neighbours__(self,zooniverse_id_list):
        totalY = []
        totalDist = []

        for zooniverse_id in zooniverse_id_list:
            if zooniverse_id in self.closet_neighbours:
                pt_l,dist_l = zip(*self.closet_neighbours[zooniverse_id])
                X_pts,Y_pts = zip(*pt_l)

                # negate the y-values to flip the image vertically
                Y_pts = [-p for p in Y_pts]

                plt.plot(dist_l,Y_pts,'.',color="red")

                totalDist.extend(dist_l)
                totalY.extend(Y_pts)

        # correlate distance and height over all accumulated points
        print(pearsonr(totalDist, totalY))
        plt.show() 
Example #6
Source File: aggregation.py    From aggregation with Apache License 2.0
def __plot_cluster_size__(self,zooniverse_id_list):
        data = {}

        for zooniverse_id in zooniverse_id_list:
            if self.clusterResults[zooniverse_id] is not None:
                centers,pts,users = self.clusterResults[zooniverse_id]

                Y = [700-c[1] for c in centers]
                X = [len(p) for p in pts]

                plt.plot(X,Y,'.',color="blue")

                for x,y in zip(X,Y):
                    if not(x in data):
                        data[x] = [y]
                    else:
                        data[x].append(y)

        # correlate cluster size and height across all accumulated points
        all_X = [x for x, ys in data.items() for _ in ys]
        all_Y = [y for ys in data.values() for y in ys]
        print(pearsonr(all_X, all_Y))

        X = sorted(data.keys())
        Y = [np.mean(data[x]) for x in X]
        plt.plot(X,Y,'o-')
        plt.xlabel("Cluster Size")
        plt.ylabel("Height in Y-Pixels")
        plt.show() 
Example #7
Source File: keras_utils.py    From Benchmarks with MIT License
def evaluate_autoencoder(y_pred, y_test):
    mse = mean_squared_error(y_pred, y_test)
    r2 = r2_score(y_test, y_pred)
    corr, _ = pearsonr(y_pred.flatten(), y_test.flatten())
    return {'mse': mse, 'r2_score': r2, 'correlation': corr} 
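One detail worth noting: mean_squared_error is symmetric in its two arguments, so passing (y_pred, y_test) is harmless, but r2_score is not, which is why the example passes the true values first there.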
Example #8
Source File: uno_clr_keras2.py    From Benchmarks with MIT License
def evaluate_prediction(y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    corr, _ = pearsonr(y_true, y_pred)
    return {'mse': mse, 'mae': mae, 'r2': r2, 'corr': corr} 
Example #9
Source File: uno.py    From Benchmarks with MIT License
def evaluate_prediction(y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    corr, _ = pearsonr(y_true, y_pred)
    return {'mse': mse, 'mae': mae, 'r2': r2, 'corr': corr} 
Example #10
Source File: combo_baseline_keras2.py    From Benchmarks with MIT License
def evaluate_prediction(y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    corr, _ = pearsonr(y_true, y_pred)
    return {'mse': mse, 'mae': mae, 'r2': r2, 'corr': corr} 
Example #11
Source File: combo_dose.py    From Benchmarks with MIT License
def evaluate_prediction(y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    corr, _ = pearsonr(y_true, y_pred)
    return {'mse': mse, 'mae': mae, 'r2': r2, 'corr': corr} 
Example #12
Source File: p1b1_baseline_keras2.py    From Benchmarks with MIT License
def on_epoch_end(self, epoch, logs=None):
        y_pred = self.model.predict(self.validation_data[0])
        r2 = r2_score(self.validation_data[1], y_pred)
        corr, _ = pearsonr(self.validation_data[1].flatten(), y_pred.flatten())
        print("\nval_r2:", r2)
        print(y_pred.shape)
        print("\nval_corr:", corr, "val_r2:", r2)
        print("\n") 
Example #13
Source File: p1b1.py    From Benchmarks with MIT License
def evaluate_autoencoder(y_pred, y_test):
    try:
        mse = mean_squared_error(y_pred, y_test)
        r2 = r2_score(y_test, y_pred)
        corr, _ = pearsonr(y_pred.flatten(), y_test.flatten())
    except ValueError:
        # NaNs or other bad values break the metric computations; one could
        # also check np.isnan(y_pred).any() or np.isnan(y_test).any() up front
        mse = 0
        r2 = 0
        corr = 0
    return {'mse': mse, 'r2_score': r2, 'correlation': corr} 
Example #14
Source File: correlations.py    From EdwardsLab with MIT License
def pairwise(data, headers):
    """
    Calculate pairwise distances
    :param data:
    :param headers:
    :return:
    """

    cols = range(len(headers))

    for i, j in combinations(cols, 2):
        pearson, p = pearsonr(data[i], data[j])
        print("{}\t{}\t{}\t{}".format(headers[i], headers[j], pearson, p)) 
Example #15
Source File: utility.py    From DeepLearn with MIT License
def sum_corr(view1,view2,flag=''):
    
    print("test correlation")
    corr = 0
    for i,j in zip(view1,view2):
        corr += measures.pearsonr(i,j)[0]
    print('avg sum corr ::',flag,'::',corr/len(view1)) 
Example #16
Source File: similarity.py    From 4lang with MIT License
def main_word_test(cfg):
    from scipy.stats.stats import pearsonr
    word_sim = WordSimilarity(cfg)
    out_dir = cfg.get('word_sim', 'out_dir')
    result_str = 'word1\tword2\tgold\tsim\tdiff\n'

    # TODO: only testing
    # machine = word_sim.lexicon.get_machine('merry-go-round')
    # links, nodes = word_sim.get_links_nodes(machine)
    # machine1 = word_sim.text_to_4lang.process_phrase('federal assembly')
    # nodes1 = word_sim.get_nodes_from_text_machine(machine1)

    test_pairs = get_test_pairs(cfg.get('sim', 'word_test_data'))
    sims, gold_sims = [], []
    for (w1, w2), gold_sim in test_pairs.items():
        sim = word_sim.word_similarities(w1, w2)  # dummy POS-tags
        if sim is None:
            continue
        sim = next(iter(sim.values()))
        gold_sims.append(gold_sim)
        sims.append(sim)
        result_str += "{0}\t{1}\t{2}\t{3}\t{4}".format(
            w1, w2, gold_sim, sim, math.fabs(sim - gold_sim)) + "\n"

    print "NO path exist: {0}".format(word_sim.sim_feats.no_path_cnt)
    print "Pearson: {0}".format(pearsonr(gold_sims, sims))
    print_results(out_dir, result_str) 
Example #17
Source File: similarity.py    From 4lang with MIT License
def compare(self):
        sims = [self.machine_sims[pair] for pair in self.sorted_word_pairs]
        vec_sims = [self.vec_sims[pair] for pair in self.sorted_word_pairs]

        pearson = pearsonr(sims, vec_sims)
        print "compared {0} distance pairs.".format(len(sims))
        print "Pearson-correlation: {0}".format(pearson) 
Example #18
Source File: tilt_angle_estimation.py    From aitom with GNU General Public License v3.0
def wedge_mask_cor(v_abs, ops):

    for op in ops:
        m = tilt_mask(size=v_abs.shape, tilt_ang1=op['ang1'], tilt_ang2=op['ang2'], tilt_axis=op['tilt_axis'],
                            light_axis=op['light_axis'])
        # m = TIVWU.wedge_mask(size=v_abs.shape, ang1=op['ang1'], ang2=op['ang2'], tilt_axis=op['direction'])
        m = m.astype(N.float64)

        op['cor'] = float(pearsonr(v_abs.flatten(), m.flatten())[0])

    return ops 
Example #19
Source File: describe.py    From hypertools with MIT License
def get_corr(reduced, alldims):
    return pearsonr(alldims.ravel(), reduced.ravel())[0] 
Example #20
Source File: correlation_analysis.py    From copper_price_forecast with GNU General Public License v3.0
def cor_analysis(co_price, pcb_price):
    """
    铜价和PCB价格相关性分析 
    """
    cor_draw(co_price, pcb_price)
    print(pearsonr(co_price.values, pcb_price.values)) 
Example #21
Source File: evaluate.py    From OpenKiwi with GNU Affero General Public License v3.0
def score_sentence_level(gold, pred):
    pearson = pearsonr(gold, pred)
    mae = mean_absolute_error(gold, pred)
    rmse = np.sqrt(mean_squared_error(gold, pred))

    spearman = spearmanr(
        rankdata(gold, method="ordinal"), rankdata(pred, method="ordinal")
    )
    delta_avg = delta_average(gold, rankdata(pred, method="ordinal"))

    return (pearson[0], mae, rmse), (spearman[0], delta_avg) 
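A side note on the Spearman computation above: Spearman's rho is exactly a Pearson correlation computed on the ranks, which a short standalone check (independent of OpenKiwi) makes concrete:

import numpy as np
from scipy.stats import pearsonr, spearmanr, rankdata

gold = np.array([0.1, 0.4, 0.35, 0.8, 0.7])
pred = np.array([0.2, 0.3, 0.5, 0.9, 0.6])

# Spearman's rho equals Pearson's r on the rank-transformed data
rho = spearmanr(gold, pred)[0]
r_on_ranks = pearsonr(rankdata(gold), rankdata(pred))[0]
print(np.isclose(rho, r_on_ranks))  # True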
Example #22
Source File: metrics.py    From OpenKiwi with GNU Affero General Public License v3.0
def summarize(self):
        pearson = pearsonr(self.predictions, self.target)[0]
        summary = {self.metric_name: pearson}
        return self._prefix_keys(summary) 
Example #23
Source File: utility.py    From DeepLearn with MIT License
def cal_sim(model,ind1,ind2=1999):
    view1 = np.load("test_v1.npy")[0:ind1]
    view2 = np.load("test_v2.npy")[0:ind2]
    label1 = np.load('test_l.npy')
    x1 = project(model,[view1,np.zeros_like(view1)])
    x2 = project(model,[np.zeros_like(view2),view2])
    label2 = []
    MAP = 0
    for i, j in enumerate(x1):
        AP = 0
        # correlate this view-1 projection against every view-2 projection;
        # pearsonr returns (r, p-value), so keep only the coefficient
        cor = [pearsonr(j.tolist(), y.tolist())[0] for y in x2]
        # rank view-2 candidates by descending correlation
        val = sorted(((q, p) for p, q in enumerate(cor)), reverse=True)
        label2.append(val[0:4])
        t = [w[1] for w in val[0:7]]
        # reciprocal-rank credit for hits among the top 7 candidates
        for x, y in enumerate(t):
            if y in range(i, i + 5):
                AP += 1 / (x + 1)
        print(t)
        print(AP)
        MAP += AP
    print('MAP is : ', MAP / ind1) 
Example #24
Source File: showxcorrx.py    From rapidtide with Apache License 2.0
def _get_null_distribution(indata, xcorr_x, thefilter, prewindow, detrendorder,
                           searchstart, searchend, Fs, dofftcorr,
                           windowfunc='hamming', corrweighting='none',
                           numreps=1000):
    """
    Get an empirical null distribution from the data.
    """
    print('estimating significance distribution using {0} '
          'repetitions'.format(numreps))
    corrlist = zeros(numreps, dtype='float')
    corrlist_pear = zeros(numreps, dtype='float')
    xcorr_x_trim = xcorr_x[searchstart:searchend + 1]

    filteredindata = tide_math.corrnormalize(thefilter.apply(Fs, indata),
                                             prewindow=prewindow,
                                             detrendorder=detrendorder,
                                             windowfunc=windowfunc)
    for i in range(numreps):
        # make a shuffled copy of the regressors
        shuffleddata = permutation(indata)

        # filter it
        filteredshuffleddata = np.nan_to_num(
            tide_math.corrnormalize(thefilter.apply(Fs, shuffleddata),
                                    prewindow=prewindow,
                                    detrendorder=detrendorder,
                                    windowfunc=windowfunc))

        # crosscorrelate with original
        theshuffledxcorr = tide_corr.fastcorrelate(filteredindata,
                                                   filteredshuffleddata,
                                                   usefft=dofftcorr,
                                                   weighting=corrweighting)

        # find and tabulate correlation coefficient at optimal lag
        theshuffledxcorr_trim = theshuffledxcorr[searchstart:searchend + 1]
        maxdelay = xcorr_x_trim[argmax(theshuffledxcorr_trim)]
        corrlist[i] = theshuffledxcorr_trim[argmax(theshuffledxcorr_trim)]

        # find and tabulate correlation coefficient at 0 lag
        corrlist_pear[i] = pearsonr(filteredindata, filteredshuffleddata)[0]

    # return the distribution data
    return corrlist, corrlist_pear 
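Stripped of the filtering, windowing, and cross-correlation machinery, the null-distribution idea above reduces to a few lines: shuffle one series, recorrelate, repeat, and compare the observed coefficient against the resulting distribution (a simplified standalone sketch):

import numpy as np
from scipy.stats import pearsonr

rng = np.random.default_rng(0)
x = rng.standard_normal(200)
y = 0.5 * x + rng.standard_normal(200)

observed_r = pearsonr(x, y)[0]

# empirical null: correlations of x against shuffled copies of y
null_r = np.array([pearsonr(x, rng.permutation(y))[0] for _ in range(1000)])

# the observed r should fall well outside the central 95% of the null
print(observed_r, np.quantile(null_r, [0.025, 0.975]))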
Example #25
Source File: transformer.py    From rsmtool with Apache License 2.0
def find_feature_transform(cls,
                               feature_name,
                               feature_value,
                               scores):
        """
        Identify the best transformation based on the
        highest absolute Pearson correlation with human score.

        Parameters
        ----------
        feature_name: str
            Name of feature for which to find the transformation.
        feature_value: pandas Series
            Series containing feature values.
        scores: pandas Series
            Numeric human scores.

        Returns
        -------
        best_transformation: str
            The name of the transformation which gives the highest correlation
            between the feature values and the human scores. See
            :ref:`documentation <select_transformations_rsmtool>` for the
            full list of transformations.
        """

        # Do not use sqrt and ln for potential negative features.
        # Do not use inv for positive features.
        if any(feature_value < 0):
            applicable_transformations = ['org', 'inv']
        else:
            applicable_transformations = ['org',
                                          'sqrt',
                                          'addOneInv',
                                          'addOneLn']

        correlations = []
        for trans in applicable_transformations:
            try:
                transformed_value = FeatureTransformer.transform_feature(feature_value,
                                                                         feature_name,
                                                                         trans)

                correlations.append(abs(pearsonr(transformed_value, scores)[0]))
            except ValueError:
                # If the transformation returns an error, append 0.
                correlations.append(0)
        best = np.argmax(correlations)
        best_transformation = applicable_transformations[best]
        return best_transformation 
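The example depends on FeatureTransformer.transform_feature, which is not shown here. A minimal standalone sketch of the same selection idea, with the four non-negative-feature transformations written out as plain numpy functions (the names mirror rsmtool's, but the implementations are assumptions for illustration):

import numpy as np
from scipy.stats import pearsonr

# assumed implementations of the transformation names used above
TRANSFORMS = {
    'org': lambda x: x,
    'sqrt': np.sqrt,
    'addOneInv': lambda x: 1.0 / (x + 1.0),
    'addOneLn': lambda x: np.log(x + 1.0),
}

def pick_transform(values, scores):
    # keep the transformation with the highest |Pearson r| against the scores
    corrs = {name: abs(pearsonr(f(values), scores)[0])
             for name, f in TRANSFORMS.items()}
    return max(corrs, key=corrs.get)

values = np.array([1.0, 4.0, 9.0, 16.0, 25.0])
scores = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
print(pick_transform(values, scores))  # 'sqrt' recovers the linear relation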
Example #26
Source File: metrics.py    From RRMPG with MIT License
def calc_kge(obs, sim):
    """Calculate the Kling-Gupta-Efficiency.
    
    Calculate the original KGE value following [1].

    Args:
        obs: Array of the observed values
        sim: Array of the simulated values

    Returns:
        The KGE value for the simulation, compared to the observation.

    Raises:
        ValueError: If the arrays are not of equal size or have non-numeric
            values.
        TypeError: If the arrays are not of a supported datatype.
        RuntimeError: If the mean or the standard deviation of the observations
            equal 0.
    
    [1] Gupta, H. V., Kling, H., Yilmaz, K. K., & Martinez, G. F. (2009). 
    Decomposition of the mean squared error and NSE performance criteria: 
    Implications for improving hydrological modelling. Journal of Hydrology, 
    377(1-2), 80-91.
    
    """
    # Validation check on the input arrays
    obs = validate_array_input(obs, np.float64, 'obs')
    sim = validate_array_input(sim, np.float64, 'sim')
    
    if len(obs) != len(sim):
        raise ValueError("Arrays must have the same size.")
     
    mean_obs = np.mean(obs)
    if mean_obs == 0:
        msg = "KGE not definied if the mean of the observations equals 0."
        raise RuntimeError(msg)
    
    std_obs = np.std(obs)
    if std_obs == 0:
        msg = ["KGE not definied if the standard deviation of the ",
               "observations equals 0."]
        raise RuntimeError("".join(msg))
    
    r = pearsonr(obs, sim)[0]
    alpha = np.std(sim) / std_obs
    beta = np.mean(sim) / mean_obs
    
    kge_val = 1 - np.sqrt((r-1)**2 + (alpha-1)**2 + (beta-1)**2)
    
    return kge_val
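As a quick sanity check of the formula (a standalone sketch that skips RRMPG's input validation): a perfect simulation has r = alpha = beta = 1 and therefore KGE = 1.

import numpy as np
from scipy.stats import pearsonr

obs = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
sim = obs.copy()  # a perfect simulation reproduces the observations exactly

r = pearsonr(obs, sim)[0]
alpha = np.std(sim) / np.std(obs)
beta = np.mean(sim) / np.mean(obs)

kge = 1 - np.sqrt((r - 1) ** 2 + (alpha - 1) ** 2 + (beta - 1) ** 2)
print(kge)  # 1.0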