Python rpy2.robjects.FloatVector() Examples

The following are 21 code examples of rpy2.robjects.FloatVector(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module rpy2.robjects , or try the search function .
Example #1
Source File: test_serialization.py    From rpy2 with GNU General Public License v2.0 6 votes vote down vote up
def test_pickle():
    tmp_file = tempfile.NamedTemporaryFile()
    robj = robjects.baseenv["pi"]
    pickle.dump(robj, tmp_file)
    tmp_file.flush()
    tmp_file.seek(0)
    robj_again = pickle.load(tmp_file)
    tmp_file.close()

    assert isinstance(robj, robjects.FloatVector)

    # Check that underlying R objects are identical.
    assert robjects.baseenv["identical"](robj,
                                         robj_again)[0]
    # Check the instance dict is also identical
    assert set(robj.__dict__.keys()) == set(robj_again.__dict__.keys()) 
Example #2
Source File: moment_est.py    From OptimalPortfolio with MIT License 6 votes vote down vote up
def sample_coM3(invariants):
    """
    Calculates sample third order co-moment matrix
    Taps into the R package PerformanceAnalytics through rpy2

    :param invariants: sample data of market invariants
    :type invariants: pd.Dataframe
    :param frequency: time horizon of projection, default set ot 252 days
    :type frequency: int
    :return: sample skew dataframe
    """
    
    importr('PerformanceAnalytics')
    if not isinstance(invariants, pd.DataFrame):
        warnings.warn("invariants not a pd.Dataframe", RuntimeWarning)
        invariants = pd.DataFrame(invariants)
    p = invariants.shape[1]
    coskew_function = robjects.r('M3.MM')
    r_inv_vec = robjects.FloatVector(np.concatenate(invariants.values))
    r_invariants = robjects.r.matrix(r_inv_vec,nrow=p,ncol=p)
    r_M3 = coskew_function(r_invariants)
    
    return np.matrix(r_M3) 
Example #3
Source File: moment_est.py    From OptimalPortfolio with MIT License 6 votes vote down vote up
def sample_coM4(invariants):
    """
    Calculates sample fourth order co-moment matrix
    Taps into the R package PerformanceAnalytics through rpy2

    :param invariants: sample data of market invariants
    :type invariants: pd.Dataframe
    :param frequency: time horizon of projection, default set ot 252 days
    :type frequency: int
    :return: sample skew dataframe
    """
    
    importr('PerformanceAnalytics')
    if not isinstance(invariants, pd.DataFrame):
        warnings.warn("invariants not a pd.Dataframe", RuntimeWarning)
        invariants = pd.DataFrame(invariants)
    p = invariants.shape[1]
    coskew_function = robjects.r('M4.MM')
    r_inv_vec = robjects.FloatVector(np.concatenate(invariants.values))
    r_invariants = robjects.r.matrix(r_inv_vec,nrow=p,ncol=p)
    r_M4 = coskew_function(r_invariants)
    
    return np.matrix(r_M4) 
Example #4
Source File: RComparisonBase.py    From tbats with MIT License 5 votes vote down vote up
def r_tbats(self, y, components):
        components = components.copy()
        if 'seasonal_periods' in components:
            components['seasonal_periods'] = ro.IntVector(components['seasonal_periods'])
        importr('forecast')
        r_bats_func = ro.r['tbats']
        r_forecast = ro.r['forecast']
        r_y = ro.FloatVector(list(y))
        r_model = r_bats_func(r_y, **components)
        summary = r_forecast(r_model)
        # predictions = np.array(summary.rx('fitted')).flatten()
        return summary, r_model 
Example #5
Source File: benchmarks.py    From rpy2 with GNU General Public License v2.0 5 votes vote down vote up
def setup_func(kind):
#-- setup_sum-begin
    n = 20000
    x_list = [random.random() for i in range(n)]
    module = None
    if kind == "array.array":
        import array as module
        res = module.array('f', x_list)
    elif kind == "numpy.array":
        import numpy as module
        res = module.array(x_list, 'f')
    elif kind == "FloatVector":
        import rpy2.robjects as module
        res = module.FloatVector(x_list)
    elif kind == "FloatSexpVector":
        import rpy2.rinterface as module
        module.initr()
        res = module.FloatSexpVector(x_list)
    elif kind == "FloatSexpVector-memoryview-array":
        import rpy2.rinterface as module
        module.initr()
        tmp = module.FloatSexpVector(x_list)
        mv = tmp.memoryview()
        res = array.array(mv.format, mv)
    elif kind == "list":
        res = x_list
    elif kind == "R":
        import rpy2.robjects as module
        res = module.rinterface.FloatSexpVector(x_list)
        module.globalenv['x'] = res
        res = None
#-- setup_sum-end
    else:
        raise ValueError("Unknown kind '%s'" %kind)
    return (res, module) 
Example #6
Source File: test_vector.py    From rpy2 with GNU General Public License v2.0 5 votes vote down vote up
def test_sample_probabilities():
    vec = robjects.IntVector(range(100))
    spl = vec.sample(10, probabilities=robjects.FloatVector([.01] * 100))
    assert len(spl) == 10 
Example #7
Source File: test_vector.py    From rpy2 with GNU General Public License v2.0 5 votes vote down vote up
def test_sequence_to_vector():
    res = robjects.sequence_to_vector((1, 2, 3))
    assert isinstance(res, robjects.IntVector)

    res = robjects.sequence_to_vector((1, 2, 3.0))
    assert isinstance(res, robjects.FloatVector)

    res = robjects.sequence_to_vector(('ab', 'cd', 'ef'))
    assert isinstance(res, robjects.StrVector)

    with pytest.raises(ValueError):
        robjects.sequence_to_vector(list()) 
Example #8
Source File: test_vector.py    From rpy2 with GNU General Public License v2.0 5 votes vote down vote up
def test_float_repr():
    vec = robjects.vectors.FloatVector((1,2,3))
    r = repr(vec).split('\n')
    assert r[-1].startswith('[')
    assert r[-1].endswith(']')
    assert len(r[-1].split(',')) == 3 
Example #9
Source File: test_vector.py    From rpy2 with GNU General Public License v2.0 5 votes vote down vote up
def test_nareal():
    vec = robjects.FloatVector((1.0, 2.0, 3.0))
    vec[0] = robjects.NA_Real
    assert robjects.baseenv['is.na'](vec)[0] is True 
Example #10
Source File: PipelineTransfacMatch.py    From CGATPipelines with MIT License 5 votes vote down vote up
def nullDistPermutations(tfbs_genes, matched_genes, nPerms=1000):
    '''
    Randomly generate a null distribution of genes from the background
    set, match for pCpG and total number of genes.  Calculate
    enrichment of these genes for the given TFBS.  Permuate nPerm times.
    Return a median enrichment and p-value from the empirical
    cumulative frequency distribution.
    '''

    null_dist = []

    for i in range(0, nPerms):
        null_genes = genNullGeneSet(matched_genes)
        null_counts = countTFBSEnrichment(tfbs_genes, null_genes)
        null_enrich = null_counts[0] / float(null_counts[1])
        null_dist.append(null_enrich)

    # null_dist = robjects.FloatVector([float(x) for x in null_dist])
    # null_dist_r = robjects.FloatVector([float(x) for x in null_dist])

    # null_ecdf = ecdf(null_dist)
    out_dict = {}
    out_dict['null'] = null_dist
    out_dict['median'] = np.median(null_enrich)

    return out_dict 
Example #11
Source File: nonpar.py    From DeepIV with MIT License 5 votes vote down vote up
def fit_and_evaluate(x,z,t,y,df):
    '''
    Fit and evaluate non-parametric regression using  Darolles, Fan, Florens and Renault (2011)

    Implemented in the `np` package in R.

    See [the np package documation](https://cran.r-project.org/web/packages/np/np.pdf) for details.
    '''
    npr=importr('np')
    y_R = robjects.FloatVector(list(y.flatten()))
    (x_eval, t_eval), y_true = test_points(df, 10000)
    mod = npr.npregiv(y_R, t, z, x=x, zeval=t_eval, xeval=x_eval,
                    method="Tikhonov", p=0, optim_method ="BFGS")
    return ((y_true - to_array(mod.rx2('phi.eval')))**2).mean() 
Example #12
Source File: RComparisonBase.py    From tbats with MIT License 5 votes vote down vote up
def r_bats(self, y, components):
        components = components.copy()
        if 'seasonal_periods' in components:
            components['seasonal_periods'] = ro.IntVector(components['seasonal_periods'])
        importr('forecast')
        r_bats_func = ro.r['bats']
        r_forecast = ro.r['forecast']
        r_y = ro.FloatVector(list(y))
        r_model = r_bats_func(r_y, **components)
        summary = r_forecast(r_model)
        # predictions = np.array(summary.rx('fitted')).flatten()
        return summary, r_model 
Example #13
Source File: manager.py    From iterativeWGCNA with GNU General Public License v2.0 5 votes vote down vote up
def histogram(self, vline=None, params=None):
        '''
        plot histogram with vline at x=vline
        '''
        self.params['x'] = ro.FloatVector(self.data)
        self.params['labels'] = False

        if params is not None:
            self.params.update(params)

        graphics().hist(**self.params)

        if vline is not None:
            lineParams = {'v': vline, 'col': 'red'}
            graphics().abline(**lineParams) 
Example #14
Source File: plotting.py    From grocsvs with MIT License 5 votes vote down vote up
def barPlot(dict_, keysInOrder=None, printCounts=True, ylim=None, *args, **kwdargs):
    """ Plot a bar plot

    Args:
        dict_: a dictionary of name -> value, where value is the height of the bar
            use a collections.OrderedDict() to easily convey the order of the groups
        keysInOrder: an optional ordering of the keys in dict_ (alternate option to using collections.OrderedDict)
        printCounts: option to print the counts on top of each bar

    additional kwdargs are passed directly to r.barplot()
    """

    if not keysInOrder:
        keysInOrder = dict_.keys()
    
    heights = ro.FloatVector([dict_[key] for key in keysInOrder])

    kwdargs["names.arg"] = ro.StrVector(keysInOrder)

    if ylim is None:
        if printCounts:
            ylim = [min(heights), max(heights)*1.1]
        else:
            ylim = [min(heights), max(heights)]

    x = r.barplot(heights, ylim=ro.FloatVector(ylim), *args, **kwdargs)

    if printCounts:
        heightsStrings = ["{:.2g}".format(height) for height in heights]
        r.text(x, ro.FloatVector(heights), ro.StrVector(heightsStrings), pos=3)
    return x 
Example #15
Source File: qc.py    From grocsvs with MIT License 5 votes vote down vote up
def visualize_report(tenx_sample_infos, outpath):
    try:
        from rpy2 import robjects as ro
        from grocsvs import plotting
        
        
        ro.r.pdf(outpath)

        frag_lengths = [numpy.log10(sample_info["frag_length_info"]["sampled"]) for sample_info in tenx_sample_infos.values()]
        max_ = max(numpy.percentile(cur_frag_lengths, 99.5) for cur_frag_lengths in frag_lengths)

        plotting.ecdf(frag_lengths, tenx_sample_infos.keys(), xlim=[0, max_], main="Fragment length distribution",
            xlab="Fragment length (log10)", legendWhere="bottomright")

        bc_counts = dict((name, sample_info["good_bc_count"]) for name, sample_info in tenx_sample_infos.items())
        plotting.barPlot(bc_counts, main="Number of high-quality barcodes", ylim=[0, 1.1*max(bc_counts.values())])

        # oldpar = r.par(mfrow=[min(3, len(tenx_sample_infos)), 2])
        oldpar = ro.r.par(mfrow=[2,1])

        for name, tenx_sample_info in tenx_sample_infos.items():
            C_Rs = tenx_sample_info["coverage_of_fragments"]
            C_Rs = C_Rs["coverages"] / C_Rs["lengths"].astype(float)

            ro.r.hist(ro.FloatVector(C_Rs), breaks=50, xlab="Fragment coverage by short-reads (C_R)", main=name)
            ro.r.hist(ro.FloatVector(tenx_sample_info["physical_depths"]), breaks=100, xlab="Coverage by long fragments (C_F)",
                main=name)

        ro.r.par(oldpar)

        ro.r["dev.off"]()
    except:
        with open(outpath, "w") as f:
            f.write("[the visual report requires rpy2 to be correctly installed]") 
Example #16
Source File: plotting.py    From svviz with MIT License 4 votes vote down vote up
def ecdf(vectors, labels=None, colors=["red", "blue", "orange", "violet", "green", "brown"],
         xlab="", ylab="cumulative fraction", main="", legendWhere="topleft", 
         lty=1, lwd=1, legendArgs=None, labelsIncludeN=True, **ecdfKwdArgs):
    """ Take a list of lists, convert them to vectors, and plots them sequentially on a CDF """

    if ro is None:
        return

    #print "MEANS:", main
    #for vector, label in zip(convertToVectors, labels):
    #    print label, numpy.mean(vector)
    
    def _expand(item):
        try:
            iter(item)
            return item
        except TypeError:
            return [item] * len(vectors)
            
    
    lty = _expand(lty)
    lwd = _expand(lwd)


    if not "xlim" in ecdfKwdArgs or ecdfKwdArgs["xlim"] is None:
        xlim = [min(min(vector) for vector in vectors if len(vector) > 0),
                max(max(vector) for vector in vectors if len(vector) > 0)]
        ecdfKwdArgs["xlim"] = xlim

    ecdfKwdArgs["xlim"] = ro.FloatVector(xlim)


    started = False
    for i, vector in enumerate(vectors):
        if len(vector) > 0:
            vector = ro.FloatVector(vector)
            ecdfKwdArgs.update({"verticals":True, "do.points":False, 
                                "col.hor":colors[(i)%len(colors)],
                                "col.vert":colors[(i)%len(colors)],
                                "lty":lty[(i)%len(lty)],
                                "lwd":lwd[(i)%len(lwd)]})
            ecdf = r.ecdf(vector)

            if not started:
                r.plot(ecdf, main=main, xlab=xlab, ylab=ylab, **ecdfKwdArgs)
                started = True
            else:
                r.plot(ecdf, add=True, **ecdfKwdArgs)

    if labels is not None:
        if labelsIncludeN:
            labelsWithN = []
            for i, label in enumerate(labels):
                labelsWithN.append(label+" (n=%d)"%len(vectors[i]))
        else:
            labelsWithN = labels
        legendArgs = asdict(legendArgs, {"cex":0.7})
        r.legend(legendWhere, legend=ro.StrVector(labelsWithN), lty=ro.IntVector(lty),
                 lwd=ro.IntVector([lwdi*2 for lwdi in lwd]), col=ro.StrVector(colors),
                 bg="white", **legendArgs) 
Example #17
Source File: pipeline_transfacmatch.py    From CGATPipelines with MIT License 4 votes vote down vote up
def collateEnrichmentOfTFBS(infiles, outfile):
    '''
    Concatenate the enrichment scores for all gene sets into a single table
    and recalculate qvalues.
    '''

    def _fetch(table_name):
        table_name = table_name.replace("-", "_")
        # retrieve table
        dbh = sqlite3.connect(PARAMS["database_name"])
        cc = dbh.cursor()
        data = cc.execute("SELECT * FROM %s" % table_name).fetchall()
        cc.close()

        # return pandas dataframe
        headers = [d[0] for d in cc.description]
        return pandas.DataFrame.from_records(data, columns=headers)

    first = True
    for infile in infiles:
        table_name = filenameToTablename(P.snip(os.path.basename(infile)))
        table_name = table_name + "_significance"
        geneset_id = table_name.split("_")[0]
        if first:
            # set up first dataframe
            first = False
            df = _fetch(table_name)
            # removing qvalue as it will be recalculated
            try:
                df.drop("qvalue", axis=1, inplace=True)
            except ValueError:
                # qvalue not contained in df
                pass
            # adding column for geneset_id
            geneset_id = [geneset_id, ] * len(df.index)
            df["geneset_id"] = geneset_id
            continue

        # append successive dataframes
        df_n = _fetch(table_name)
        df_n.drop("qvalue", axis=1, inplace=True)
        geneset_id = [geneset_id, ] * len(df_n.index)
        df_n["geneset_id"] = geneset_id
        df = df.append(df_n)

    # get globally adjusted pvalues
    p_vals = df["pvalue"].tolist()
    padjpy = R["p.adjust"]
    # returns a python FloatVector object
    q_vals = padjpy(p_vals)
    df["qvalue"] = [x for x in q_vals]

    df.to_csv(outfile, sep="\t", index=False) 
Example #18
Source File: PipelineTransfacMatch.py    From CGATPipelines with MIT License 4 votes vote down vote up
def permuteTFBSEnrich(tfbs_table,
                      fg_gc,
                      bg_gc,
                      nPerms,
                      bg_stat):
    '''
    Generate p-value from empirical cumulative frequency distribution
    for all TFBS by permutation.
    '''
    results_dict = {}
    fg_geneset = fg_gc.index.tolist()
    matched_genes = matchGenesByComposition(bg_gc, fg_gc, bg_stat)
    tfbs_all = set(tfbs_table.index)

    ecdf_py = R['ecdf']
    for tfbs in tfbs_all:
        tfbs_res = {}
        tfbs_genes = TFBSgeneset(tfbs, tfbs_table)
        fg_genes_counters = countTFBSEnrichment(tfbs_genes, fg_geneset)

        n_foregenes = fg_genes_counters[0]
        n_tfbsgenes = fg_genes_counters[1]
        total_fg = len(fg_geneset)
        fore_enrich = n_foregenes / float(n_tfbsgenes)
        if fore_enrich > 0:
            null_perms = nullDistPermutations(tfbs_genes=tfbs_genes,
                                              matched_genes=matched_genes,
                                              nPerms=nPerms)
            null_dist = null_perms['null']
            null_median = null_perms['median']
            null_dist_r = robjects.FloatVector([f for f in null_dist])
            null_ecdf = ecdf_py(null_dist_r)
            null_func = null_ecdf(fore_enrich)
            pvalue = 1.00 - null_func[0]

        else:
            null_median = 0
            pvalue = 1.0

        tfbs_res['nForegenes'] = n_foregenes
        tfbs_res['tForegenes'] = total_fg
        tfbs_res['nTFBSGenes'] = n_tfbsgenes
        tfbs_res['propForeInTFBS'] = fore_enrich
        tfbs_res['null_median'] = null_median
        tfbs_res['pvalue'] = pvalue

        results_dict[tfbs] = tfbs_res

    return results_dict 
Example #19
Source File: handler.py    From lambda-r-survival-stats with Apache License 2.0 4 votes vote down vote up
def calculate_survival_stats(times, events, values_by_record):
    """
    @param times: time elapsed before the event occurs, or when subject is censored
    @param events: 1 indicates event was observed; 0 indicates event did not occur
    @param values_by_record: two dimensional double array.  Each row is the predictor values for a record (ex: gene)
    @return: array where each element contains the hazard ratio and pvalue for the record
    """
    # flatten values of two dimensional array for use by R
    # in R, matrices are simply an array where you specify number of columns per row
    flattened_values = [y for row in values_by_record for y in row]

    t = robjects.FloatVector(times)
    e = robjects.IntVector(events)
    v = robjects.FloatVector(flattened_values)

    # convert flattened values into an R matrix
    m = robjects.r['matrix'](v, nrow=len(values_by_record), byrow=True)

    #load R library
    r('library(survival)')

    # assign variables in R
    r.assign('valuesMatrix', m)
    r.assign('numSamples', len(times))
    r.assign('times', t)
    r.assign('events', e)

    # calculate statistics by applying coxph to each record's values
    logging.debug('Calculating stats')
    r("""res <- apply(valuesMatrix,1, function(values) {
      coxlist = try(coxph(Surv(times,events)~values + cluster(1:numSamples[1])))
      return(c(summary(coxlist)$coefficients[2], summary(coxlist)$coefficients[6]))
      })""")
    logging.debug('Done calculating stats')

    # convert results
    r_res = robjects.r['res']
    res_iter = iter(r_res)
    results = []
    for hazard in res_iter:
        pval = next(res_iter)
        results.append({'hazard': hazard, 'pval': pval})
    return results 
Example #20
Source File: velocity_pseudotime.py    From scvelo with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def principal_curve(data, basis="pca", n_comps=4, clusters_list=None, copy=False):
    """Computes the principal curve
    Arguments
    ---------
    data: :class:`~anndata.AnnData`
        Annotated data matrix.
    basis: `str` (default: `'pca'`)
        Basis to use for computing the principal curve.
    n_comps: `int` (default: 4)
        Number of pricipal components to be used.
    copy: `bool`, (default: `False`)
        Return a copy instead of writing to adata.
    Returns
    -------
    Returns or updates `adata` with the attributes
    principal_curve: `.uns`
        dictionary containing `projections`, `ixsort` and `arclength`
    """
    adata = data.copy() if copy else data
    import rpy2.robjects as robjects
    from rpy2.robjects.packages import importr

    if clusters_list is not None:
        cell_subset = np.array(
            [label in clusters_list for label in adata.obs["clusters"]]
        )
        X_emb = adata[cell_subset].obsm[f"X_{basis}"][:, :n_comps]
    else:
        cell_subset = None
        X_emb = adata.obsm[f"X_{basis}"][:, :n_comps]

    n_obs, n_dim = X_emb.shape

    # convert array to R matrix
    xvec = robjects.FloatVector(X_emb.T.reshape((X_emb.size)))
    X_R = robjects.r.matrix(xvec, nrow=n_obs, ncol=n_dim)

    fit = importr("princurve").principal_curve(X_R)

    adata.uns["principal_curve"] = dict()
    adata.uns["principal_curve"]["ixsort"] = ixsort = np.array(fit[1]) - 1
    adata.uns["principal_curve"]["projections"] = np.array(fit[0])[ixsort]
    adata.uns["principal_curve"]["arclength"] = np.array(fit[2])
    adata.uns["principal_curve"]["cell_subset"] = cell_subset

    return adata if copy else None 
Example #21
Source File: plotting.py    From grocsvs with MIT License 4 votes vote down vote up
def ecdf(vectors, labels=None, colors=["red", "blue", "orange", "violet", "green", "brown"],
         xlab="", ylab="cumulative fraction", main="", legendWhere="topleft", 
         lty=1, lwd=1, legendArgs=None, labelsIncludeN=True, **ecdfKwdArgs):
    """ Take a list of lists, convert them to vectors, and plots them sequentially on a CDF """

    if ro is None:
        return

    #print "MEANS:", main
    #for vector, label in zip(convertToVectors, labels):
    #    print label, numpy.mean(vector)
    
    def _expand(item):
        try:
            iter(item)
            return item
        except TypeError:
            return [item] * len(vectors)
            
    
    lty = _expand(lty)
    lwd = _expand(lwd)


    if not "xlim" in ecdfKwdArgs or ecdfKwdArgs["xlim"] is None:
        xlim = [min(min(vector) for vector in vectors if len(vector) > 0),
                max(max(vector) for vector in vectors if len(vector) > 0)]
        ecdfKwdArgs["xlim"] = xlim

    ecdfKwdArgs["xlim"] = ro.FloatVector(ecdfKwdArgs["xlim"])


    started = False
    for i, vector in enumerate(vectors):
        if len(vector) > 0:
            vector = ro.FloatVector(vector)
            ecdfKwdArgs.update({"verticals":True, "do.points":False, 
                                "col.hor":colors[(i)%len(colors)],
                                "col.vert":colors[(i)%len(colors)],
                                "lty":lty[(i)%len(lty)],
                                "lwd":lwd[(i)%len(lwd)]})
            ecdf = r.ecdf(vector)

            if not started:
                r.plot(ecdf, main=main, xlab=xlab, ylab=ylab, **ecdfKwdArgs)
                started = True
            else:
                r.plot(ecdf, add=True, **ecdfKwdArgs)

    if labels is not None:
        if labelsIncludeN:
            labelsWithN = []
            for i, label in enumerate(labels):
                labelsWithN.append(label+" (n=%d)"%len(vectors[i]))
        else:
            labelsWithN = labels
        legendArgs = asdict(legendArgs, {"cex":0.7})
        r.legend(legendWhere, legend=ro.StrVector(labelsWithN), lty=ro.IntVector(lty),
                 lwd=ro.IntVector([lwdi*2 for lwdi in lwd]), col=ro.StrVector(colors),
                 bg="white", **legendArgs)