Python rpy2.robjects.pandas2ri.ri2py() Examples

The following are 9 code examples of rpy2.robjects.pandas2ri.ri2py(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module rpy2.robjects.pandas2ri, or try the search function.

Example #1

Source File: r_random_forest_clf.py From 2020plus with Apache License 2.0

6 votes

def load_cv(self, path):
        """Load an R data file and keep its cross-validation objects.

        After loading, the R objects ``trained.models`` and ``cvFoldDf`` are
        read from the R session and stored as ``self.rf_cv`` and
        ``self.cv_folds`` respectively.

        Parameters
        ----------
        path : str
            path handed to R's ``load``; the R working directory is first
            set to the current Python working directory
        """
        set_wd_str = 'setwd("{0}")'.format(os.getcwd())
        ro.r(set_wd_str)
        ro.r('load("{0}")'.format(path))
        self.rf_cv = ro.r["trained.models"]
        if new_pandas_flag:
            try:
                # explicit conversion, needed for the rpy2 versions whose
                # automatic conversion is bugged
                self.cv_folds = pandas2ri.ri2py(ro.r["cvFoldDf"])
            except Exception:
                # Best-effort fallback: plain lookup should already be the
                # correct way to convert, but several rpy2 versions have a
                # bug. Only ordinary errors are caught so that
                # KeyboardInterrupt / SystemExit still propagate.
                self.cv_folds = ro.r["cvFoldDf"]
        else:
            self.cv_folds = com.convert_robj(ro.r["cvFoldDf"])

Example #2

Source File: r_random_forest_clf.py From 2020plus with Apache License 2.0

6 votes

def predict_proba(self, xtest):
        """Return the per-class probabilities computed by the R model.

        Parameters
        ----------
        xtest : pd.DataFrame
            features for test set
        """
        # choose the pandas -> R bridge that matches the installed stack
        to_r = pandas2ri.py2ri if new_pandas_flag else com.convert_to_r_dataframe
        r_features = to_r(xtest)
        r_prob = self.rf_pred_prob(self.rf, r_features)
        if not new_pandas_flag:
            # legacy bridge: hand back the ``.values`` of the converted object
            return com.convert_robj(r_prob).values
        return pandas2ri.ri2py(r_prob)

Example #3

Source File: test_R.py From palladium with Apache License 2.0

6 votes

def test_predict_with_pandas_data(self, Model, dataframe):
        # predict() must pass the fitted R model and the converted features
        # to R's 'predict' with type='prob', then take the row-wise argmax;
        # predict_proba() must return the R output unchanged.
        features, target = dataframe
        model = Model(scriptname='myscript', funcname='myfunc', some='kwarg')
        model.r['predict'].return_value = numpy.array(
            [[0.1, 0.2, 0.7], [0.8, 0.1, 0.1]])
        model.fit(features, target)

        labels = model.predict(features)
        recorded_call = model.r['predict'].call_args
        positional = recorded_call[0]
        keywords = recorded_call[1]
        assert positional[0] is model.rmodel_
        sent_features = ri2py(positional[1]).values
        assert (sent_features == features.values).all()
        assert keywords['type'] == 'prob'
        expected_labels = numpy.argmax(
            model.r['predict'].return_value, axis=1)
        assert (labels == expected_labels).all()

        probabilities = model.predict_proba(features)
        assert (probabilities == model.r['predict'].return_value).all()

Example #4

Source File: test_R.py From palladium with Apache License 2.0

6 votes

def test_smoke(self, dataset, model):
        # Fit once, then check that prediction shape and score are the same
        # whichever representation of X is fed to the model.
        X, y = dataset()
        model.fit(X, y)

        predicted_shape = model.predict(X).shape
        if isinstance(y, Vector):
            expected_shape = ri2py(y).shape
        else:
            expected_shape = y.shape
        assert predicted_shape == expected_shape

        baseline = model.score(X, y)
        assert baseline >= 0.1

        # Score again with X converted to the other side of the
        # Python/R bridge; the result must not change.
        if isinstance(X, DataFrame):
            converted = py2ri(X)
        else:
            converted = ri2py(X)
        assert model.score(converted, y) == baseline

        # Score once more with X flattened to a plain list of rows.
        frame = X if isinstance(X, DataFrame) else ri2py(X)
        rows = frame.values.tolist()
        assert model.score(rows, y) == baseline

Example #5

Source File: r_random_forest_clf.py From 2020plus with Apache License 2.0

5 votes

def predict(self, xtest):
        """Return the predicted class for every row of ``xtest``.

        Parameters
        ----------
        xtest : pd.DataFrame
            features for test set
        """
        # choose the pandas -> R bridge that matches the installed stack
        to_r = pandas2ri.py2ri if new_pandas_flag else com.convert_to_r_dataframe
        r_result = self.rf_pred(self.rf, to_r(xtest))

        if new_pandas_flag:
            # the R result is unpacked positionally: element 1 is used as
            # the index (genes), element 0 as the predicted classes
            r_genes = r_result[1]
            r_classes = r_result[0]
            genes = pandas2ri.ri2py(r_genes)
            pred_class = pandas2ri.ri2py(r_classes)
        else:
            converted = com.convert_robj(r_result)
            genes, pred_class = zip(*converted.items())

        # align the predictions with the order of the input rows
        aligned = pd.DataFrame({'pred_class': pred_class},
                               index=genes).reindex(xtest.index)
        aligned -= 1  # for some reason the class numbers start at 1
        return aligned['pred_class']

Example #6

Source File: test_R.py From palladium with Apache License 2.0

5 votes

def test_fit_with_pandas_data(self, Model, dataframe):
        # fit() must forward the converted features, the targets and the
        # extra keyword argument to the configured R function.
        features, target = dataframe
        model = Model(scriptname='myscript', funcname='myfunc', some='kwarg')
        model.fit(features, target)
        recorded_call = model.r['myfunc'].call_args
        positional = recorded_call[0]
        keywords = recorded_call[1]
        sent_features = ri2py(positional[0]).values
        assert (sent_features == features.values).all()
        sent_target = ri2py(positional[1])
        assert (sent_target == target).all()
        assert keywords['some'] == 'kwarg'

Example #7

Source File: r_random_forest_clf.py From 2020plus with Apache License 2.0

4 votes

def fit(self, xtrain, ytrain):
        """The fit method trains R's random forest classifier.

        NOTE: the method name ("fit") and method signature were chosen
        to be consistent with scikit learn's fit method.

        Both arguments are modified in place: ``ytrain`` receives the index
        of ``xtrain``, and ``xtrain`` gains a ``'true_class'`` column holding
        ``ytrain``. The fitted model is stored in ``self.rf`` and its
        importance table in ``self.feature_importances_``.

        Parameters
        ----------
        xtrain : pd.DataFrame
            features for training set
        ytrain : pd.DataFrame
            true class labels (as integers) for training set

        Raises
        ------
        ValueError
            if neither ``is_onco_pred`` nor ``is_tsg_pred`` is set, since no
            sample sizes can be derived in that case
        """
        label_counts = ytrain.value_counts()
        if not (self.is_onco_pred or self.is_tsg_pred):
            # previously this fell through and failed later with an
            # unbound-variable error on ``sampsize``
            raise ValueError(
                'cannot determine sample sizes: at least one of '
                'is_onco_pred / is_tsg_pred must be set')

        # one count per predicted class, always starting with "other"
        sampsize = [label_counts[self.other_num]]
        if self.is_onco_pred:
            sampsize.append(label_counts[self.onco_num])
        if self.is_tsg_pred:
            sampsize.append(label_counts[self.tsg_num])

        self.set_sample_size(sampsize)
        ytrain.index = xtrain.index  # ensure indexes match
        xtrain['true_class'] = ytrain

        # convert the training frame to an R data frame
        if new_pandas_flag:
            r_xtrain = pandas2ri.py2ri(xtrain)
        else:
            r_xtrain = com.convert_to_r_dataframe(xtrain)
        self.rf = self.rf_fit(r_xtrain, self.ntrees, self.sample_size)
        r_imp = self.rf_imp(self.rf)  # importance dataframe in R
        if new_pandas_flag:
            self.feature_importances_ = pandas2ri.ri2py(r_imp)
        else:
            self.feature_importances_ = com.convert_robj(r_imp)

Example #8

Source File: r_interface.py From velocyto.py with BSD 2-Clause "Simplified" License

4 votes

def convert_r_obj(v: Any, obj_to_obj: bool=True, verbose: bool=True) -> Any:
        """Function with manually specified conversion from a r-object to a python object

        The conversion is chosen by the exact type of ``v``:

        * R NULL -> ``None``
        * ``Matrix`` -> ``np.array``
        * ``FloatVector`` / ``IntVector`` -> ``np.array`` with dtype float64 / int64
        * ``ListVector`` -> dict of recursively converted elements, keyed by
          name, or by position when indexing the names raises ``TypeError``
        * ``StrVector`` -> ``str`` for a single element, otherwise a dict
          built like the ``ListVector`` case
        * ``DataFrame`` -> result of ``pandas2ri.ri2py``
        * ``RS4`` -> an attribute-holding object if ``obj_to_obj`` is true,
          otherwise a dict, with every slot converted recursively
        * anything else is returned unchanged

        Args
        ----
        v: the r-object to convert
        obj_to_obj: whether RS4 objects become Python objects (True) or dicts (False);
            forwarded to every recursive call
        verbose: whether to print a notice for unsupported non-``str`` types;
            forwarded to every recursive call

        Returns
        -------
        The converted python object, or ``v`` itself when no rule applies
        """
        def recurse(item: Any) -> Any:
            # propagate the caller's settings to nested values
            return convert_r_obj(item, obj_to_obj=obj_to_obj, verbose=verbose)

        # NOTE(review): exact type comparison is kept on purpose; switching to
        # isinstance could route subclasses to a different branch - confirm
        # against the rpy2 class hierarchy before changing.
        if type(v) == ro.rinterface.RNULLType:
            return None
        elif type(v) == ro.vectors.Matrix:
            return np.array(v)
        elif type(v) == ro.vectors.FloatVector:
            return np.array(v, dtype="float64")
        elif type(v) == ro.vectors.IntVector:
            return np.array(v, dtype="int64")
        elif type(v) == ro.vectors.ListVector:
            try:
                return {v.names[i]: recurse(v[i]) for i in range(len(v))}
            except TypeError:
                # fall back to positional keys
                return {i: recurse(v[i]) for i in range(len(v))}
        elif type(v) == ro.vectors.StrVector:
            if len(v) == 1:
                return str(v[0])
            else:
                try:
                    return {v.names[i]: recurse(v[i]) for i in range(len(v))}
                except TypeError:
                    # fall back to positional keys
                    return {i: recurse(v[i]) for i in range(len(v))}
        elif type(v) == ro.vectors.DataFrame:
            from rpy2.robjects import pandas2ri
            return pandas2ri.ri2py(v)
        elif type(v) == ro.methods.RS4:
            if obj_to_obj:
                class RS4Object(object):
                    def __repr__(self) -> str:
                        return f"< RS4Object with attributes: {list(self.__dict__.keys())} >"
                rs4obj = RS4Object()
                for k in tuple(v.slotnames()):
                    setattr(rs4obj, k, recurse(v.slots[k]))
                return rs4obj
            else:
                # forward obj_to_obj so nested RS4 slots also become dicts
                return {k: recurse(v.slots[k]) for k in tuple(v.slotnames())}
        else:
            if type(v) != str:
                if verbose:
                    print(f"not supported yet {type(v)}")
            return v

Example #9

Source File: PipelineTimeseries.py From CGATPipelines with MIT License

4 votes

def covarFilter(infile,
                time_points,
                replicates,
                quantile):
    '''
    Filter gene list based on the distribution of the
    sums of the covariance of each gene.  This is highly
    recommended to reduce the total number of genes used
    in the dynamic time warping clustering to reduce the
    computational time.  The threshold is placed at the
    intersection of the expected and observed value
    for the given quantile.

    Arguments
    ---------
    infile: tab-separated table read with its first column as index;
        its ``replicates`` and ``times`` columns are dropped and missing
        values are replaced by 0.0
    time_points: list of time points; it is sorted in place
    replicates: iterable of replicate labels, combined with every
        time point
    quantile: integer used to index the vectors returned by R's
        ``quantile()``

    Returns
    -------
    The transpose of the converted R ``filtered_frame``.
    '''

    # NOTE: this sorts the caller's list in place
    time_points.sort()
    time_rep_comb = list(itertools.product(time_points, replicates))
    time_cond = ro.StrVector([x[0] for x in time_rep_comb])
    rep_cond = ro.StrVector([x[1] for x in time_rep_comb])
    df = pd.read_table(infile, sep="\t", header=0, index_col=0)

    df.drop(['replicates'], inplace=True, axis=1)
    df.drop(['times'], inplace=True, axis=1)
    df = df.fillna(0.0)

    R.assign('diff_data', df)

    E.info("loading data frame")

    # need to be careful about column headers and transposing data frames

    R('''trans_data <- data.frame(diff_data)''')
    R('''times <- c(%s)''' % time_cond.r_repr())
    R('''replicates <- c(%s)''' % rep_cond.r_repr())

    # calculate the covariance matrix for all genes
    # sum each gene's covariance vector

    E.info("calculating sum of covariance of expression")

    R('''covar.mat <- abs(cov(trans_data))''')
    R('''sum.covar <- rowSums(covar.mat)''')
    R('''exp.covar <- abs(qnorm(ppoints(sum.covar),'''
      '''mean=mean(sum.covar), sd=sd(sum.covar)))''')
    R('''sum.covar.quant <- quantile(sum.covar)''')
    R('''exp.covar.quant <- quantile(exp.covar)''')

    E.info("filter on quantile")

    # keep genes whose summed covariance exceeds both the observed and the
    # expected value at the requested quantile index
    R('''filtered_genes <- names(sum.covar[sum.covar > '''
      '''sum.covar.quant[%(quantile)i]'''
      ''' & sum.covar > exp.covar.quant[%(quantile)i]])''' % {
          "quantile": quantile})
    R('''filtered_frame <- data.frame(diff_data[, filtered_genes],'''
      '''times, replicates)''')

    # The module name was misspelled as ``pandas2i``, which raised NameError.
    # NOTE(review): ri2py is handed the *name* 'filtered_frame' as a string
    # rather than the R object itself - confirm this converts as intended
    # with the rpy2 version in use.
    filtered_frame = pandas2ri.ri2py('filtered_frame').T

    return filtered_frame