Python rpy2.robjects.pandas2ri.ri2py() Examples
The following are 9 code examples of rpy2.robjects.pandas2ri.ri2py().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module rpy2.robjects.pandas2ri, or try the search function.
Example #1
Source File: r_random_forest_clf.py From 2020plus with Apache License 2.0 | 6 votes |
def load_cv(self, path):
    """Load saved cross-validation results from an R data file.

    The R working directory is set to the Python process's current working
    directory, then ``path`` is loaded with R's ``load()``. Afterwards the
    R object ``trained.models`` is stored in ``self.rf_cv`` and the R
    object ``cvFoldDf`` (converted to a Python object when possible) is
    stored in ``self.cv_folds``.

    Parameters
    ----------
    path : str
        path passed to R's ``load()``
    """
    ro.r('setwd("{0}")'.format(os.getcwd()))
    ro.r('load("{0}")'.format(path))
    self.rf_cv = ro.r["trained.models"]

    # Look the R object up once, outside the try, so that only the
    # conversion itself is guarded.
    r_cv_folds = ro.r["cvFoldDf"]
    if new_pandas_flag:
        try:
            # explicit conversion, needed for bugged rpy2 versions
            self.cv_folds = pandas2ri.ri2py(r_cv_folds)
        except Exception:
            # Taking the object as-is should be the correct way, but
            # several rpy2 versions have a bug. ``Exception`` (rather than
            # a bare ``except``) keeps KeyboardInterrupt and SystemExit
            # from being swallowed here.
            self.cv_folds = r_cv_folds
    else:
        self.cv_folds = com.convert_robj(r_cv_folds)
Example #2
Source File: r_random_forest_clf.py From 2020plus with Apache License 2.0 | 6 votes |
def predict_proba(self, xtest):
    """Return the predicted probability of each class.

    Parameters
    ----------
    xtest : pd.DataFrame
        features for test set
    """
    if new_pandas_flag:
        r_features = pandas2ri.py2ri(xtest)
        return pandas2ri.ri2py(self.rf_pred_prob(self.rf, r_features))

    # Legacy conversion path.
    # NOTE(review): the ``.values`` extraction is assumed to belong to this
    # branch only -- confirm against the original file's indentation.
    r_features = com.convert_to_r_dataframe(xtest)
    converted = com.convert_robj(self.rf_pred_prob(self.rf, r_features))
    return converted.values
Example #3
Source File: test_R.py From palladium with Apache License 2.0 | 6 votes |
def test_predict_with_pandas_data(self, Model, dataframe):
    """Check ``predict`` and ``predict_proba`` on a model fitted with pandas data."""
    X, y = dataframe
    model = Model(scriptname='myscript', funcname='myfunc', some='kwarg')
    model.r['predict'].return_value = numpy.array([
        [0.1, 0.2, 0.7],
        [0.8, 0.1, 0.1],
    ])
    model.fit(X, y)

    predicted = model.predict(X)

    # Inspect how the R ``predict`` function was invoked.
    recorded_call = model.r['predict'].call_args
    positional = recorded_call[0]
    keywords = recorded_call[1]
    assert positional[0] is model.rmodel_
    passed_frame = ri2py(positional[1])
    assert (passed_frame.values == X.values).all()
    assert keywords['type'] == 'prob'

    # Predicted labels are the argmax over the returned probabilities.
    expected_labels = numpy.argmax(model.r['predict'].return_value, axis=1)
    assert (predicted == expected_labels).all()

    probabilities = model.predict_proba(X)
    assert (probabilities == model.r['predict'].return_value).all()
Example #4
Source File: test_R.py From palladium with Apache License 2.0 | 6 votes |
def test_smoke(self, dataset, model):
    """Fit, predict and score the model on Python and R forms of the data."""
    X, y = dataset()
    model.fit(X, y)

    predicted_shape = model.predict(X).shape
    if isinstance(y, Vector):
        expected_shape = ri2py(y).shape
    else:
        expected_shape = y.shape
    assert predicted_shape == expected_shape

    score_1 = model.score(X, y)
    assert score_1 >= 0.1

    # Convert X to its Python or R equivalent and check if scores
    # match:
    if isinstance(X, DataFrame):
        X_t = py2ri(X)
    else:
        X_t = ri2py(X)
    score_2 = model.score(X_t, y)
    assert score_2 == score_1

    # Convert X to a Python list and run the prediction:
    if isinstance(X, DataFrame):
        as_frame = X
    else:
        as_frame = ri2py(X)
    X_t2 = as_frame.values.tolist()
    score_3 = model.score(X_t2, y)
    assert score_3 == score_1
Example #5
Source File: r_random_forest_clf.py From 2020plus with Apache License 2.0 | 5 votes |
def predict(self, xtest):
    """Predict the class of each row via majority vote.

    Parameters
    ----------
    xtest : pd.DataFrame
        features for test set
    """
    if new_pandas_flag:
        r_features = pandas2ri.py2ri(xtest)
    else:
        r_features = com.convert_to_r_dataframe(xtest)

    r_result = self.rf_pred(self.rf, r_features)

    if new_pandas_flag:
        # Element 1 of the R result holds the genes, element 0 the classes.
        raw_genes, raw_classes = r_result[1], r_result[0]
        genes = pandas2ri.ri2py(raw_genes)
        pred_class = pandas2ri.ri2py(raw_classes)
    else:
        converted = com.convert_robj(r_result)
        genes, pred_class = zip(*converted.items())

    # Align the predictions with the row order of the input features.
    by_gene = pd.DataFrame({'pred_class': pred_class}, index=genes)
    aligned = by_gene.reindex(xtest.index)
    shifted = aligned - 1  # for some reason the class numbers start at 1
    return shifted['pred_class']
Example #6
Source File: test_R.py From palladium with Apache License 2.0 | 5 votes |
def test_fit_with_pandas_data(self, Model, dataframe):
    """Check that ``fit`` hands the pandas data and keyword args to the R function."""
    X, y = dataframe
    model = Model(scriptname='myscript', funcname='myfunc', some='kwarg')
    model.fit(X, y)

    recorded_call = model.r['myfunc'].call_args
    positional = recorded_call[0]
    passed_features = ri2py(positional[0])
    assert (passed_features.values == X.values).all()
    passed_labels = ri2py(positional[1])
    assert (passed_labels == y).all()

    keywords = recorded_call[1]
    assert keywords['some'] == 'kwarg'
Example #7
Source File: r_random_forest_clf.py From 2020plus with Apache License 2.0 | 4 votes |
def fit(self, xtrain, ytrain):
    """Train R's random forest classifier.

    NOTE: the method name ("fit") and method signature were chosen
    to be consistent with scikit-learn's fit method.

    Both arguments are modified in place: ``ytrain`` is given the index of
    ``xtrain``, and ``xtrain`` gains a ``'true_class'`` column holding
    ``ytrain``. The fitted model is stored in ``self.rf`` and the
    converted importance table in ``self.feature_importances_``.

    Parameters
    ----------
    xtrain : pd.DataFrame
        features for training set
    ytrain : pd.DataFrame
        true class labels (as integers) for training set

    Raises
    ------
    ValueError
        if neither ``self.is_onco_pred`` nor ``self.is_tsg_pred`` is set,
        since no sample sizes can be derived in that case
    """
    # Fail early, before touching the inputs, instead of hitting an
    # unbound ``sampsize`` further down.
    if not (self.is_onco_pred or self.is_tsg_pred):
        raise ValueError(
            'fit requires is_onco_pred and/or is_tsg_pred to be set'
        )

    # Per-class sample sizes, always ordered: other, oncogene, tsg.
    label_counts = ytrain.value_counts()
    class_nums = [self.other_num]
    if self.is_onco_pred:
        class_nums.append(self.onco_num)
    if self.is_tsg_pred:
        class_nums.append(self.tsg_num)
    sampsize = [label_counts[num] for num in class_nums]
    self.set_sample_size(sampsize)

    ytrain.index = xtrain.index  # ensure indexes match
    xtrain['true_class'] = ytrain

    # convert the training data to an R data frame
    if new_pandas_flag:
        r_xtrain = pandas2ri.py2ri(xtrain)
    else:
        r_xtrain = com.convert_to_r_dataframe(xtrain)

    self.rf = self.rf_fit(r_xtrain, self.ntrees, self.sample_size)
    r_imp = self.rf_imp(self.rf)  # importance dataframe in R
    if new_pandas_flag:
        self.feature_importances_ = pandas2ri.ri2py(r_imp)
    else:
        self.feature_importances_ = com.convert_robj(r_imp)
Example #8
Source File: r_interface.py From velocyto.py with BSD 2-Clause "Simplified" License | 4 votes |
def convert_r_obj(v: Any, obj_to_obj: bool=True, verbose: bool=True) -> Any:
    """Convert an rpy2 object to a Python object using hand-written rules.

    Dispatch is on the exact type of ``v`` (subclasses are deliberately not
    matched, as in the original comparison by ``type(v) ==``):

    * ``RNULLType`` -> ``None``
    * ``Matrix`` -> ``np.array``
    * ``FloatVector`` / ``IntVector`` -> ``np.array`` with dtype
      ``float64`` / ``int64``
    * ``ListVector`` -> dict keyed by ``v.names`` (or by position when
      indexing the names raises ``TypeError``), values converted recursively
    * ``StrVector`` -> ``str`` when it has exactly one element, otherwise a
      dict built like the ``ListVector`` case
    * ``DataFrame`` -> result of ``pandas2ri.ri2py``
    * ``RS4`` -> a simple object with one attribute per slot when
      ``obj_to_obj`` is true, otherwise a dict of slots

    Anything else is returned unchanged.

    Args:
        v: the object to convert.
        obj_to_obj: whether RS4 objects become attribute-bearing Python
            objects (True) or plain dicts (False). Applied to nested
            values as well.
        verbose: whether to print a notice for unsupported, non-``str``
            types. Applied to nested values as well.

    Returns:
        The converted Python object, or ``v`` itself when unsupported.
    """
    def _convert(item: Any) -> Any:
        # Recurse with the caller's options so nested values honour them.
        return convert_r_obj(item, obj_to_obj=obj_to_obj, verbose=verbose)

    def _to_dict(vector: Any) -> dict:
        # Key by name when available; fall back to positions on TypeError.
        positions = range(len(vector))
        try:
            return {vector.names[i]: _convert(vector[i]) for i in positions}
        except TypeError:
            return {i: _convert(vector[i]) for i in positions}

    kind = type(v)
    if kind == ro.rinterface.RNULLType:
        return None
    elif kind == ro.vectors.Matrix:
        return np.array(v)
    elif kind == ro.vectors.FloatVector:
        return np.array(v, dtype="float64")
    elif kind == ro.vectors.IntVector:
        return np.array(v, dtype="int64")
    elif kind == ro.vectors.ListVector:
        return _to_dict(v)
    elif kind == ro.vectors.StrVector:
        if len(v) == 1:
            return str(v[0])
        return _to_dict(v)
    elif kind == ro.vectors.DataFrame:
        from rpy2.robjects import pandas2ri
        return pandas2ri.ri2py(v)
    elif kind == ro.methods.RS4:
        slot_names = tuple(v.slotnames())
        if obj_to_obj:
            class RS4Object(object):
                def __repr__(self) -> str:
                    return f"< RS4Object with attributes: {list(self.__dict__.keys())} >"
            rs4obj = RS4Object()
            for k in slot_names:
                setattr(rs4obj, k, _convert(v.slots[k]))
            return rs4obj
        return {k: _convert(v.slots[k]) for k in slot_names}
    else:
        if kind != str and verbose:
            print(f"not supported yet {type(v)}")
        return v
Example #9
Source File: PipelineTimeseries.py From CGATPipelines with MIT License | 4 votes |
def covarFilter(infile, time_points, replicates, quantile):
    '''
    Filter gene list based on the distribution of the sums of the
    covariance of each gene.  This is highly recommended to reduce
    the total number of genes used in the dynamic time warping
    clustering to reduce the computational time.  The threshold is
    placed at the intersection of the expected and observed value
    for the given quantile.

    infile is read as a tab-separated table with a header row and the
    first column as index; its 'replicates' and 'times' columns are
    dropped and missing values are replaced by 0.0.

    time_points is sorted in place.

    quantile is formatted as an integer and used as an index into the
    vectors returned by R's quantile().

    Returns the transposed conversion of the R data frame
    `filtered_frame`.
    '''
    # Imported locally so the conversion helper is in scope regardless of
    # how the surrounding module spells its rpy2 imports.
    from rpy2.robjects import pandas2ri

    time_points.sort()
    time_rep_comb = list(itertools.product(time_points, replicates))
    time_cond = ro.StrVector([pair[0] for pair in time_rep_comb])
    rep_cond = ro.StrVector([pair[1] for pair in time_rep_comb])

    df = pd.read_table(infile, sep="\t", header=0, index_col=0)
    df.drop(['replicates'], inplace=True, axis=1)
    df.drop(['times'], inplace=True, axis=1)
    df = df.fillna(0.0)

    R.assign('diff_data', df)

    E.info("loading data frame")

    # need to be careful about column headers and transposing data frames
    R('''trans_data <- data.frame(diff_data)''')
    R('''times <- c(%s)''' % time_cond.r_repr())
    R('''replicates <- c(%s)''' % rep_cond.r_repr())

    # calculate the covariance matrix for all genes
    # sum each gene's covariance vector
    E.info("calculating sum of covariance of expression")

    R('''covar.mat <- abs(cov(trans_data))''')
    R('''sum.covar <- rowSums(covar.mat)''')
    R('''exp.covar <- abs(qnorm(ppoints(sum.covar),'''
      '''mean=mean(sum.covar), sd=sd(sum.covar)))''')
    R('''sum.covar.quant <- quantile(sum.covar)''')
    R('''exp.covar.quant <- quantile(exp.covar)''')

    E.info("filter on quantile")

    # Explicit mapping instead of locals(): only `quantile` is interpolated.
    R('''filtered_genes <- names(sum.covar[sum.covar > '''
      '''sum.covar.quant[%(quantile)i]'''
      ''' & sum.covar > exp.covar.quant[%(quantile)i]])'''
      % {'quantile': quantile})
    R('''filtered_frame <- data.frame(diff_data[, filtered_genes],'''
      '''times, replicates)''')

    # Fetch the R object by evaluating its name, then convert it; the
    # original passed the bare string to a misspelled module name.
    filtered_frame = pandas2ri.ri2py(R('filtered_frame')).T

    return filtered_frame