Python sklearn.ensemble.RandomForestClassifier() Examples
The following are 30 code examples of sklearn.ensemble.RandomForestClassifier(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.ensemble, or try the search function.
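Before the project-specific examples, here is a minimal, self-contained sketch of the fit/predict workflow that all of the snippets below revolve around. The dataset and parameter values are illustrative assumptions, not taken from any of the projects:

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Toy data: 500 samples, 20 features, binary labels.
X, y = make_classification(n_samples=500, n_features=20, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

clf = RandomForestClassifier(n_estimators=100, random_state=0)
clf.fit(X_train, y_train)

print(clf.predict(X_test[:5]))        # hard class labels
print(clf.predict_proba(X_test[:5]))  # per-class probabilities
print(clf.score(X_test, y_test))      # mean accuracy on held-out data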

Example #1
Source File: mmbot.py From MaliciousMacroBot with MIT License | 8 votes |
def mmb_evaluate_model(self):
    """
    Returns scores from cross validation evaluation on the malicious / benign classifier
    """
    predictive_features = self.features['predictive_features']

    self.clf_X = self.modeldata[predictive_features].values
    self.clf_y = np.array(self.modeldata['label'])

    X_train, X_test, y_train, y_test = train_test_split(self.clf_X, self.clf_y, test_size=0.2, random_state=0)
    lb = LabelBinarizer()
    y_train = np.array([number[0] for number in lb.fit_transform(y_train)])

    eval_cls = RandomForestClassifier(n_estimators=100, max_features=.2)
    eval_cls.fit(X_train, y_train)

    recall = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='recall')
    precision = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='precision')
    accuracy = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='accuracy')
    f1_score = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='f1_macro')

    return {'accuracy': accuracy, 'f1': f1_score, 'precision': precision, 'recall': recall}
Example #2
Source File: vanilla_model.py From OpenChem with MIT License | 6 votes |
def __init__(self, model_type='classifier', feature_type='fingerprints',
             n_estimators=100, n_ensemble=5):
    super(RandomForestQSAR, self).__init__()
    self.n_estimators = n_estimators
    self.n_ensemble = n_ensemble
    self.model = []
    self.model_type = model_type
    if self.model_type == 'classifier':
        for i in range(n_ensemble):
            self.model.append(RFC(n_estimators=n_estimators))
    elif self.model_type == 'regressor':
        for i in range(n_ensemble):
            self.model.append(RFR(n_estimators=n_estimators))
    else:
        raise ValueError('invalid value for argument')
    self.feature_type = feature_type
    if self.feature_type == 'descriptors':
        self.calc = Calculator(descriptors, ignore_3D=True)
        self.desc_mean = [0] * self.n_ensemble
Example #3
Source File: forest.py From cgpm with Apache License 2.0 | 6 votes |
def __init__(self, outputs, inputs, k=None, hypers=None, params=None,
             distargs=None, rng=None):
    self.rng = gu.gen_rng() if rng is None else rng
    self.outputs = outputs
    self.inputs = inputs
    assert len(self.outputs) == 1
    assert len(self.inputs) >= 1
    assert self.outputs[0] not in self.inputs
    assert len(distargs['inputs']['stattypes']) == len(self.inputs)
    self.stattypes = distargs['inputs']['stattypes']
    # Number of output categories and input dimension.
    # XXX WHATTA HACK. BayesDB passes in top-level kwargs, not in distargs.
    self.k = k if k is not None else int(distargs['k'])
    self.p = len(distargs['inputs']['stattypes'])
    # Sufficient statistics.
    self.N = 0
    self.data = Data(x=OrderedDict(), Y=OrderedDict())
    self.counts = [0] * self.k
    # Outlier and random forest parameters.
    if params is None:
        params = {}
    self.alpha = params.get('alpha', .1)
    self.regressor = params.get('forest', None)
    if self.regressor is None:
        self.regressor = RandomForestClassifier(random_state=self.rng)
Example #4
Source File: function.py From Karta with MIT License | 6 votes |
def trainFunctionTypeClassifier(self, scs):
    """Train the type classifier, according to all known code segments.

    Args:
        scs (list): list of all known (sark) code segments

    Note:
        Training must happen *after* the calibration phase
    """
    functions = []
    for sc in scs:
        functions += list(filter(lambda func: not self._analyzer.fptr_identifier.isPointedFunction(func.start_ea), sc.functions))
    clf = RandomForestClassifier(n_estimators=100)
    eas = list(map(lambda x: x.start_ea, functions))
    data_set = list(map(self.extractFunctionTypeSample, eas))
    data_results = list(map(self._analyzer.codeType, eas))
    # classify
    clf.fit(data_set, data_results)
    # store the results
    self._type_classifier = clf
Example #5
Source File: classifier.py From stock-price-prediction with MIT License | 6 votes |
def buildModel(dataset, method, parameters):
    """
    Build final model for predicting real testing data
    """
    features = dataset.columns[0:-1]

    if method == 'RNN':
        clf = performRNNlass(dataset[features], dataset['UpDown'])
        return clf
    elif method == 'RF':
        clf = RandomForestClassifier(n_estimators=1000, n_jobs=-1)
    elif method == 'KNN':
        clf = neighbors.KNeighborsClassifier()
    elif method == 'SVM':
        c = parameters[0]
        g = parameters[1]
        clf = SVC(C=c, gamma=g)
    elif method == 'ADA':
        clf = AdaBoostClassifier()

    return clf.fit(dataset[features], dataset['UpDown'])
Example #6
Source File: adult_RF_Classify.py From Machine-Learning-for-Beginner-by-Python3 with MIT License | 6 votes |
def Train(data, treecount, tezh, yanzhgdata):
    model = RFC(n_estimators=treecount, max_features=tezh, class_weight='balanced')
    model.fit(data[:, :-1], data[:, -1])
    # Predictions on the training data
    train_out = model.predict(data[:, :-1])
    # Compute the error measure
    train_mse = fmse(data[:, -1], train_out)[0]
    # Predictions on the validation data
    add_yan = model.predict(yanzhgdata[:, :-1])
    # Compute the F1 measure
    add_mse = fmse(yanzhgdata[:, -1], add_yan)[0]
    print(train_mse, add_mse)
    return train_mse, add_mse

# Function that determines the final combination
Example #7
Source File: 03_fit_predict_plot_midwest_survey.py From dirty_cat with BSD 3-Clause "New" or "Revised" License | 6 votes |
def make_pipeline(encoding_method):
    # static transformers from the other columns
    transformers = [('one-hot-clean', encoder_dict['one-hot'], clean_columns)]
    # adding the encoded column
    transformers += [(encoding_method + '-dirty', encoder_dict[encoding_method],
                      [dirty_column])]
    pipeline = Pipeline([
        # Use ColumnTransformer to combine the features
        ('union', ColumnTransformer(
            transformers=transformers,
            remainder='drop')),
        ('scaler', StandardScaler(with_mean=False)),
        ('classifier', RandomForestClassifier(random_state=5))
    ])
    return pipeline


###############################################################################
# Evaluation of different encoding methods
# -----------------------------------------
# We then loop over encoding methods, scoring the different pipeline predictions
# using a cross validation score:
Example #8
Source File: Stock_Prediction_Model_Random_Forrest.py From StockRecommendSystem with MIT License | 6 votes |
def build_model(self, X_train, y_train):
    if self.paras.load == True:
        model = self.load_training_model(self.paras.window_len)
        if model != None:
            return model

    print('build Random Forrest model...')

    # range of number of trees : 5*(1 -> 10) = 5,10,...,50 trees
    t_min = self.paras.tree_min[index]
    t_max = self.paras.tree_max[index]

    # range of max of features : 1 -> 10 features
    f_min = self.paras.feature_min[index]
    f_max = self.paras.feature_max[index]

    # range of window : 1 -> 70 days
    w_min = self.paras.window_min
    w_max = self.paras.window_max

    w_opt, n_opt, m_opt = self.best_window(X_train, y_train, w_min, w_max,
                                           t_min, t_max, f_min, f_max)

    model = RandomForestClassifier(n_estimators=n_opt, max_features=m_opt,
                                   n_jobs=8, verbose=self.paras.verbose)
    return model
Example #9
Source File: test_train_pairwise_similarity_model.py From redshells with MIT License | 6 votes |
def test_run(self):
    self.input_data['item2embedding'] = dict(i0=[1, 2], i1=[3, 4])
    self.input_data['similarity_data'] = pd.DataFrame(
        dict(item1=['i0', 'i0', 'i1'], item2=['i0', 'i1', 'i1'], similarity=[1, 0, 1]))
    task = TrainPairwiseSimilarityModel(
        item2embedding_task=_DummyTask(),
        similarity_data_task=_DummyTask(),
        model_name='RandomForestClassifier',
        item0_column_name='item1',
        item1_column_name='item2',
        similarity_column_name='similarity')
    task.load = MagicMock(side_effect=self._load)
    task.dump = MagicMock(side_effect=self._dump)

    task.run()

    self.assertIsInstance(self.dump_data, RandomForestClassifier)
Example #10
Source File: common_utils.py From interpret-text with MIT License | 5 votes |
def create_random_forest_tfidf():
    vectorizer = TfidfVectorizer(lowercase=False)
    rf = RandomForestClassifier(n_estimators=500, random_state=777)
    return Pipeline([("vectorizer", vectorizer), ("rf", rf)])
Example #11
Source File: common_utils.py From interpret-text with MIT License | 5 votes |
def create_random_forest_vectorizer():
    vectorizer = CountVectorizer(lowercase=False, min_df=0.0, binary=True)
    rf = RandomForestClassifier(n_estimators=500, random_state=777)
    return Pipeline([("vectorizer", vectorizer), ("rf", rf)])
Example #12
Source File: common_utils.py From interpret-text with MIT License | 5 votes |
def create_sklearn_random_forest_classifier(X, y):
    rfc = ensemble.RandomForestClassifier(max_depth=4, random_state=777)
    model = rfc.fit(X, y)
    return model
Example #13
Source File: mmbot.py From MaliciousMacroBot with MIT License | 5 votes |
def build_models(self):
    """
    After get_language_features is called, this function builds the models
    based on the classifier matrix and labels.
    :return:
    """
    self.cls = RandomForestClassifier(n_estimators=100, max_features=.2)
    # build classifier
    self.cls.fit(self.clf_X, self.clf_y)

    return self.cls
Example #14
Source File: model_loop.py From fake-news-detection with MIT License | 5 votes |
def define_clfs_params(self):
    '''
    Defines all relevant parameters and classes for classifier objects.
    Edit these if you wish to change parameters.
    '''
    # These are the classifiers
    self.clfs = {
        'RF': RandomForestClassifier(n_estimators=50, n_jobs=-1),
        'ET': ExtraTreesClassifier(n_estimators=10, n_jobs=-1, criterion='entropy'),
        'AB': AdaBoostClassifier(DecisionTreeClassifier(max_depth=[1, 5, 10, 15]), algorithm="SAMME", n_estimators=200),
        'LR': LogisticRegression(penalty='l1', C=1e5),
        'SVM': svm.SVC(kernel='linear', probability=True, random_state=0),
        'GB': GradientBoostingClassifier(learning_rate=0.05, subsample=0.5, max_depth=6, n_estimators=10),
        'NB': GaussianNB(),
        'DT': DecisionTreeClassifier(),
        'SGD': SGDClassifier(loss='log', penalty='l2'),
        'KNN': KNeighborsClassifier(n_neighbors=3)
    }
    # These are the parameters which will be run through
    self.params = {
        'RF': {'n_estimators': [1, 10, 100, 1000], 'max_depth': [10, 15, 20, 30, 40, 50, 60, 70, 100], 'max_features': ['sqrt', 'log2'], 'min_samples_split': [2, 5, 10], 'random_state': [1]},
        'LR': {'penalty': ['l1', 'l2'], 'C': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10], 'random_state': [1]},
        'SGD': {'loss': ['log'], 'penalty': ['l2', 'l1', 'elasticnet'], 'random_state': [1]},
        'ET': {'n_estimators': [1, 10, 100, 1000], 'criterion': ['gini', 'entropy'], 'max_depth': [1, 3, 5, 10, 15], 'max_features': ['sqrt', 'log2'], 'min_samples_split': [2, 5, 10], 'random_state': [1]},
        'AB': {'algorithm': ['SAMME', 'SAMME.R'], 'n_estimators': [1, 10, 100, 1000], 'random_state': [1]},
        'GB': {'n_estimators': [1, 10, 100, 1000], 'learning_rate': [0.001, 0.01, 0.05, 0.1, 0.5], 'subsample': [0.1, 0.5, 1.0], 'max_depth': [1, 3, 5, 10, 20, 50, 100], 'random_state': [1]},
        'NB': {},
        'DT': {'criterion': ['gini', 'entropy'], 'max_depth': [1, 2, 15, 20, 30, 40, 50], 'max_features': ['sqrt', 'log2'], 'min_samples_split': [2, 5, 10], 'random_state': [1]},
        'SVM': {'C': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10], 'kernel': ['linear'], 'random_state': [1]},
        'KNN': {'n_neighbors': [1, 5, 10, 25, 50, 100], 'weights': ['uniform', 'distance'], 'algorithm': ['auto', 'ball_tree', 'kd_tree']}
    }
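The snippet above only defines the classifier/grid mapping; how it gets consumed is not shown. A minimal sketch (an assumption, not code from model_loop.py) would pair each classifier with its grid via scikit-learn's GridSearchCV:

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

X, y = make_classification(n_samples=200, n_features=10, random_state=1)

# Illustrative subset of the clfs/params mapping defined above.
clfs = {'RF': RandomForestClassifier(n_jobs=-1)}
params = {'RF': {'n_estimators': [10, 100], 'max_depth': [10, 15], 'random_state': [1]}}

for name, clf in clfs.items():
    search = GridSearchCV(clf, params[name], cv=3)  # exhaustive search over the grid
    search.fit(X, y)
    print(name, search.best_params_, round(search.best_score_, 3))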
Example #15
Source File: test_stacker.py From xcessiv with Apache License 2.0 | 5 votes |
def setUp(self):
    bl1 = RandomForestClassifier(random_state=8)
    bl2 = LogisticRegression()
    bl3 = RandomForestClassifier(max_depth=10, random_state=10)
    meta_est = LogisticRegression()
    skf = StratifiedKFold(random_state=8).split
    self.stacked_ensemble = stacker.XcessivStackedEnsemble(
        [bl1, bl2, bl3],
        ['predict', 'predict_proba', 'predict_proba'],
        meta_est,
        skf
    )
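For comparison, the same three base learners and logistic-regression meta-estimator can be expressed with scikit-learn's built-in StackingClassifier (available since scikit-learn 0.22). This is an analogous sketch, not part of xcessiv:

from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold

stack = StackingClassifier(
    estimators=[
        ('rf1', RandomForestClassifier(random_state=8)),
        ('lr', LogisticRegression()),
        ('rf2', RandomForestClassifier(max_depth=10, random_state=10)),
    ],
    final_estimator=LogisticRegression(),
    cv=StratifiedKFold(n_splits=5),  # mirrors the StratifiedKFold split above
)
# Usage: stack.fit(X_train, y_train); stack.predict(X_test)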
Example #16
Source File: test_functions.py From xcessiv with Apache License 2.0 | 5 votes |
def test_is_valid_json(self):
    assert functions.is_valid_json({'x': ['i am serializable', 0.1]})
    assert not functions.is_valid_json({'x': RandomForestClassifier()})
Example #17
Source File: test_functions.py From xcessiv with Apache License 2.0 | 5 votes |
def test_make_serializable(self):
    assert functions.is_valid_json({'x': ['i am serializable', 0.1]})
    assert not functions.is_valid_json({'x': RandomForestClassifier()})
    assert functions.make_serializable(
        {
            'x': ['i am serializable', 0.1],
            'y': RandomForestClassifier()
        }
    ) == {'x': ['i am serializable', 0.1]}
Example #18
Source File: test_functions.py From xcessiv with Apache License 2.0 | 5 votes |
def test_verify_estimator_class(self):
    np.random.seed(8)
    performance_dict, hyperparameters = functions.verify_estimator_class(
        RandomForestClassifier(),
        'predict_proba',
        dict(Accuracy=self.source),
        self.dataset_properties
    )
    assert round(performance_dict['Accuracy'], 3) == 0.8
    assert hyperparameters == {
        'warm_start': False,
        'oob_score': False,
        'n_jobs': 1,
        'verbose': 0,
        'max_leaf_nodes': None,
        'bootstrap': True,
        'min_samples_leaf': 1,
        'n_estimators': 10,
        'min_samples_split': 2,
        'min_weight_fraction_leaf': 0.0,
        'criterion': 'gini',
        'random_state': None,
        'min_impurity_split': None,
        'min_impurity_decrease': 0.0,
        'max_features': 'auto',
        'max_depth': None,
        'class_weight': None
    }
Example #19
Source File: test_functions.py From xcessiv with Apache License 2.0 | 5 votes |
def test_non_serializable_parameters(self):
    pipeline = Pipeline([('pca', PCA()), ('rf', RandomForestClassifier())])
    performance_dict, hyperparameters = functions.verify_estimator_class(
        pipeline,
        'predict_proba',
        dict(Accuracy=self.source),
        self.dataset_properties
    )
    assert functions.is_valid_json(hyperparameters)
Example #20
Source File: test_functions.py From xcessiv with Apache License 2.0 | 5 votes |
def test_assertion_meta_feature_generator(self):
    np.random.seed(8)
    self.assertRaises(
        exceptions.UserError,
        functions.verify_estimator_class,
        RandomForestClassifier(),
        'decision_function',
        dict(Accuracy=self.source),
        self.dataset_properties
    )
Example #21
Source File: test_models.py From xcessiv with Apache License 2.0 | 5 votes |
def setUp(self):
    self.base_learner_origin = models.BaseLearnerOrigin(
        source=''.join([
            "from sklearn.ensemble import RandomForestClassifier\n",
            "base_learner = RandomForestClassifier(random_state=8)"
        ])
    )
Example #22
Source File: test_models.py From xcessiv with Apache License 2.0 | 5 votes |
def test_return_estimator_from_json(self):
    est = self.base_learner_origin.return_estimator()
    assert isinstance(est, RandomForestClassifier)
Example #23
Source File: conftest.py From yatsm with MIT License | 5 votes |
def make_example_classifier(filename):
    # Create a dummy RF model for train/classify testing
    rf = RandomForestClassifier()
    p, n_class = 42, 2
    n = n_class * 5
    X = np.random.rand(n, p)
    y = np.repeat(range(n_class), n // n_class)  # integer division so repeat count is an int
    rf.fit(X, y)
    jl.dump(rf, filename)


# EXAMPLE DATASETS
Example #24
Source File: test_missforest.py From missingpy with GNU General Public License v3.0 | 5 votes |
def test_missforest_categorical_single():
    # Test imputation with default parameter values
    # Test with a single missing value
    df = np.array([
        [0, 0, 0, 1],
        [0, 1, 2, 2],
        [0, 2, 3, 2],
        [np.nan, 4, 5, 5],
        [1, 7, 6, 7],
        [1, 8, 8, 8],
        [1, 15, 18, 19],
    ])
    y = df[:, 0]
    X = df[:, 1:]
    good_rows = np.where(~np.isnan(y))[0]
    bad_rows = np.where(np.isnan(y))[0]

    rf = RandomForestClassifier(n_estimators=10, random_state=1337)
    rf.fit(X=X[good_rows], y=y[good_rows])
    pred_val = rf.predict(X[bad_rows])

    df_imputed = np.array([
        [0, 0, 0, 1],
        [0, 1, 2, 2],
        [0, 2, 3, 2],
        [pred_val, 4, 5, 5],
        [1, 7, 6, 7],
        [1, 8, 8, 8],
        [1, 15, 18, 19],
    ])

    imputer = MissForest(n_estimators=10, random_state=1337)
    assert_array_equal(imputer.fit_transform(df, cat_vars=0), df_imputed)
    assert_array_equal(imputer.fit_transform(df, cat_vars=[0]), df_imputed)
Example #25
Source File: unit_tests.py From boruta_py with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_get_tree_num(self):
    rfc = RandomForestClassifier(max_depth=10)
    bt = BorutaPy(rfc)
    self.assertEqual(bt._get_tree_num(10), 44, "Tree Est. Math Fail")
    self.assertEqual(bt._get_tree_num(100), 141, "Tree Est. Math Fail")
Example #26
Source File: unit_tests.py From boruta_py with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_if_boruta_extracts_relevant_features(self):
    np.random.seed(42)
    y = np.random.binomial(1, 0.5, 1000)
    X = np.zeros((1000, 10))

    z = y - np.random.binomial(1, 0.1, 1000) + np.random.binomial(1, 0.1, 1000)
    z[z == -1] = 0
    z[z == 2] = 1

    # 5 relevant features
    X[:, 0] = z
    X[:, 1] = y * np.abs(np.random.normal(0, 1, 1000)) + np.random.normal(0, 0.1, 1000)
    X[:, 2] = y + np.random.normal(0, 1, 1000)
    X[:, 3] = y ** 2 + np.random.normal(0, 1, 1000)
    X[:, 4] = np.sqrt(y) + np.random.binomial(2, 0.1, 1000)

    # 5 irrelevant features
    X[:, 5] = np.random.normal(0, 1, 1000)
    X[:, 6] = np.random.poisson(1, 1000)
    X[:, 7] = np.random.binomial(1, 0.3, 1000)
    X[:, 8] = np.random.normal(0, 1, 1000)
    X[:, 9] = np.random.poisson(1, 1000)

    rfc = RandomForestClassifier()
    bt = BorutaPy(rfc)
    bt.fit(X, y)

    # make sure that only the relevant features are returned
    self.assertListEqual(list(range(5)), list(np.where(bt.support_)[0]))

    # test if this works as expected for dataframe input
    X_df, y_df = pd.DataFrame(X), pd.Series(y)
    bt.fit(X_df, y_df)
    self.assertListEqual(list(range(5)), list(np.where(bt.support_)[0]))

    # check if a dataframe is returned when return_df=True
    self.assertIsInstance(bt.transform(X_df, return_df=True), pd.DataFrame)
Example #27
Source File: toxcast_rf.py From deepchem with MIT License | 5 votes |
def model_builder(model_dir):
    sklearn_model = RandomForestClassifier(
        class_weight="balanced", n_estimators=500, n_jobs=-1)
    return dc.models.SklearnModel(sklearn_model, model_dir)
Example #28
Source File: muv_sklearn.py From deepchem with MIT License | 5 votes |
def model_builder(model_dir):
    sklearn_model = RandomForestClassifier(
        class_weight="balanced", n_estimators=500)
    return dc.models.SklearnModel(sklearn_model, model_dir)
Example #29
Source File: pcba_sklearn.py From deepchem with MIT License | 5 votes |
def model_builder(model_dir):
    sklearn_model = RandomForestClassifier(
        class_weight="balanced", n_estimators=500)
    return SklearnModel(sklearn_model, model_dir)
Example #30
Source File: sweet.py From deepchem with MIT License | 5 votes |
def model_builder(model_dir):
    sklearn_model = RandomForestClassifier(
        class_weight="balanced", n_estimators=500, n_jobs=-1)
    return dc.models.SklearnModel(sklearn_model, model_dir)