Python sklearn.ensemble.RandomForestClassifier() Examples

The following are 30 code examples for showing how to use sklearn.ensemble.RandomForestClassifier(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module sklearn.ensemble , or try the search function .

Example 1
Project: MaliciousMacroBot   Author: egaus   File: mmbot.py    License: MIT License 6 votes vote down vote up
def mmb_evaluate_model(self):
        """
        Returns scores from cross validation evaluation on the malicious / benign classifier
        """
        predictive_features = self.features['predictive_features']
        self.clf_X = self.modeldata[predictive_features].values
        self.clf_y = np.array(self.modeldata['label'])

        X_train, X_test, y_train, y_test = train_test_split(self.clf_X, self.clf_y, test_size=0.2, random_state=0)
        lb = LabelBinarizer()
        y_train = np.array([number[0] for number in lb.fit_transform(y_train)])
        eval_cls = RandomForestClassifier(n_estimators=100, max_features=.2)
        eval_cls.fit(X_train, y_train)

        recall = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='recall')
        precision = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='precision')
        accuracy = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='accuracy')
        f1_score = cross_val_score(eval_cls, X_train, y_train, cv=5, scoring='f1_macro')

        return {'accuracy': accuracy, 'f1': f1_score, 'precision': precision, 'recall': recall} 
Example 2
Project: OpenChem   Author: Mariewelt   File: vanilla_model.py    License: MIT License 6 votes vote down vote up
def __init__(self, model_type='classifier', feature_type='fingerprints',
                 n_estimators=100, n_ensemble=5):
        super(RandomForestQSAR, self).__init__()
        self.n_estimators = n_estimators
        self.n_ensemble = n_ensemble
        self.model = []
        self.model_type = model_type
        if self.model_type == 'classifier':
            for i in range(n_ensemble):
                self.model.append(RFC(n_estimators=n_estimators))
        elif self.model_type == 'regressor':
            for i in range(n_ensemble):
                self.model.append(RFR(n_estimators=n_estimators))
        else:
            raise ValueError('invalid value for argument')
        self.feature_type = feature_type
        if self.feature_type == 'descriptors':
            self.calc = Calculator(descriptors, ignore_3D=True)
            self.desc_mean = [0]*self.n_ensemble 
Example 3
Project: cgpm   Author: probcomp   File: forest.py    License: Apache License 2.0 6 votes vote down vote up
def __init__(self, outputs, inputs, k=None, hypers=None, params=None,
            distargs=None, rng=None):
        self.rng = gu.gen_rng() if rng is None else rng
        self.outputs = outputs
        self.inputs = inputs
        self.rng = gu.gen_rng() if rng is None else rng
        assert len(self.outputs) == 1
        assert len(self.inputs) >= 1
        assert self.outputs[0] not in self.inputs
        assert len(distargs['inputs']['stattypes']) == len(self.inputs)
        self.stattypes = distargs['inputs']['stattypes']
        # Number of output categories and input dimension.
        # XXX WHATTA HACK. BayesDB passes in top-level kwargs, not in distargs.
        self.k = k if k is not None else int(distargs['k'])
        self.p = len(distargs['inputs']['stattypes'])
        # Sufficient statistics.
        self.N = 0
        self.data = Data(x=OrderedDict(), Y=OrderedDict())
        self.counts = [0] * self.k
        # Outlier and random forest parameters.
        if params is None: params = {}
        self.alpha = params.get('alpha', .1)
        self.regressor = params.get('forest', None)
        if self.regressor is None:
            self.regressor = RandomForestClassifier(random_state=self.rng) 
Example 4
Project: Karta   Author: CheckPointSW   File: function.py    License: MIT License 6 votes vote down vote up
def trainFunctionTypeClassifier(self, scs):
        """Train the type classifier, according to all known code segments.

        Args:
            scs (list): list of all known (sark) code segments

        Note:
            Training must happen *after* the calibration phase
        """
        functions = []
        for sc in scs:
            functions += list(filter(lambda func: not self._analyzer.fptr_identifier.isPointedFunction(func.start_ea), sc.functions))
        clf = RandomForestClassifier(n_estimators=100)
        eas = list(map(lambda x: x.start_ea, functions))
        data_set = list(map(self.extractFunctionTypeSample, eas))
        data_results = list(map(self._analyzer.codeType, eas))
        # classify
        clf.fit(data_set, data_results)
        # store the results
        self._type_classifier = clf 
Example 5
Project: stock-price-prediction   Author: chinuy   File: classifier.py    License: MIT License 6 votes vote down vote up
def buildModel(dataset, method, parameters):
    """
    Build final model for predicting real testing data
    """
    features = dataset.columns[0:-1]

    if method == 'RNN':
        clf = performRNNlass(dataset[features], dataset['UpDown'])
        return clf

    elif method == 'RF':
        clf = RandomForestClassifier(n_estimators=1000, n_jobs=-1)

    elif method == 'KNN':
        clf = neighbors.KNeighborsClassifier()

    elif method == 'SVM':
        c = parameters[0]
        g =  parameters[1]
        clf = SVC(C=c, gamma=g)

    elif method == 'ADA':
        clf = AdaBoostClassifier()

    return clf.fit(dataset[features], dataset['UpDown']) 
Example 6
Project: Machine-Learning-for-Beginner-by-Python3   Author: Anfany   File: adult_RF_Classify.py    License: MIT License 6 votes vote down vote up
def Train(data, treecount, tezh, yanzhgdata):
    model = RFC(n_estimators=treecount, max_features=tezh, class_weight='balanced')
    model.fit(data[:, :-1], data[:, -1])
    # 给出训练数据的预测值
    train_out = model.predict(data[:, :-1])
    # 计算MSE
    train_mse = fmse(data[:, -1], train_out)[0]

    # 给出验证数据的预测值
    add_yan = model.predict(yanzhgdata[:, :-1])
    # 计算f1度量
    add_mse = fmse(yanzhgdata[:, -1], add_yan)[0]
    print(train_mse, add_mse)
    return train_mse, add_mse

# 最终确定组合的函数 
Example 7
def make_pipeline(encoding_method):
    # static transformers from the other columns
    transformers = [('one-hot-clean', encoder_dict['one-hot'], clean_columns)]
    # adding the encoded column
    transformers += [(encoding_method + '-dirty', encoder_dict[encoding_method],
                      [dirty_column])]
    pipeline = Pipeline([
        # Use ColumnTransformer to combine the features
        ('union', ColumnTransformer(
            transformers=transformers,
            remainder='drop')),
        ('scaler', StandardScaler(with_mean=False)),
        ('classifier', RandomForestClassifier(random_state=5))
    ])

    return pipeline


###############################################################################
# Evaluation of different encoding methods
# -----------------------------------------
# We then loop over encoding methods, scoring the different pipeline predictions
# using a cross validation score: 
Example 8
Project: StockRecommendSystem   Author: doncat99   File: Stock_Prediction_Model_Random_Forrest.py    License: MIT License 6 votes vote down vote up
def build_model(self, X_train, y_train):
        if self.paras.load == True:
            model = self.load_training_model(self.paras.window_len)
            if model != None:
                return model

        print('build Random Forrest model...')

        # range of number of trees : 5*(1 -> 10) = 5,10,...,50 trees
        t_min = self.paras.tree_min[index]
        t_max = self.paras.tree_max[index]
        # range of max of features : 1 -> 10 features
        f_min = self.paras.feature_min[index]
        f_max = self.paras.feature_max[index]
        # range of window : 1 -> 70 days 
        w_min = self.paras.window_min
        w_max = self.paras.window_max
        
        w_opt, n_opt, m_opt = self.best_window(X_train, y_train, w_min,w_max,t_min,t_max,f_min,f_max)
        model = RandomForestClassifier(n_estimators=n_opt,max_features=m_opt, n_jobs=8, verbose=self.paras.verbose)
        return model 
Example 9
Project: redshells   Author: m3dev   File: test_train_pairwise_similarity_model.py    License: MIT License 6 votes vote down vote up
def test_run(self):
        self.input_data['item2embedding'] = dict(i0=[1, 2], i1=[3, 4])
        self.input_data['similarity_data'] = pd.DataFrame(
            dict(item1=['i0', 'i0', 'i1'], item2=['i0', 'i1', 'i1'], similarity=[1, 0, 1]))

        task = TrainPairwiseSimilarityModel(
            item2embedding_task=_DummyTask(),
            similarity_data_task=_DummyTask(),
            model_name='RandomForestClassifier',
            item0_column_name='item1',
            item1_column_name='item2',
            similarity_column_name='similarity')
        task.load = MagicMock(side_effect=self._load)
        task.dump = MagicMock(side_effect=self._dump)

        task.run()
        self.assertIsInstance(self.dump_data, RandomForestClassifier) 
Example 10
Project: interpret-text   Author: interpretml   File: common_utils.py    License: MIT License 5 votes vote down vote up
def create_random_forest_tfidf():
    vectorizer = TfidfVectorizer(lowercase=False)
    rf = RandomForestClassifier(n_estimators=500, random_state=777)
    return Pipeline([("vectorizer", vectorizer), ("rf", rf)]) 
Example 11
Project: interpret-text   Author: interpretml   File: common_utils.py    License: MIT License 5 votes vote down vote up
def create_random_forest_vectorizer():
    vectorizer = CountVectorizer(lowercase=False, min_df=0.0, binary=True)
    rf = RandomForestClassifier(n_estimators=500, random_state=777)
    return Pipeline([("vectorizer", vectorizer), ("rf", rf)]) 
Example 12
Project: interpret-text   Author: interpretml   File: common_utils.py    License: MIT License 5 votes vote down vote up
def create_sklearn_random_forest_classifier(X, y):
    rfc = ensemble.RandomForestClassifier(max_depth=4, random_state=777)
    model = rfc.fit(X, y)
    return model 
Example 13
Project: MaliciousMacroBot   Author: egaus   File: mmbot.py    License: MIT License 5 votes vote down vote up
def build_models(self):
        """
        After get_language_features is called, this function builds the models based on
        the classifier matrix and labels.
        :return:
        """
        self.cls = RandomForestClassifier(n_estimators=100, max_features=.2)
        # build classifier
        self.cls.fit(self.clf_X, self.clf_y)

        return self.cls 
Example 14
Project: fake-news-detection   Author: aldengolab   File: model_loop.py    License: MIT License 5 votes vote down vote up
def define_clfs_params(self):
        '''
        Defines all relevant parameters and classes for classfier objects.
        Edit these if you wish to change parameters.
        '''
        # These are the classifiers
        self.clfs = {
            'RF': RandomForestClassifier(n_estimators = 50, n_jobs = -1),
            'ET': ExtraTreesClassifier(n_estimators = 10, n_jobs = -1, criterion = 'entropy'),
            'AB': AdaBoostClassifier(DecisionTreeClassifier(max_depth = [1, 5, 10, 15]), algorithm = "SAMME", n_estimators = 200),
            'LR': LogisticRegression(penalty = 'l1', C = 1e5),
            'SVM': svm.SVC(kernel = 'linear', probability = True, random_state = 0),
            'GB': GradientBoostingClassifier(learning_rate = 0.05, subsample = 0.5, max_depth = 6, n_estimators = 10),
            'NB': GaussianNB(),
            'DT': DecisionTreeClassifier(),
            'SGD': SGDClassifier(loss = 'log', penalty = 'l2'),
            'KNN': KNeighborsClassifier(n_neighbors = 3)
            }
        # These are the parameters which will be run through
        self.params = {
             'RF':{'n_estimators': [1,10,100,1000], 'max_depth': [10, 15,20,30,40,50,60,70,100], 'max_features': ['sqrt','log2'],'min_samples_split': [2,5,10], 'random_state': [1]},
             'LR': {'penalty': ['l1','l2'], 'C': [0.00001,0.0001,0.001,0.01,0.1,1,10], 'random_state': [1]},
             'SGD': {'loss': ['log'], 'penalty': ['l2','l1','elasticnet'], 'random_state': [1]},
             'ET': {'n_estimators': [1,10,100,1000], 'criterion' : ['gini', 'entropy'], 'max_depth': [1,3,5,10,15], 'max_features': ['sqrt','log2'],'min_samples_split': [2,5,10], 'random_state': [1]},
             'AB': {'algorithm': ['SAMME', 'SAMME.R'], 'n_estimators': [1,10,100,1000], 'random_state': [1]},
             'GB': {'n_estimators': [1,10,100,1000], 'learning_rate' : [0.001,0.01,0.05,0.1,0.5],'subsample' : [0.1,0.5,1.0], 'max_depth': [1,3,5,10,20,50,100], 'random_state': [1]},
             'NB': {},
             'DT': {'criterion': ['gini', 'entropy'], 'max_depth': [1,2,15,20,30,40,50], 'max_features': ['sqrt','log2'],'min_samples_split': [2,5,10], 'random_state': [1]},
             'SVM' :{'C' :[0.00001,0.0001,0.001,0.01,0.1,1,10],'kernel':['linear'], 'random_state': [1]},
             'KNN' :{'n_neighbors': [1,5,10,25,50,100],'weights': ['uniform','distance'],'algorithm': ['auto','ball_tree','kd_tree']}
             } 
Example 15
Project: xcessiv   Author: reiinakano   File: test_stacker.py    License: Apache License 2.0 5 votes vote down vote up
def setUp(self):
        bl1 = RandomForestClassifier(random_state=8)
        bl2 = LogisticRegression()
        bl3 = RandomForestClassifier(max_depth=10, random_state=10)

        meta_est = LogisticRegression()

        skf = StratifiedKFold(random_state=8).split

        self.stacked_ensemble = stacker.XcessivStackedEnsemble(
            [bl1, bl2, bl3],
            ['predict', 'predict_proba', 'predict_proba'],
            meta_est,
            skf
        ) 
Example 16
Project: xcessiv   Author: reiinakano   File: test_functions.py    License: Apache License 2.0 5 votes vote down vote up
def test_is_valid_json(self):
        assert functions.is_valid_json({'x': ['i am serializable', 0.1]})
        assert not functions.is_valid_json({'x': RandomForestClassifier()}) 
Example 17
Project: xcessiv   Author: reiinakano   File: test_functions.py    License: Apache License 2.0 5 votes vote down vote up
def test_make_serializable(self):
        assert functions.is_valid_json({'x': ['i am serializable', 0.1]})
        assert not functions.is_valid_json({'x': RandomForestClassifier()})
        assert functions.make_serializable(
            {
                'x': ['i am serializable', 0.1],
                'y': RandomForestClassifier()
            }
        ) == {'x': ['i am serializable', 0.1]} 
Example 18
Project: xcessiv   Author: reiinakano   File: test_functions.py    License: Apache License 2.0 5 votes vote down vote up
def test_verify_estimator_class(self):
        np.random.seed(8)
        performance_dict, hyperparameters = functions.verify_estimator_class(
            RandomForestClassifier(),
            'predict_proba',
            dict(Accuracy=self.source),
            self.dataset_properties
        )
        assert round(performance_dict['Accuracy'], 3) == 0.8
        assert hyperparameters == {
            'warm_start': False,
            'oob_score': False,
            'n_jobs': 1,
            'verbose': 0,
            'max_leaf_nodes': None,
            'bootstrap': True,
            'min_samples_leaf': 1,
            'n_estimators': 10,
            'min_samples_split': 2,
            'min_weight_fraction_leaf': 0.0,
            'criterion': 'gini',
            'random_state': None,
            'min_impurity_split': None,
            'min_impurity_decrease': 0.0,
            'max_features': 'auto',
            'max_depth': None,
            'class_weight': None
        } 
Example 19
Project: xcessiv   Author: reiinakano   File: test_functions.py    License: Apache License 2.0 5 votes vote down vote up
def test_non_serializable_parameters(self):
        pipeline = Pipeline([('pca', PCA()), ('rf', RandomForestClassifier())])
        performance_dict, hyperparameters = functions.verify_estimator_class(
            pipeline,
            'predict_proba',
            dict(Accuracy=self.source),
            self.dataset_properties
        )
        assert functions.is_valid_json(hyperparameters) 
Example 20
Project: xcessiv   Author: reiinakano   File: test_functions.py    License: Apache License 2.0 5 votes vote down vote up
def test_assertion_meta_feature_generator(self):
        np.random.seed(8)
        self.assertRaises(
            exceptions.UserError,
            functions.verify_estimator_class,
            RandomForestClassifier(),
            'decision_function',
            dict(Accuracy=self.source),
            self.dataset_properties
        ) 
Example 21
Project: xcessiv   Author: reiinakano   File: test_models.py    License: Apache License 2.0 5 votes vote down vote up
def setUp(self):
        self.base_learner_origin = models.BaseLearnerOrigin(
            source=''.join([
                "from sklearn.ensemble import RandomForestClassifier\n",
                "base_learner = RandomForestClassifier(random_state=8)"
            ])
        ) 
Example 22
Project: xcessiv   Author: reiinakano   File: test_models.py    License: Apache License 2.0 5 votes vote down vote up
def test_return_estimator_from_json(self):
        est = self.base_learner_origin.return_estimator()
        assert isinstance(est, RandomForestClassifier) 
Example 23
Project: yatsm   Author: ceholden   File: conftest.py    License: MIT License 5 votes vote down vote up
def make_example_classifier(filename):
    # Create a dummy RF model for train/classify testing
    rf = RandomForestClassifier()
    p, n_class = 42, 2
    n = n_class * 5
    X = np.random.rand(n, p)
    y = np.repeat(range(n_class), n / n_class)
    rf.fit(X, y)
    jl.dump(rf, filename)


# EXAMPLE DATASETS 
Example 24
Project: missingpy   Author: epsilon-machine   File: test_missforest.py    License: GNU General Public License v3.0 5 votes vote down vote up
def test_missforest_categorical_single():
    # Test imputation with default parameter values

    # Test with a single missing value
    df = np.array([
        [0,      0,      0,      1],
        [0,      1,      2,      2],
        [0,      2,      3,      2],
        [np.nan, 4,      5,      5],
        [1,      7,      6,      7],
        [1,      8,      8,      8],
        [1,     15,     18,     19],
    ])

    y = df[:, 0]
    X = df[:, 1:]
    good_rows = np.where(~np.isnan(y))[0]
    bad_rows = np.where(np.isnan(y))[0]

    rf = RandomForestClassifier(n_estimators=10, random_state=1337)
    rf.fit(X=X[good_rows], y=y[good_rows])
    pred_val = rf.predict(X[bad_rows])

    df_imputed = np.array([
        [0,         0,      0,      1],
        [0,         1,      2,      2],
        [0,         2,      3,      2],
        [pred_val,  4,      5,      5],
        [1,         7,      6,      7],
        [1,         8,      8,      8],
        [1,         15,     18,     19],
    ])

    imputer = MissForest(n_estimators=10, random_state=1337)
    assert_array_equal(imputer.fit_transform(df, cat_vars=0), df_imputed)
    assert_array_equal(imputer.fit_transform(df, cat_vars=[0]), df_imputed) 
Example 25
Project: boruta_py   Author: scikit-learn-contrib   File: unit_tests.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_get_tree_num(self):
        rfc = RandomForestClassifier(max_depth=10)
        bt = BorutaPy(rfc)
        self.assertEqual(bt._get_tree_num(10), 44, "Tree Est. Math Fail")
        self.assertEqual(bt._get_tree_num(100), 141, "Tree Est. Math Fail") 
Example 26
Project: boruta_py   Author: scikit-learn-contrib   File: unit_tests.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_if_boruta_extracts_relevant_features(self):
        np.random.seed(42)
        y = np.random.binomial(1, 0.5, 1000)
        X = np.zeros((1000, 10))

        z = y - np.random.binomial(1, 0.1, 1000) + np.random.binomial(1, 0.1, 1000)
        z[z == -1] = 0
        z[z == 2] = 1

        # 5 relevant features
        X[:, 0] = z
        X[:, 1] = y * np.abs(np.random.normal(0, 1, 1000)) + np.random.normal(0, 0.1, 1000)
        X[:, 2] = y + np.random.normal(0, 1, 1000)
        X[:, 3] = y ** 2 + np.random.normal(0, 1, 1000)
        X[:, 4] = np.sqrt(y) + np.random.binomial(2, 0.1, 1000)

        # 5 irrelevant features
        X[:, 5] = np.random.normal(0, 1, 1000)
        X[:, 6] = np.random.poisson(1, 1000)
        X[:, 7] = np.random.binomial(1, 0.3, 1000)
        X[:, 8] = np.random.normal(0, 1, 1000)
        X[:, 9] = np.random.poisson(1, 1000)

        rfc = RandomForestClassifier()
        bt = BorutaPy(rfc)
        bt.fit(X, y)

        # make sure that only all the relevant features are returned
        self.assertListEqual(list(range(5)), list(np.where(bt.support_)[0]))

        # test if this works as expected for dataframe input
        X_df, y_df = pd.DataFrame(X), pd.Series(y)
        bt.fit(X_df, y_df)
        self.assertListEqual(list(range(5)), list(np.where(bt.support_)[0]))

        # check it dataframe is returned when return_df=True
        self.assertIsInstance(bt.transform(X_df, return_df=True), pd.DataFrame) 
Example 27
Project: deepchem   Author: deepchem   File: toxcast_rf.py    License: MIT License 5 votes vote down vote up
def model_builder(model_dir):
  sklearn_model = RandomForestClassifier(
      class_weight="balanced", n_estimators=500, n_jobs=-1)
  return dc.models.SklearnModel(sklearn_model, model_dir) 
Example 28
Project: deepchem   Author: deepchem   File: muv_sklearn.py    License: MIT License 5 votes vote down vote up
def model_builder(model_dir):
  sklearn_model = RandomForestClassifier(
      class_weight="balanced", n_estimators=500)
  return dc.models.SklearnModel(sklearn_model, model_dir) 
Example 29
Project: deepchem   Author: deepchem   File: pcba_sklearn.py    License: MIT License 5 votes vote down vote up
def model_builder(model_dir):
  sklearn_model = RandomForestClassifier(
      class_weight="balanced", n_estimators=500)
  return SklearnModel(sklearn_model, model_dir) 
Example 30
Project: deepchem   Author: deepchem   File: sweet.py    License: MIT License 5 votes vote down vote up
def model_builder(model_dir):
  sklearn_model = RandomForestClassifier(
      class_weight="balanced", n_estimators=500, n_jobs=-1)
  return dc.models.SklearnModel(sklearn_model, model_dir)