Python sklearn.ensemble.IsolationForest() Examples

The following are 30 code examples showing how to use sklearn.ensemble.IsolationForest(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module sklearn.ensemble, or try the search function.

Example 1
Project: Deep-SAD-PyTorch   Author: lukasruff   File: isoforest.py    License: MIT License 6 votes vote down vote up
def __init__(self, hybrid=False, n_estimators=100, max_samples='auto', contamination=0.1, n_jobs=-1, seed=None,
                 **kwargs):
        """Set up the Isolation Forest wrapper.

        The hyperparameters are stored on the instance and used to build the
        underlying ``IsolationForest`` (``seed`` is passed as ``random_state``;
        any extra keyword arguments are forwarded unchanged). The hybrid-mode
        fields and an empty results record are initialised as well.
        """
        # Remember the hyperparameters exactly as they were passed in.
        self.n_estimators, self.max_samples = n_estimators, max_samples
        self.contamination, self.n_jobs, self.seed = contamination, n_jobs, seed

        # The estimator that does the actual work.
        forest_args = dict(n_estimators=n_estimators, max_samples=max_samples,
                           contamination=contamination, n_jobs=n_jobs,
                           random_state=seed)
        self.model = IsolationForest(**forest_args, **kwargs)

        # Hybrid mode: the autoencoder network slot starts out empty.
        self.hybrid = hybrid
        self.ae_net = None

        # Timing and evaluation results start out empty.
        self.results = dict.fromkeys(
            ('train_time', 'test_time', 'test_auc', 'test_scores'))
Example 2
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_iforest.py    License: MIT License 6 votes vote down vote up
def test_iforest_sparse():
    """Sparse and dense inputs must give identical IsolationForest predictions."""
    rng = check_random_state(0)
    X_train, X_test, _, _ = train_test_split(
        boston.data[:50], boston.target[:50], random_state=rng)
    settings = ParameterGrid({"max_samples": [0.5, 1.0],
                              "bootstrap": [True, False]})

    def fit_and_predict(train, test, params):
        # Same estimator count and seed for every fit so results are comparable.
        forest = IsolationForest(n_estimators=10, random_state=1, **params)
        return forest.fit(train).predict(test)

    for to_sparse in (csc_matrix, csr_matrix):
        sparse_train = to_sparse(X_train)
        sparse_test = to_sparse(X_test)

        for params in settings:
            from_sparse = fit_and_predict(sparse_train, sparse_test, params)
            from_dense = fit_and_predict(X_train, X_test, params)
            assert_array_equal(from_sparse, from_dense)
Example 3
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_iforest.py    License: MIT License 6 votes vote down vote up
def test_iforest_performance():
    """Test Isolation Forest performs well"""

    # Generate train/test data: 120 scaled standard-normal points, the first
    # 100 used for training and the last 20 kept as normal test observations.
    # (A previous assignment of X_train from np.r_[X + 2, X - 2] was
    # immediately overwritten and has been removed.)
    rng = check_random_state(2)
    X = 0.3 * rng.randn(120, 2)
    X_train = X[:100]

    # Generate some abnormal novel observations
    X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))
    X_test = np.r_[X[100:], X_outliers]
    y_test = np.array([0] * 20 + [1] * 20)

    # fit the model
    clf = IsolationForest(max_samples=100, random_state=rng).fit(X_train)

    # predict scores (the lower, the more normal)
    y_pred = - clf.decision_function(X_test)

    # check that outliers are ranked above normal points (ROC AUC > 0.98)
    assert_greater(roc_auc_score(y_test, y_pred), 0.98)
Example 4
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_iforest.py    License: MIT License 6 votes vote down vote up
def test_deprecation():
    """Check the deprecation warnings raised by fit and by ``threshold_``."""
    X = [[0.0], [1.0]]
    clf = IsolationForest()

    # Both FutureWarnings are expected from fitting the same unfitted estimator.
    future_messages = (
        'default contamination parameter 0.1 will change in version 0.22 to "auto"',
        'behaviour="old" is deprecated and will be removed in version 0.22',
    )
    for message in future_messages:
        assert_warns_message(FutureWarning, message, clf.fit, X)

    # Reading threshold_ on a fitted estimator must warn as well.
    fitted = IsolationForest().fit(X)
    threshold_message = ("threshold_ attribute is deprecated in 0.20 and will"
                         " be removed in 0.22.")
    assert_warns_message(DeprecationWarning, threshold_message,
                         getattr, fitted, "threshold_")
Example 5
Project: safekit   Author: pnnl   File: iso_forest.py    License: MIT License 6 votes vote down vote up
def sample_hyps_iso_forest(nest, contam, boot):
    """
    Build an unfitted IsolationForest from the given hyperparameters.

    :param nest: passed to IsolationForest as ``n_estimators``.
    :param contam: passed to IsolationForest as ``contamination``.
    :param boot: passed to IsolationForest as ``bootstrap``.
    :return: An IsolationForest object with specified hyperparameters, used to detect anomaly.
    """
    # Settings that are not sampled and always take these values.
    fixed_settings = {
        'max_samples': 'auto',
        'max_features': 1.0,  # use all features
        'n_jobs': -1,  # use all cores
        'verbose': 0,
    }
    return IsolationForest(n_estimators=nest, contamination=contam,
                           bootstrap=boot, **fixed_settings)
Example 6
Project: visualqc   Author: raamana   File: outliers.py    License: Apache License 2.0 6 votes vote down vote up
def run_isolation_forest(features, id_list, fraction_of_outliers=.3):
    """Return the entries of ``id_list`` flagged as outliers by an Isolation Forest.

    The forest is fitted on ``features`` using every sample per tree and a
    fixed random seed. Entries whose decision score falls below the
    ``fraction_of_outliers`` percentile of all scores are selected.
    """
    forest = IsolationForest(max_samples=features.shape[0],
                             contamination=fraction_of_outliers,
                             random_state=np.random.RandomState(1984))
    forest.fit(features)
    scores = forest.decision_function(features)

    # Lower scores are more anomalous: cut at the requested percentile.
    cutoff = stats.scoreatpercentile(scores, 100 * fraction_of_outliers)
    is_outlier = scores < cutoff
    return id_list[is_outlier]
Example 7
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_iforest.py    License: MIT License 6 votes vote down vote up
def test_iforest_sparse():
    """Sparse and dense inputs must give identical IsolationForest predictions."""
    rng = check_random_state(0)
    X_train, X_test, _, _ = train_test_split(
        boston.data[:50], boston.target[:50], random_state=rng)
    settings = ParameterGrid({"max_samples": [0.5, 1.0],
                              "bootstrap": [True, False]})

    def fit_and_predict(train, test, params):
        # Same estimator count and seed for every fit so results are comparable.
        forest = IsolationForest(n_estimators=10, random_state=1, **params)
        return forest.fit(train).predict(test)

    for to_sparse in (csc_matrix, csr_matrix):
        sparse_train = to_sparse(X_train)
        sparse_test = to_sparse(X_test)

        for params in settings:
            from_sparse = fit_and_predict(sparse_train, sparse_test, params)
            from_dense = fit_and_predict(X_train, X_test, params)
            assert_array_equal(from_sparse, from_dense)
Example 8
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_iforest.py    License: MIT License 6 votes vote down vote up
def test_iforest_error():
    """Invalid ``max_samples`` values must be rejected when fitting."""
    X = iris.data

    def assert_fit_rejects(max_samples):
        assert_raises(ValueError,
                      IsolationForest(max_samples=max_samples).fit, X)

    # Out-of-range max_samples values.
    for bad_value in (-1, 0.0, 2.0):
        assert_fit_rejects(bad_value)

    # The dataset has fewer than 256 samples: explicitly asking for more
    # samples than available must warn, while leaving max_samples implicit
    # (or small) must not.
    assert_warns_message(UserWarning,
                         "max_samples will be set to n_samples for estimation",
                         IsolationForest(max_samples=1000).fit, X)
    for quiet_value in ('auto', np.int64(2)):
        assert_no_warnings(IsolationForest(max_samples=quiet_value).fit, X)

    for bad_value in ('foobar', 1.5):
        assert_fit_rejects(bad_value)
Example 9
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_iforest.py    License: MIT License 6 votes vote down vote up
def test_iforest_performance():
    """Test Isolation Forest performs well"""

    # Generate train/test data: 120 scaled standard-normal points, the first
    # 100 used for training and the last 20 kept as normal test observations.
    # (A previous assignment of X_train from np.r_[X + 2, X - 2] was
    # immediately overwritten and has been removed.)
    rng = check_random_state(2)
    X = 0.3 * rng.randn(120, 2)
    X_train = X[:100]

    # Generate some abnormal novel observations
    X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))
    X_test = np.r_[X[100:], X_outliers]
    y_test = np.array([0] * 20 + [1] * 20)

    # fit the model
    clf = IsolationForest(max_samples=100, random_state=rng).fit(X_train)

    # predict scores (the lower, the more normal)
    y_pred = - clf.decision_function(X_test)

    # check that outliers are ranked above normal points (ROC AUC > 0.98)
    assert_greater(roc_auc_score(y_test, y_pred), 0.98)
Example 10
Project: monasca-analytics   Author: openstack   File: isolation_forest.py    License: Apache License 2.0 5 votes vote down vote up
def __init__(self, _id, _config):
        """Initialise the parent class and store ``nb_samples`` from the config as an int."""
        super(IsolationForest, self).__init__(_id, _config)
        configured_samples = _config['nb_samples']
        self._nb_samples = int(configured_samples)
Example 11
Project: monasca-analytics   Author: openstack   File: isolation_forest.py    License: Apache License 2.0 5 votes vote down vote up
def get_default_config():
        """Return the default configuration: this class's name and ``N_SAMPLES``."""
        return dict(module=IsolationForest.__name__, nb_samples=N_SAMPLES)
Example 12
Project: monasca-analytics   Author: openstack   File: isolation_forest.py    License: Apache License 2.0 5 votes vote down vote up
def _get_best_detector(self, train):
        """Fit a default ``ensemble.IsolationForest`` on ``train`` and return it."""
        fitted_forest = ensemble.IsolationForest()
        fitted_forest.fit(train)
        return fitted_forest
Example 13
Project: monasca-analytics   Author: openstack   File: test_isolation_forest.py    License: Apache License 2.0 5 votes vote down vote up
def setUp(self):
        """Create the IsolationForest component under test with a fake config."""
        super(TestIsolationForest, self).setUp()
        fake_config = {"module": "fake", "nb_samples": 1000}
        self.if_sml = isolation_forest.IsolationForest("fakeid", fake_config)
Example 14
Project: monasca-analytics   Author: openstack   File: test_isolation_forest.py    License: Apache License 2.0 5 votes vote down vote up
def test_learn_structure(self):
        """``learn_structure`` must return an ``ensemble.IsolationForest``."""
        samples = self.get_testing_data()
        self.assertIsInstance(self.if_sml.learn_structure(samples),
                              ensemble.IsolationForest)
Example 15
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_iforest.py    License: MIT License 5 votes vote down vote up
def test_iforest():
    """Smoke-test fit and predict over a grid of Isolation Forest settings."""
    X_train = np.array([[0, 1], [1, 2]])
    X_test = np.array([[2, 1], [1, 1]])

    settings = ParameterGrid({"n_estimators": [3],
                              "max_samples": [0.5, 1.0, 3],
                              "bootstrap": [True, False]})

    # Only checks that every combination runs; warnings are not of interest.
    with ignore_warnings():
        for params in settings:
            forest = IsolationForest(random_state=rng, **params)
            forest.fit(X_train).predict(X_test)
Example 16
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_iforest.py    License: MIT License 5 votes vote down vote up
def test_iforest_error():
    """Test that it gives proper exception on deficient input."""
    X = iris.data

    def assert_fit_rejects(max_samples):
        assert_raises(ValueError,
                      IsolationForest(max_samples=max_samples).fit, X)

    def assert_fit_emits_no_user_warning(max_samples):
        # assert_no_warnings is not usable here since it enables a
        # PendingDeprecationWarning triggered by scipy.sparse's use of
        # np.matrix (see issue #11251), so only UserWarnings are counted.
        with pytest.warns(None) as record:
            IsolationForest(max_samples=max_samples).fit(X)
        user_warnings = [caught for caught in record
                         if issubclass(caught.category, UserWarning)]
        assert len(user_warnings) == 0

    # Out-of-range max_samples values.
    for bad_value in (-1, 0.0, 2.0):
        assert_fit_rejects(bad_value)

    # The dataset has fewer than 256 samples: explicitly asking for more
    # samples than available must warn, while leaving max_samples implicit
    # (or small) must not.
    assert_warns_message(UserWarning,
                         "max_samples will be set to n_samples for estimation",
                         IsolationForest(max_samples=1000).fit, X)
    assert_fit_emits_no_user_warning('auto')
    assert_fit_emits_no_user_warning(np.int64(2))

    for bad_value in ('foobar', 1.5):
        assert_fit_rejects(bad_value)

    # Predicting with a different number of features than seen in fit fails.
    fitted = IsolationForest().fit(X)
    assert_raises(ValueError, fitted.predict, X[:, 1:])

    # threshold_ is not available when behaviour is not 'old'.
    msg = "threshold_ attribute does not exist when behaviour != 'old'"
    new_behaviour_forest = IsolationForest(behaviour='new')
    assert_raises_regex(AttributeError, msg, getattr,
                        new_behaviour_forest, 'threshold_')
Example 17
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_iforest.py    License: MIT License 5 votes vote down vote up
def test_recalculate_max_depth():
    """Every tree's max_depth must equal ceil(log2(n_samples)) after fitting."""
    X = iris.data
    expected_depth = int(np.ceil(np.log2(X.shape[0])))
    for tree in IsolationForest().fit(X).estimators_:
        assert_equal(tree.max_depth, expected_depth)
Example 18
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_iforest.py    License: MIT License 5 votes vote down vote up
def test_max_samples_attribute():
    """Check the fitted ``max_samples_`` for default, oversized and fractional settings."""
    X = iris.data
    n_samples = X.shape[0]

    # Default setting: max_samples_ equals the number of samples here.
    default_forest = IsolationForest().fit(X)
    assert_equal(default_forest.max_samples_, n_samples)

    # More samples requested than available: warn and use n_samples.
    oversized_forest = IsolationForest(max_samples=500)
    assert_warns_message(UserWarning,
                         "max_samples will be set to n_samples for estimation",
                         oversized_forest.fit, X)
    assert_equal(oversized_forest.max_samples_, n_samples)

    # A float is interpreted as a fraction of n_samples.
    fractional_forest = IsolationForest(max_samples=0.4).fit(X)
    assert_equal(fractional_forest.max_samples_, 0.4*n_samples)
Example 19
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_iforest.py    License: MIT License 5 votes vote down vote up
def test_iforest_works(contamination):
    """The two outliers of a toy sample must be scored and predicted as such."""
    # toy sample (the last two samples are outliers)
    X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1], [6, 3], [-4, 7]]

    clf = IsolationForest(behaviour="new", random_state=rng,
                          contamination=contamination)
    clf.fit(X)
    anomaly_scores = -clf.decision_function(X)
    labels = clf.predict(X)

    # Every outlier must score higher than every inlier.
    inlier_scores, outlier_scores = anomaly_scores[:-2], anomaly_scores[-2:]
    assert_greater(np.min(outlier_scores), np.max(inlier_scores))
    assert_array_equal(labels, [1] * 6 + [-1] * 2)
Example 20
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_iforest.py    License: MIT License 5 votes vote down vote up
def test_max_samples_consistency():
    """``max_samples_`` and the private ``_max_samples`` must agree after fit."""
    # Make sure validated max_samples in iforest and BaseBagging are identical
    fitted = IsolationForest().fit(iris.data)
    assert_equal(fitted.max_samples_, fitted._max_samples)
Example 21
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_iforest.py    License: MIT License 5 votes vote down vote up
def test_iforest_subsampled_features():
    """Fit and predict must both run with ``max_features`` below 1.0."""
    # It tests non-regression for #5732 which failed at predict.
    rng = check_random_state(0)
    X_train, X_test, y_train, _ = train_test_split(
        boston.data[:50], boston.target[:50], random_state=rng)
    forest = IsolationForest(max_features=0.8)
    forest.fit(X_train, y_train)
    forest.predict(X_test)
Example 22
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_iforest.py    License: MIT License 5 votes vote down vote up
def test_score_samples():
    """``score_samples`` must equal ``decision_function + offset_`` and ignore contamination."""
    X_train = [[1, 1], [1, 2], [2, 1]]
    query = [[2., 2.]]
    clf1 = IsolationForest(contamination=0.1).fit(X_train)
    clf2 = IsolationForest().fit(X_train)

    for clf in (clf1, clf2):
        assert_array_equal(clf.score_samples(query),
                           clf.decision_function(query) + clf.offset_)

    # The raw scores are the same for both estimators.
    assert_array_equal(clf1.score_samples(query), clf2.score_samples(query))
Example 23
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_iforest.py    License: MIT License 5 votes vote down vote up
def test_behaviour_param():
    """Old and new behaviour must give the same decision function values here."""
    X_train = [[1, 1], [1, 2], [2, 1]]
    query = [[2., 2.]]
    old_style = IsolationForest(behaviour='old').fit(X_train)
    new_style = IsolationForest(behaviour='new',
                                contamination='auto').fit(X_train)
    assert_array_equal(old_style.decision_function(query),
                       new_style.decision_function(query))


# mock get_chunk_n_rows to actually test more than one chunk (here one
# chunk = 3 rows: 
Example 24
Project: batea   Author: delvelabs   File: model.py    License: GNU General Public License v2.0 5 votes vote down vote up
def build_model(self, outlier_ratio=0.1, n_estimators=100, max_samples='auto'):
        """Create the IsolationForest and store it on ``self.model``.

        ``outlier_ratio`` is passed as ``contamination``; the forest always
        uses ``behaviour='new'``.
        """
        forest_settings = {
            'contamination': outlier_ratio,
            'n_estimators': n_estimators,
            'max_samples': max_samples,
            'behaviour': 'new',
        }
        self.model = IsolationForest(**forest_settings)
Example 25
Project: kenchi   Author: Y-oHr-N   File: ensemble.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _fit(self, X):
        """Fit an IsolationForest on ``X``, store it as ``estimator_`` and return ``self``."""
        forest = IsolationForest(
            behaviour='new',
            bootstrap=self.bootstrap,
            contamination=self.contamination,
            max_features=self.max_features,
            max_samples=self.max_samples,
            n_estimators=self.n_estimators,
            n_jobs=self.n_jobs,
            random_state=self.random_state,
        )
        # Store whatever fit() returns, exactly as a chained call would.
        self.estimator_ = forest.fit(X)
        return self
Example 26
Project: Frcwp   Author: sladesha   File: slicing.py    License: MIT License 5 votes vote down vote up
def fit(self):
        """Fit an IsolationForest on ``self.X`` and keep it as ``self.ift``."""
        forest = IsolationForest(n_estimators=self.nestimators,
                                 contamination=self.contamination)
        forest.fit(self.X)
        # Only published on the instance once fitting has succeeded.
        self.ift = forest
Example 27
Project: ad_examples   Author: shubhomoydas   File: random_split_trees.py    License: MIT License 5 votes vote down vote up
def fit(self, X, y=None, sample_weight=None):
        """Fit the wrapped IsolationForest and expose its trees on this object.

        A new forest is built from this object's hyperparameters, stored as
        ``self.ifor`` and fitted. Its ``estimators_`` and
        ``estimators_features_`` are then copied onto ``self`` and ``updated``
        is reset to False.
        """
        forest = IsolationForest(
            n_estimators=self.n_estimators,
            max_samples=self.max_samples,
            contamination=self.contamination,
            max_features=self.max_features,
            bootstrap=self.bootstrap,
            n_jobs=self.n_jobs,
            random_state=self.random_state,
            verbose=self.verbose,
        )
        self.ifor = forest
        forest.fit(X, y, sample_weight)

        self.estimators_ = forest.estimators_
        self.estimators_features_ = forest.estimators_features_
        self.updated = False
Example 28
Project: ad_examples   Author: shubhomoydas   File: multiview_forest.py    License: MIT License 5 votes vote down vote up
def _multiview_fit(self, X, y, feature_partitions, n_estimators_view):
        """Fit one isolation forest per feature view and collect wrapped trees.

        ``feature_partitions`` gives the number of consecutive columns of ``X``
        in each view and ``n_estimators_view`` the number of trees for that
        view. Returns a list with one entry per view; each entry is the list
        of ``IForestMultiviewTree`` objects built for it (empty when the view
        has no trees).
        """
        n_features = X.shape[1]

        views = []
        view_start = 0
        logger.debug("IForestMultiview n_estimators_view: %s" % str(list(n_estimators_view)))
        for view_width, view_trees in zip(feature_partitions, n_estimators_view):
            view_estimators = []
            view_stop = view_start + view_width
            X_view = X[:, view_start:view_stop]

            if view_trees > 0:
                # Construct the isolation forest for the view containing just
                # this subset of the features.
                forest = IsolationForest(
                    n_estimators=view_trees,
                    max_samples=self.max_samples,
                    contamination=self.contamination,
                    max_features=self.max_features,
                    bootstrap=self.bootstrap,
                    n_jobs=self.n_jobs,
                    random_state=self.random_state,
                    verbose=self.verbose,
                )
                forest.fit(X_view, y, sample_weight=None)

                for fitted_tree in forest.estimators_:
                    # The IsolationForest trees contain read-only properties,
                    # so they are copied into the custom tree structure, which
                    # can be modified if needed.
                    wrapped = IForestMultiviewTree(n_features=n_features,
                                                   ifor_tree=fitted_tree.tree_)

                    # Shift the feature indexes at the tree nodes by the
                    # view's starting column.
                    wrapped.tree_.feature += view_start

                    view_estimators.append(wrapped)

            views.append(view_estimators)
            view_start = view_stop

        return views
Example 29
Project: ad_examples   Author: shubhomoydas   File: test_iso_gan.py    License: MIT License 5 votes vote down vote up
def get_iso_model(x, y, opts):
    """Fit an IsolationForest on ``x`` and return it with its scores.

    The forest uses 100 trees, 256 samples per tree, a contamination of 0.1
    and ``opts.randseed`` as random state. ``y`` is not used. The second
    return value is the decision function of ``x`` reshaped to one column.
    """
    iso_model = IsolationForest(n_estimators=100, max_samples=256,
                                contamination=0.1,
                                random_state=opts.randseed)
    iso_model.fit(x)
    scores = iso_model.decision_function(x)
    return iso_model, np.reshape(scores, (-1, 1))
Example 30
Project: mltk-algo-contrib   Author: splunk   File: IsolationForest.py    License: Apache License 2.0 5 votes vote down vote up
def __init__(self,options):
        """Validate the user-supplied parameters and build the estimator.

        ``options`` is handed to ``handle_options`` and its ``'params'`` entry
        is converted with ``convert_params``. ``anomaly_score`` is removed
        from the converted parameters and stored as ``self.return_scores``
        (default True); the remaining parameters are passed to
        ``_IsolationForest``.

        Raises:
            RuntimeError: if ``n_estimators`` is not positive, if
                ``max_samples`` or ``max_features`` is outside (0.0, 1.0], or
                if ``contamination`` is outside (0.0, 0.5].
        """
        self.handle_options(options)
        out_params = convert_params(
            options.get('params',{}),
            ints = ['n_estimators','n_jobs','random_state','verbose'],
            floats = ['max_samples','contamination','max_features'],
            bools = ['bootstrap']
            )
        self.return_scores = out_params.pop('anomaly_score', True)

        # whitelist n_estimators > 0
        if 'n_estimators' in out_params and out_params['n_estimators']<=0:
            msg = 'Invalid value error: n_estimators must be greater than 0 and an integer, but found n_estimators="{}".'
            raise RuntimeError(msg.format(out_params['n_estimators']))

        # whitelist max_samples in (0.0, 1.0]. The value is converted as a
        # float above. The previous check (`< 0 and > 1`) could never be
        # true, so out-of-range values were silently accepted here.
        if 'max_samples' in out_params and not (0.0 < out_params['max_samples'] <= 1.0):
            msg = (
                'Invalid value error: Valid values for max_samples are in (0.0, 1.0], '
                'but found max_samples="{}".'
            )
            raise RuntimeError(msg.format(out_params['max_samples']))

        # whitelist contamination should be in (0.0, 0.5] as error raised by sklearn for values out of range
        if 'contamination' in out_params and not (0.0 < out_params['contamination'] <= 0.5):
            msg = (
                'Invalid value error: Valid values for contamination are in (0.0, 0.5], '
                'but found contamination="{}".'
            )
            raise RuntimeError(msg.format(out_params['contamination']))

        # whitelist max_features in (0.0, 1.0]; same never-true condition
        # as max_samples had before.
        if 'max_features' in out_params and not (0.0 < out_params['max_features'] <= 1.0):
            msg = (
                'Invalid value error: Valid values for max_features are in (0.0, 1.0], '
                'but found max_features="{}".'
            )
            raise RuntimeError(msg.format(out_params['max_features']))

        self.estimator = _IsolationForest(**out_params)