Python sklearn.tree._tree.TREE_LEAF Examples

The following are 17 code examples of sklearn.tree._tree.TREE_LEAF, the sentinel constant that scikit-learn stores in a tree's child arrays to mark leaf nodes. You can go to the original project or source file by following the link above each example, or check out all available functions and classes of the module sklearn.tree._tree. Snippets are reproduced as they appear in their source files, so module-level imports (for example, import numpy as np and from sklearn.tree import _tree) are generally omitted.
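Before the examples, here is a minimal sketch of the core idiom they all share: a node is a leaf exactly when its children_left (equivalently, children_right) entry equals TREE_LEAF, which is -1. The iris dataset is used purely for illustration.

import numpy as np
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree._tree import TREE_LEAF  # sentinel value: -1

X, y = load_iris(return_X_y=True)
clf = DecisionTreeClassifier(max_depth=3, random_state=0).fit(X, y)

# A node is a leaf iff its left (equivalently, right) child is TREE_LEAF.
is_leaf = clf.tree_.children_left == TREE_LEAF
print("leaves:", np.sum(is_leaf), "of", clf.tree_.node_count, "nodes")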
Example #1
Source File: treeinterpreter.py    From treeinterpreter with BSD 3-Clause "New" or "Revised" License
def _get_tree_paths(tree, node_id, depth=0):
    """
    Returns all paths through the tree as a list of node_id lists (each path is collected leaf-first)
    """
    if node_id == _tree.TREE_LEAF:
        raise ValueError("Invalid node_id %s" % node_id)

    left_child = tree.children_left[node_id]
    right_child = tree.children_right[node_id]

    if left_child != _tree.TREE_LEAF:
        left_paths = _get_tree_paths(tree, left_child, depth=depth + 1)
        right_paths = _get_tree_paths(tree, right_child, depth=depth + 1)

        for path in left_paths:
            path.append(node_id)
        for path in right_paths:
            path.append(node_id)
        paths = left_paths + right_paths
    else:
        paths = [[node_id]]
    return paths 
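As a quick illustration (not part of treeinterpreter itself; the estimator and data are chosen here only for demonstration), the helper can be called on a fitted tree's tree_ attribute. Note that each returned path reads leaf to root, since parents are appended after the recursive calls.

from sklearn.tree import _tree          # used inside _get_tree_paths
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

X, y = load_iris(return_X_y=True)
clf = DecisionTreeClassifier(max_depth=2, random_state=0).fit(X, y)

# Reverse each path for root -> leaf order.
for path in _get_tree_paths(clf.tree_, 0):
    print(list(reversed(path)))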
Example #2
Source File: example.py    From random-forest-leaf-visualization with MIT License
def leaf_depths(tree, node_id = 0):
    
    left_child = tree.children_left[node_id]
    right_child = tree.children_right[node_id]

    if left_child == _tree.TREE_LEAF:
        
        depths = np.array([0])

    else:
        
        left_depths = leaf_depths(tree, left_child) + 1
        right_depths = leaf_depths(tree, right_child) + 1

        depths = np.append(left_depths, right_depths)

    return depths 
Example #3
Source File: test_tree.py    From twitter-stock-recommendation with MIT License
def check_decision_path(name):
    X = iris.data
    y = iris.target
    n_samples = X.shape[0]

    TreeEstimator = ALL_TREES[name]
    est = TreeEstimator(random_state=0, max_depth=2)
    est.fit(X, y)

    node_indicator_csr = est.decision_path(X)
    node_indicator = node_indicator_csr.toarray()
    assert_equal(node_indicator.shape, (n_samples, est.tree_.node_count))

    # Assert that leaf indices are correct
    leaves = est.apply(X)
    leave_indicator = [node_indicator[i, j] for i, j in enumerate(leaves)]
    assert_array_almost_equal(leave_indicator, np.ones(shape=n_samples))

    # Ensure only one leaf node per sample
    all_leaves = est.tree_.children_left == TREE_LEAF
    assert_array_almost_equal(np.dot(node_indicator, all_leaves),
                              np.ones(shape=n_samples))

    # Ensure max depth is consistent with sum of indicator
    max_depth = node_indicator.sum(axis=1).max()
    assert_less_equal(est.tree_.max_depth, max_depth) 
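For reference, the relationship the test asserts between decision_path and apply can be checked directly outside the test harness. A minimal sketch using only the public API (iris chosen purely for illustration):

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

X, y = load_iris(return_X_y=True)
est = DecisionTreeClassifier(max_depth=2, random_state=0).fit(X, y)

indicator = est.decision_path(X)   # sparse CSR, shape (n_samples, n_nodes)
leaves = est.apply(X)              # leaf node id reached by each sample
# The last node on a sample's path is the leaf that apply() reports.
print(indicator[0].indices, "->", leaves[0])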
Example #4
Source File: test_gradient_boosting.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_complete_classification():
    # Test greedy trees with max_depth + 1 leaves.
    from sklearn.tree._tree import TREE_LEAF
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    k = 4

    est = GradientBoostingClassifier(n_estimators=20, max_depth=None,
                                     random_state=1, max_leaf_nodes=k + 1)
    est.fit(X, y)

    tree = est.estimators_[0, 0].tree_
    assert_equal(tree.max_depth, k)
    assert_equal(tree.children_left[tree.children_left == TREE_LEAF].shape[0],
                 k + 1) 
Example #5
Source File: test_tree.py    From twitter-stock-recommendation with MIT License
def assert_tree_equal(d, s, message):
    assert_equal(s.node_count, d.node_count,
                 "{0}: unequal number of nodes ({1} != {2})"
                 "".format(message, s.node_count, d.node_count))

    assert_array_equal(d.children_right, s.children_right,
                       message + ": unequal children_right")
    assert_array_equal(d.children_left, s.children_left,
                       message + ": unequal children_left")

    external = d.children_right == TREE_LEAF
    internal = np.logical_not(external)

    assert_array_equal(d.feature[internal], s.feature[internal],
                       message + ": unequal features")
    assert_array_equal(d.threshold[internal], s.threshold[internal],
                       message + ": unequal threshold")
    assert_array_equal(d.n_node_samples.sum(), s.n_node_samples.sum(),
                       message + ": unequal sum(n_node_samples)")
    assert_array_equal(d.n_node_samples, s.n_node_samples,
                       message + ": unequal n_node_samples")

    assert_almost_equal(d.impurity, s.impurity,
                        err_msg=message + ": unequal impurity")

    assert_array_almost_equal(d.value[external], s.value[external],
                              err_msg=message + ": unequal value")
Example #6
Source File: test_gradient_boosting.py    From twitter-stock-recommendation with MIT License
def test_complete_regression():
    # Test greedy trees with max_depth + 1 leaves.
    from sklearn.tree._tree import TREE_LEAF
    k = 4

    est = GradientBoostingRegressor(n_estimators=20, max_depth=None,
                                    random_state=1, max_leaf_nodes=k + 1)
    est.fit(boston.data, boston.target)

    tree = est.estimators_[-1, 0].tree_
    assert_equal(tree.children_left[tree.children_left == TREE_LEAF].shape[0],
                 k + 1) 
Example #7
Source File: test_gradient_boosting.py    From twitter-stock-recommendation with MIT License
def test_complete_classification():
    # Test greedy trees with max_depth + 1 leaves.
    from sklearn.tree._tree import TREE_LEAF
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    k = 4

    est = GradientBoostingClassifier(n_estimators=20, max_depth=None,
                                     random_state=1, max_leaf_nodes=k + 1)
    est.fit(X, y)

    tree = est.estimators_[0, 0].tree_
    assert_equal(tree.max_depth, k)
    assert_equal(tree.children_left[tree.children_left == TREE_LEAF].shape[0],
                 k + 1) 
Example #8
Source File: example.py    From random-forest-leaf-visualization with MIT License
def leaf_samples(tree, node_id = 0):
    
    left_child = tree.children_left[node_id]
    right_child = tree.children_right[node_id]

    if left_child == _tree.TREE_LEAF:
        
        samples = np.array([tree.n_node_samples[node_id]])

    else:
        
        left_samples = leaf_samples(tree, left_child)
        right_samples = leaf_samples(tree, right_child)

        samples = np.append(left_samples, right_samples)

    return samples 
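As with leaf_depths above, a hypothetical call (a sketch, with the function's internal imports repeated so the snippet stands alone) that collects the sample count of every leaf:

import numpy as np                      # used inside leaf_samples
from sklearn.tree import _tree          # used inside leaf_samples
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

X, y = load_iris(return_X_y=True)
clf = DecisionTreeClassifier(max_depth=3, random_state=0).fit(X, y)

# n_node_samples of every leaf; these sum to the training set size.
print(leaf_samples(clf.tree_))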
Example #9
Source File: test_tree.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_empty_leaf_infinite_threshold():
    # Try to make an empty leaf by using a near-infinite value.
    data = np.random.RandomState(0).randn(100, 11) * 2e38
    data = np.nan_to_num(data.astype('float32'))
    X_full = data[:, :-1]
    X_sparse = csc_matrix(X_full)
    y = data[:, -1]
    for X in [X_full, X_sparse]:
        tree = DecisionTreeRegressor(random_state=0).fit(X, y)
        terminal_regions = tree.apply(X)
        left_leaf = set(np.where(tree.tree_.children_left == TREE_LEAF)[0])
        empty_leaf = left_leaf.difference(terminal_regions)
        infinite_threshold = np.where(~np.isfinite(tree.tree_.threshold))[0]
        assert len(infinite_threshold) == 0
        assert len(empty_leaf) == 0 
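For reference, leaves can also be recognized by the companion sentinel TREE_UNDEFINED (-2), which scikit-learn stores in the feature and threshold arrays of leaf nodes; this is why leaf thresholds are never infinite. A minimal sketch:

import numpy as np
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree._tree import TREE_LEAF, TREE_UNDEFINED

X, y = load_iris(return_X_y=True)
reg = DecisionTreeRegressor(max_depth=2, random_state=0).fit(X, y)
t = reg.tree_

leaves = t.children_left == TREE_LEAF
# Leaf nodes hold TREE_UNDEFINED (-2) where split nodes hold a feature id.
assert np.all(t.feature[leaves] == TREE_UNDEFINED)
print("leaf thresholds:", t.threshold[leaves])  # all -2.0, never +/-inf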
Example #10
Source File: test_tree.py    From Mastering-Elasticsearch-7.0 with MIT License
def assert_tree_equal(d, s, message):
    assert_equal(s.node_count, d.node_count,
                 "{0}: unequal number of nodes ({1} != {2})"
                 "".format(message, s.node_count, d.node_count))

    assert_array_equal(d.children_right, s.children_right,
                       message + ": unequal children_right")
    assert_array_equal(d.children_left, s.children_left,
                       message + ": unequal children_left")

    external = d.children_right == TREE_LEAF
    internal = np.logical_not(external)

    assert_array_equal(d.feature[internal], s.feature[internal],
                       message + ": unequal features")
    assert_array_equal(d.threshold[internal], s.threshold[internal],
                       message + ": unequal threshold")
    assert_array_equal(d.n_node_samples.sum(), s.n_node_samples.sum(),
                       message + ": unequal sum(n_node_samples)")
    assert_array_equal(d.n_node_samples, s.n_node_samples,
                       message + ": unequal n_node_samples")

    assert_almost_equal(d.impurity, s.impurity,
                        err_msg=message + ": unequal impurity")

    assert_array_almost_equal(d.value[external], s.value[external],
                              err_msg=message + ": unequal value")
Example #11
Source File: test_gradient_boosting.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_complete_regression():
    # Test greedy trees with max_depth + 1 leaves.
    from sklearn.tree._tree import TREE_LEAF
    k = 4

    est = GradientBoostingRegressor(n_estimators=20, max_depth=None,
                                    random_state=1, max_leaf_nodes=k + 1)
    est.fit(boston.data, boston.target)

    tree = est.estimators_[-1, 0].tree_
    assert_equal(tree.children_left[tree.children_left == TREE_LEAF].shape[0],
                 k + 1) 
Example #12
Source File: display.py    From diogenes with MIT License
def feature_pairs_in_tree(dt):
    """Lists subsequent features sorted by importance

    Parameters
    ----------
    dt : sklearn.tree.DecisionTreeClassifier

    Returns
    -------
    list of list of tuple of int :
        Going from inside to out:

        1. Each int is a feature that a node split on
    
        2. If two ints appear in the same tuple, then there was a node
           that split on the second feature immediately below a node
           that split on the first feature

        3. Tuples appearing in the same inner list appear at the same
           depth in the tree

        4. The outer list describes the entire tree

    """
    if not isinstance(dt, DecisionTreeClassifier):
        raise ValueError('dt must be an sklearn.tree.DecisionTreeClassifier')
    t = dt.tree_
    feature = t.feature
    children_left = t.children_left
    children_right = t.children_right
    result = []
    if t.children_left[0] == TREE_LEAF:
        return result
    next_queue = [0]
    while next_queue:
        this_queue = next_queue
        next_queue = []
        results_this_depth = []
        while this_queue:
            node = this_queue.pop()
            left_child = children_left[node]
            right_child = children_right[node]
            if children_left[left_child] != TREE_LEAF:
                results_this_depth.append(tuple(sorted(
                    (feature[node], 
                     feature[left_child]))))
                next_queue.append(left_child)
            if children_left[right_child] != TREE_LEAF:
                results_this_depth.append(tuple(sorted(
                    (feature[node], 
                     feature[right_child]))))
                next_queue.append(right_child)
        result.append(results_this_depth)
    result.pop() # The last results are always empty
    return result 
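A hypothetical call (a sketch; display.py imports DecisionTreeClassifier and TREE_LEAF at module level, so they are repeated here to keep the snippet self-contained):

from sklearn.tree import DecisionTreeClassifier   # used inside feature_pairs_in_tree
from sklearn.tree._tree import TREE_LEAF          # used inside feature_pairs_in_tree
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)
dt = DecisionTreeClassifier(max_depth=3, random_state=0).fit(X, y)

# One inner list per depth; each tuple pairs a node's split feature
# with the split feature of one of its children.
print(feature_pairs_in_tree(dt))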
Example #13
Source File: tree.py    From sklearn-pmml with MIT License
def _transform_node(self, tree, index, input_schema, output_feature, enter_condition=None):
        """
        Recursive mapping of sklearn Tree into PMML Node tree
        :return: Node element
        """
        assert isinstance(tree, Tree)
        assert isinstance(input_schema, list)
        assert isinstance(output_feature, Feature)

        node = pmml.Node()
        if enter_condition is None:
            node.append(pmml.True_())
        else:
            node.append(enter_condition)
        node.recordCount = tree.n_node_samples[index]

        if tree.children_left[index] != TREE_LEAF:
            feature = input_schema[tree.feature[index]]
            assert isinstance(feature, Feature)
            left_child = self._transform_node(
                tree,
                tree.children_left[index],
                input_schema,
                output_feature,
                enter_condition=pmml.SimplePredicate(
                    field=feature.full_name, operator=DecisionTreeConverter.OPERATOR_LE, value_=tree.threshold[index]
                )
            )
            right_child = self._transform_node(tree, tree.children_right[index], input_schema, output_feature)
            if self.model_function == ModelMode.CLASSIFICATION:
                score, score_prob = None, 0.0
                for i in range(len(tree.value[index][0])):
                    left_score = left_child.ScoreDistribution[i]
                    right_score = right_child.ScoreDistribution[i]
                    prob = float(left_score.recordCount + right_score.recordCount) / node.recordCount
                    node.append(pmml.ScoreDistribution(
                        recordCount=left_score.recordCount + right_score.recordCount,
                        value_=left_score.value_,
                        confidence=prob
                    ))
                    if score_prob < prob:
                        score, score_prob = left_score.value_, prob
                node.score = score
            node.append(left_child).append(right_child)

        else:
            node_value = np.array(tree.value[index][0])
            if self.model_function == ModelMode.CLASSIFICATION:
                probs = node_value / float(node_value.sum())
                for i in range(len(probs)):
                    node.append(pmml.ScoreDistribution(
                        confidence=probs[i],
                        recordCount=node_value[i],
                        value_=output_feature.from_number(i)
                    ))
                node.score = output_feature.from_number(probs.argmax())
            elif self.model_function == ModelMode.REGRESSION:
                node.score = node_value[0]

        return node 
Example #14
Source File: test_tree.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_sample_weight():
    # Check sample weighting.
    # Test that zero-weighted samples are not taken into account
    X = np.arange(100)[:, np.newaxis]
    y = np.ones(100)
    y[:50] = 0.0

    sample_weight = np.ones(100)
    sample_weight[y == 0] = 0.0

    clf = DecisionTreeClassifier(random_state=0)
    clf.fit(X, y, sample_weight=sample_weight)
    assert_array_equal(clf.predict(X), np.ones(100))

    # Test that low weighted samples are not taken into account at low depth
    X = np.arange(200)[:, np.newaxis]
    y = np.zeros(200)
    y[50:100] = 1
    y[100:200] = 2
    X[100:200, 0] = 200

    sample_weight = np.ones(200)

    sample_weight[y == 2] = .51  # Samples of class '2' are still weightier
    clf = DecisionTreeClassifier(max_depth=1, random_state=0)
    clf.fit(X, y, sample_weight=sample_weight)
    assert_equal(clf.tree_.threshold[0], 149.5)

    sample_weight[y == 2] = .5  # Samples of class '2' are no longer weightier
    clf = DecisionTreeClassifier(max_depth=1, random_state=0)
    clf.fit(X, y, sample_weight=sample_weight)
    assert_equal(clf.tree_.threshold[0], 49.5)  # Threshold should have moved

    # Test that sample weighting is the same as having duplicates
    X = iris.data
    y = iris.target

    duplicates = rng.randint(0, X.shape[0], 100)

    clf = DecisionTreeClassifier(random_state=1)
    clf.fit(X[duplicates], y[duplicates])

    sample_weight = np.bincount(duplicates, minlength=X.shape[0])
    clf2 = DecisionTreeClassifier(random_state=1)
    clf2.fit(X, y, sample_weight=sample_weight)

    internal = clf.tree_.children_left != tree._tree.TREE_LEAF
    assert_array_almost_equal(clf.tree_.threshold[internal],
                              clf2.tree_.threshold[internal]) 
Example #15
Source File: test_tree.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_min_impurity_split():
    # test if min_impurity_split creates leaves with impurity
    # [0, min_impurity_split) when min_samples_leaf = 1 and
    # min_samples_split = 2.
    X = np.asfortranarray(iris.data, dtype=tree._tree.DTYPE)
    y = iris.target

    # test both DepthFirstTreeBuilder and BestFirstTreeBuilder
    # by setting max_leaf_nodes
    for max_leaf_nodes, name in product((None, 1000), ALL_TREES.keys()):
        TreeEstimator = ALL_TREES[name]
        min_impurity_split = .5

        # verify that leaf nodes have impurity below 1e-7 when
        # min_impurity_split is left unset
        est = TreeEstimator(max_leaf_nodes=max_leaf_nodes,
                            random_state=0)
        assert est.min_impurity_split is None, (
            "Failed, min_impurity_split = {0} > 1e-7".format(
                est.min_impurity_split))
        try:
            assert_warns(DeprecationWarning, est.fit, X, y)
        except AssertionError:
            pass
        for node in range(est.tree_.node_count):
            if (est.tree_.children_left[node] == TREE_LEAF or
                    est.tree_.children_right[node] == TREE_LEAF):
                assert_equal(est.tree_.impurity[node], 0.,
                             "Failed with {0} "
                             "min_impurity_split={1}".format(
                                 est.tree_.impurity[node],
                                 est.min_impurity_split))

        # verify leaf nodes have impurity in [0, min_impurity_split] when using
        # min_impurity_split
        est = TreeEstimator(max_leaf_nodes=max_leaf_nodes,
                            min_impurity_split=min_impurity_split,
                            random_state=0)
        assert_warns_message(DeprecationWarning,
                             "Use the min_impurity_decrease",
                             est.fit, X, y)
        for node in range(est.tree_.node_count):
            if (est.tree_.children_left[node] == TREE_LEAF or
                    est.tree_.children_right[node] == TREE_LEAF):
                assert_greater_equal(est.tree_.impurity[node], 0,
                                     "Failed with {0}, "
                                     "min_impurity_split={1}".format(
                                         est.tree_.impurity[node],
                                         est.min_impurity_split))
                assert_less_equal(est.tree_.impurity[node], min_impurity_split,
                                  "Failed with {0}, "
                                  "min_impurity_split={1}".format(
                                      est.tree_.impurity[node],
                                      est.min_impurity_split)) 
Example #16
Source File: test_tree.py    From twitter-stock-recommendation with MIT License
def test_min_impurity_split():
    # test if min_impurity_split creates leaves with impurity
    # [0, min_impurity_split) when min_samples_leaf = 1 and
    # min_samples_split = 2.
    X = np.asfortranarray(iris.data.astype(tree._tree.DTYPE))
    y = iris.target

    # test both DepthFirstTreeBuilder and BestFirstTreeBuilder
    # by setting max_leaf_nodes
    for max_leaf_nodes, name in product((None, 1000), ALL_TREES.keys()):
        TreeEstimator = ALL_TREES[name]
        min_impurity_split = .5

        # verify that leaf nodes have impurity below 1e-7 when
        # min_impurity_split is left unset
        est = TreeEstimator(max_leaf_nodes=max_leaf_nodes,
                            random_state=0)
        assert_true(est.min_impurity_split is None,
                    "Failed, min_impurity_split = {0} > 1e-7".format(
                        est.min_impurity_split))
        try:
            assert_warns(DeprecationWarning, est.fit, X, y)
        except AssertionError:
            pass
        for node in range(est.tree_.node_count):
            if (est.tree_.children_left[node] == TREE_LEAF or
                    est.tree_.children_right[node] == TREE_LEAF):
                assert_equal(est.tree_.impurity[node], 0.,
                             "Failed with {0} "
                             "min_impurity_split={1}".format(
                                 est.tree_.impurity[node],
                                 est.min_impurity_split))

        # verify leaf nodes have impurity in [0, min_impurity_split] when using
        # min_impurity_split
        est = TreeEstimator(max_leaf_nodes=max_leaf_nodes,
                            min_impurity_split=min_impurity_split,
                            random_state=0)
        assert_warns_message(DeprecationWarning,
                             "Use the min_impurity_decrease",
                             est.fit, X, y)
        for node in range(est.tree_.node_count):
            if (est.tree_.children_left[node] == TREE_LEAF or
                    est.tree_.children_right[node] == TREE_LEAF):
                assert_greater_equal(est.tree_.impurity[node], 0,
                                     "Failed with {0}, "
                                     "min_impurity_split={1}".format(
                                         est.tree_.impurity[node],
                                         est.min_impurity_split))
                assert_less_equal(est.tree_.impurity[node], min_impurity_split,
                                  "Failed with {0}, "
                                  "min_impurity_split={1}".format(
                                      est.tree_.impurity[node],
                                      est.min_impurity_split)) 
Example #17
Source File: test_tree.py    From twitter-stock-recommendation with MIT License
def test_sample_weight():
    # Check sample weighting.
    # Test that zero-weighted samples are not taken into account
    X = np.arange(100)[:, np.newaxis]
    y = np.ones(100)
    y[:50] = 0.0

    sample_weight = np.ones(100)
    sample_weight[y == 0] = 0.0

    clf = DecisionTreeClassifier(random_state=0)
    clf.fit(X, y, sample_weight=sample_weight)
    assert_array_equal(clf.predict(X), np.ones(100))

    # Test that low weighted samples are not taken into account at low depth
    X = np.arange(200)[:, np.newaxis]
    y = np.zeros(200)
    y[50:100] = 1
    y[100:200] = 2
    X[100:200, 0] = 200

    sample_weight = np.ones(200)

    sample_weight[y == 2] = .51  # Samples of class '2' are still weightier
    clf = DecisionTreeClassifier(max_depth=1, random_state=0)
    clf.fit(X, y, sample_weight=sample_weight)
    assert_equal(clf.tree_.threshold[0], 149.5)

    sample_weight[y == 2] = .5  # Samples of class '2' are no longer weightier
    clf = DecisionTreeClassifier(max_depth=1, random_state=0)
    clf.fit(X, y, sample_weight=sample_weight)
    assert_equal(clf.tree_.threshold[0], 49.5)  # Threshold should have moved

    # Test that sample weighting is the same as having duplicates
    X = iris.data
    y = iris.target

    duplicates = rng.randint(0, X.shape[0], 100)

    clf = DecisionTreeClassifier(random_state=1)
    clf.fit(X[duplicates], y[duplicates])

    sample_weight = np.bincount(duplicates, minlength=X.shape[0])
    clf2 = DecisionTreeClassifier(random_state=1)
    clf2.fit(X, y, sample_weight=sample_weight)

    internal = clf.tree_.children_left != tree._tree.TREE_LEAF
    assert_array_almost_equal(clf.tree_.threshold[internal],
                              clf2.tree_.threshold[internal])