import numpy as np
import pytest

from sklearn.multioutput import MultiOutputRegressor
from numpy.testing import assert_array_equal
from numpy.testing import assert_equal
from numpy.testing import assert_raises

from skopt import gp_minimize
from skopt import forest_minimize
from skopt.benchmarks import bench1, bench1_with_time
from skopt.benchmarks import branin
from skopt.learning import ExtraTreesRegressor, RandomForestRegressor
from skopt.learning import GradientBoostingQuantileRegressor
from skopt.optimizer import Optimizer
from scipy.optimize import OptimizeResult


TREE_REGRESSORS = (ExtraTreesRegressor(random_state=2),
                   RandomForestRegressor(random_state=2),
                   GradientBoostingQuantileRegressor(random_state=2))
ACQ_FUNCS_PS = ["EIps", "PIps"]
ACQ_FUNCS_MIXED = ["EI", "EIps"]
ESTIMATOR_STRINGS = ["GP", "RF", "ET", "GBRT", "DUMMY",
                     "gp", "rf", "et", "gbrt", "dummy"]


@pytest.mark.fast_test
def test_multiple_asks():
    # calling ask() multiple times without a tell() in between should
    # be a "no-op"
    base_estimator = ExtraTreesRegressor(random_state=2)
    opt = Optimizer([(-2.0, 2.0)], base_estimator, n_initial_points=1,
                    acq_optimizer="sampling")

    opt.run(bench1, n_iter=3)
    # every tell() fits a model and precomputes the next point for ask(),
    # hence there are three models after three iterations
    assert_equal(len(opt.models), 3)
    assert_equal(len(opt.Xi), 3)
    opt.ask()
    assert_equal(len(opt.models), 3)
    assert_equal(len(opt.Xi), 3)
    assert_equal(opt.ask(), opt.ask())
    # update_next() recomputes the candidate; repeated ask() stays stable
    opt.update_next()
    assert_equal(opt.ask(), opt.ask())

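# Illustrative sketch (not collected by pytest): opt.run(bench1, n_iter=3)
# above is assumed to be equivalent to this manual ask/tell loop; every
# tell() refits the model, while ask() returns the cached next candidate.
def _demo_manual_ask_tell_loop():
    opt = Optimizer([(-2.0, 2.0)], ExtraTreesRegressor(random_state=2),
                    n_initial_points=1, acq_optimizer="sampling")
    for _ in range(3):
        x = opt.ask()                 # candidate (random until a model is fit)
        res = opt.tell(x, bench1(x))  # report the observation and refit
    return res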

@pytest.mark.fast_test
def test_model_queue_size():
    # check that model_queue_size limits how many models are kept
    base_estimator = ExtraTreesRegressor(random_state=2)
    opt = Optimizer([(-2.0, 2.0)], base_estimator, n_initial_points=1,
                    acq_optimizer="sampling", model_queue_size=2)

    opt.run(bench1, n_iter=3)
    # three models are fitted over three iterations, but only the last
    # two are kept because model_queue_size=2
    assert_equal(len(opt.models), 2)
    assert_equal(len(opt.Xi), 3)
    opt.ask()
    assert_equal(len(opt.models), 2)
    assert_equal(len(opt.Xi), 3)
    assert_equal(opt.ask(), opt.ask())


@pytest.mark.fast_test
def test_invalid_tell_arguments():
    base_estimator = ExtraTreesRegressor(random_state=2)
    opt = Optimizer([(-2.0, 2.0)], base_estimator, n_initial_points=1,
                    acq_optimizer="sampling")

    # can't have single point and multiple values for y
    assert_raises(ValueError, opt.tell, [1.], [1., 1.])


@pytest.mark.fast_test
def test_invalid_tell_arguments_list():
    base_estimator = ExtraTreesRegressor(random_state=2)
    opt = Optimizer([(-2.0, 2.0)], base_estimator, n_initial_points=1,
                    acq_optimizer="sampling")

    assert_raises(ValueError, opt.tell, [[1.], [2.]], [1., None])


@pytest.mark.fast_test
def test_bounds_checking_1D():
    low = -2.
    high = 2.
    base_estimator = ExtraTreesRegressor(random_state=2)
    opt = Optimizer([(low, high)], base_estimator, n_initial_points=1,
                    acq_optimizer="sampling")

    assert_raises(ValueError, opt.tell, [high + 0.5], 2.)
    assert_raises(ValueError, opt.tell, [low - 0.5], 2.)
    # feed two points to tell() at once
    assert_raises(ValueError, opt.tell, [high + 0.5, high], (2., 3.))
    assert_raises(ValueError, opt.tell, [low - 0.5, high], (2., 3.))


@pytest.mark.fast_test
def test_bounds_checking_2D():
    low = -2.
    high = 2.
    base_estimator = ExtraTreesRegressor(random_state=2)
    opt = Optimizer([(low, high), (low+4, high+4)], base_estimator,
                    n_initial_points=1, acq_optimizer="sampling")

    assert_raises(ValueError, opt.tell, [high + 0.5, high + 4.5], 2.)
    assert_raises(ValueError, opt.tell, [low - 0.5, low - 4.5], 2.)

    # first component out of bounds, second within bounds
    assert_raises(ValueError, opt.tell, [high + 0.5, high + 0.5], 2.)
    assert_raises(ValueError, opt.tell, [low - 0.5, high + 0.5], 2.)


@pytest.mark.fast_test
def test_bounds_checking_2D_multiple_points():
    low = -2.
    high = 2.
    base_estimator = ExtraTreesRegressor(random_state=2)
    opt = Optimizer([(low, high), (low+4, high+4)], base_estimator,
                    n_initial_points=1, acq_optimizer="sampling")

    # first component out, second in
    assert_raises(ValueError, opt.tell,
                  [(high + 0.5, high + 0.5), (high + 0.5, high + 0.5)],
                  [2., 3.])
    assert_raises(ValueError, opt.tell,
                  [(low - 0.5, high + 0.5), (low - 0.5, high + 0.5)],
                  [2., 3.])


@pytest.mark.fast_test
def test_dimension_checking_1D():
    low = -2
    high = 2
    opt = Optimizer([(low, high)])
    with pytest.raises(ValueError) as e:
        # within bounds but one dimension too many
        opt.tell([low+1, low+1], 2.)
    assert "Dimensions of point " in str(e.value)


@pytest.mark.fast_test
def test_dimension_checking_2D():
    low = -2
    high = 2
    opt = Optimizer([(low, high), (low, high)])
    # within bounds but one dimension too few
    with pytest.raises(ValueError) as e:
        opt.tell([low+1, ], 2.)
    assert "Dimensions of point " in str(e.value)
    # within bounds but one dimension too many
    with pytest.raises(ValueError) as e:
        opt.tell([low+1, low+1, low+1], 2.)
    assert "Dimensions of point " in str(e.value)


@pytest.mark.fast_test
def test_dimension_checking_2D_multiple_points():
    low = -2
    high = 2
    opt = Optimizer([(low, high), (low, high)])
    # within bounds but one dimension too few
    with pytest.raises(ValueError) as e:
        opt.tell([[low+1, ], [low+1, low+2], [low+1, low+3]], 2.)
    assert "dimensions as the space" in str(e.value)
    # within bounds but one dimension too many
    with pytest.raises(ValueError) as e:
        opt.tell([[low + 1, low + 1, low + 1], [low + 1, low + 2],
                  [low + 1, low + 3]], 2.)
    assert "dimensions as the space" in str(e.value)


@pytest.mark.fast_test
def test_returns_result_object():
    base_estimator = ExtraTreesRegressor(random_state=2)
    opt = Optimizer([(-2.0, 2.0)], base_estimator, n_initial_points=1,
                    acq_optimizer="sampling")
    result = opt.tell([1.5], 2.)

    assert isinstance(result, OptimizeResult)
    assert_equal(len(result.x_iters), len(result.func_vals))
    assert_equal(np.min(result.func_vals), result.fun)

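# Illustrative sketch (not collected by pytest): the OptimizeResult fields
# that the assertions above rely on; x and fun track the best observation.
def _demo_result_fields():
    opt = Optimizer([(-2.0, 2.0)], ExtraTreesRegressor(random_state=2),
                    n_initial_points=1, acq_optimizer="sampling")
    res = opt.tell([1.5], 2.)
    return {"x": res.x,                  # best point seen so far
            "fun": res.fun,              # objective value at res.x
            "x_iters": res.x_iters,      # all points told so far
            "func_vals": res.func_vals}  # their objective values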

@pytest.mark.fast_test
@pytest.mark.parametrize("base_estimator", TREE_REGRESSORS)
def test_acq_optimizer(base_estimator):
    with pytest.raises(ValueError) as e:
        Optimizer([(-2.0, 2.0)], base_estimator=base_estimator,
                  n_initial_points=1, acq_optimizer='lbfgs')
    assert "should run with acq_optimizer='sampling'" in str(e.value)


@pytest.mark.parametrize("base_estimator", TREE_REGRESSORS)
@pytest.mark.parametrize("acq_func", ACQ_FUNCS_PS)
def test_acq_optimizer_with_time_api(base_estimator, acq_func):
    opt = Optimizer([(-2.0, 2.0)], base_estimator=base_estimator,
                    acq_func=acq_func,
                    acq_optimizer="sampling", n_initial_points=2)
    x1 = opt.ask()
    opt.tell(x1, (bench1(x1), 1.0))
    x2 = opt.ask()
    res = opt.tell(x2, (bench1(x2), 2.0))

    # x1 and x2 are random.
    assert x1 != x2

    assert len(res.models) == 1
    assert_array_equal(res.func_vals.shape, (2,))
    assert_array_equal(res.log_time.shape, (2,))

    # telling a plain value instead of a (value, time) pair must fail
    with pytest.raises(TypeError):
        opt.tell(x2, bench1(x2))

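# Illustrative sketch (not collected by pytest): with the "*ps" acquisition
# functions every observation is a (value, time) pair; bench1_with_time is
# assumed to return such a pair, as its use with opt.run() below suggests.
def _demo_time_api():
    opt = Optimizer([(-2.0, 2.0)], ExtraTreesRegressor(random_state=2),
                    acq_func="EIps", acq_optimizer="sampling",
                    n_initial_points=2)
    x = opt.ask()
    value, elapsed = bench1_with_time(x)  # objective and its evaluation time
    return opt.tell(x, (value, elapsed))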

@pytest.mark.fast_test
@pytest.mark.parametrize("acq_func", ACQ_FUNCS_MIXED)
def test_optimizer_copy(acq_func):
    # Check that the base estimator, the objective and target values
    # are copied correctly.

    base_estimator = ExtraTreesRegressor(random_state=2)
    opt = Optimizer([(-2.0, 2.0)], base_estimator, acq_func=acq_func,
                    n_initial_points=1, acq_optimizer="sampling")

    # run three iterations so that we have some points and objective values
    if "ps" in acq_func:
        opt.run(bench1_with_time, n_iter=3)
    else:
        opt.run(bench1, n_iter=3)

    opt_copy = opt.copy()

    copied_estimator = opt_copy.base_estimator_

    if "ps" in acq_func:
        assert isinstance(copied_estimator, MultiOutputRegressor)
        # check that the base_estimator is not wrapped multiple times
        is_multi = isinstance(copied_estimator.estimator,
                              MultiOutputRegressor)
        assert not is_multi
    else:
        assert not isinstance(copied_estimator, MultiOutputRegressor)

    assert_array_equal(opt_copy.Xi, opt.Xi)
    assert_array_equal(opt_copy.yi, opt.yi)

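# Illustrative sketch (not collected by pytest): copy() is assumed to return
# an independent optimizer seeded with the same observations, so telling the
# copy a new point leaves the original untouched.
def _demo_copy_is_independent():
    opt = Optimizer([(-2.0, 2.0)], ExtraTreesRegressor(random_state=2),
                    n_initial_points=1, acq_optimizer="sampling")
    opt.run(bench1, n_iter=3)
    opt_copy = opt.copy()
    opt_copy.tell([0.5], bench1([0.5]))   # only the copy learns this point
    return len(opt.Xi), len(opt_copy.Xi)  # 3 vs. 4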

@pytest.mark.parametrize("base_estimator", ESTIMATOR_STRINGS)
def test_exhaust_initial_calls(base_estimator):
    # check that a model is fitted and used for suggestions once at least
    # n_initial_points observations have been provided via tell()
    opt = Optimizer([(-2.0, 2.0)], base_estimator, n_initial_points=2,
                    acq_optimizer="sampling", random_state=1)

    x0 = opt.ask()  # random point
    x1 = opt.ask()  # random point
    assert x0 != x1
    # first call to tell()
    r1 = opt.tell(x1, 3.)
    assert len(r1.models) == 0
    x2 = opt.ask()  # random point
    assert x1 != x2
    # second call to tell()
    r2 = opt.tell(x2, 4.)
    if base_estimator.lower() == 'dummy':
        assert len(r2.models) == 0
    else:
        assert len(r2.models) == 1
    # this is the first non-random point
    x3 = opt.ask()
    assert x2 != x3
    x4 = opt.ask()
    r3 = opt.tell(x3, 1.)
    # no new information was added between the two asks, so the points
    # should be identical, unless we are using the dummy estimator, which
    # forever returns random points and never fits a model
    if base_estimator.lower() == 'dummy':
        assert x3 != x4
        assert len(r3.models) == 0
    else:
        assert x3 == x4
        assert len(r3.models) == 2

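# Illustrative sketch (not collected by pytest): tell() also accepts a list
# of points with a list of values, which is assumed to count towards
# n_initial_points exactly like the one-by-one calls above.
def _demo_seed_with_prior_points():
    opt = Optimizer([(-2.0, 2.0)], "ET", n_initial_points=2,
                    acq_optimizer="sampling", random_state=1)
    res = opt.tell([[-1.0], [1.0]], [bench1([-1.0]), bench1([1.0])])
    return res, opt.ask()  # the next suggestion should be model-based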

@pytest.mark.fast_test
def test_optimizer_base_estimator_string_invalid():
    with pytest.raises(ValueError) as e:
        Optimizer([(-2.0, 2.0)], base_estimator="rtr",
                  n_initial_points=1)
    assert "'RF', 'ET', 'GP', 'GBRT' or 'DUMMY'" in str(e.value)


@pytest.mark.fast_test
@pytest.mark.parametrize("base_estimator", ESTIMATOR_STRINGS)
def test_optimizer_base_estimator_string_smoke(base_estimator):
    opt = Optimizer([(-2.0, 2.0)], base_estimator=base_estimator,
                    n_initial_points=1, acq_func="EI")
    opt.run(func=lambda x: x[0]**2, n_iter=3)


@pytest.mark.fast_test
def test_optimizer_base_estimator_string_smoke_njobs():
    opt = Optimizer([(-2.0, 2.0)], base_estimator="GBRT",
                    n_initial_points=1, acq_func="EI", n_jobs=-1)
    opt.run(func=lambda x: x[0]**2, n_iter=3)


def test_defaults_are_equivalent():
    # check that the defaults of Optimizer reproduce the defaults of
    # gp_minimize
    space = [(-5., 10.), (0., 15.)]
    opt = Optimizer(space, random_state=1)

    for _ in range(12):
        x = opt.ask()
        res_opt = opt.tell(x, branin(x))

    res_min = gp_minimize(branin, space, n_calls=12, random_state=1)

    assert res_min.space == res_opt.space
    # tolerate small differences in the points sampled
    assert np.allclose(res_min.x_iters, res_opt.x_iters)
    assert np.allclose(res_min.x, res_opt.x)

    res_opt2 = opt.get_result()
    assert np.allclose(res_min.x_iters, res_opt2.x_iters)
    assert np.allclose(res_min.x, res_opt2.x)


@pytest.mark.fast_test
def test_dimensions_names():
    from skopt.space import Real, Categorical, Integer
    # create search space and optimizer
    space = [Real(0, 1, name='real'),
             Categorical(['a', 'b', 'c'], name='cat'),
             Integer(0, 1, name='int')]
    opt = Optimizer(space, n_initial_points=1)
    # the result's search space should preserve the dimension names
    result = opt.tell([(0.5, 'a', 0.5)], [3])
    names = [d.name for d in result.space.dimensions]
    assert len(names) == 3
    assert "real" in names
    assert "cat" in names
    assert "int" in names
    assert None not in names

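# Illustrative sketch (not collected by pytest): named dimensions pair with
# skopt.utils.use_named_args, which maps a point to keyword arguments; the
# import path is part of skopt's public utilities, assumed available here.
def _demo_named_dimensions():
    from skopt.space import Real, Integer
    from skopt.utils import use_named_args
    space = [Real(0, 1, name='x'), Integer(0, 10, name='n')]

    @use_named_args(space)
    def objective(x, n):
        return x ** 2 + n

    return objective([0.5, 3])  # equivalent to objective(x=0.5, n=3)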

@pytest.mark.fast_test
def test_categorical_only():
    from skopt.space import Categorical
    cat1 = Categorical([2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
    cat2 = Categorical([2, 3, 4, 5, 6, 7, 8, 9, 10, 11])

    opt = Optimizer([cat1, cat2])
    for n in range(15):
        x = opt.ask()
        res = opt.tell(x, 12 * n)
    assert len(res.x_iters) == 15
    next_x = opt.ask(n_points=4)
    assert len(next_x) == 4

    cat3 = Categorical(["2", "3", "4", "5", "6", "7", "8", "9", "10", "11"])
    cat4 = Categorical(["2", "3", "4", "5", "6", "7", "8", "9", "10", "11"])

    opt = Optimizer([cat3, cat4])
    for n in range(15):
        x = opt.ask()
        res = opt.tell(x, 12 * n)
    assert len(res.x_iters) == 15
    next_x = opt.ask(n_points=4)
    assert len(next_x) == 4


def test_categorical_only2():
    from numpy import linalg
    from skopt.space import Categorical
    from skopt.learning import GaussianProcessRegressor
    space = [Categorical([1, 2, 3]), Categorical([4, 5, 6])]
    opt = Optimizer(space,
                    base_estimator=GaussianProcessRegressor(alpha=1e-7),
                    acq_optimizer='lbfgs',
                    n_initial_points=10,
                    n_jobs=2)

    next_x = opt.ask(n_points=4)
    assert len(next_x) == 4
    opt.tell(next_x, [linalg.norm(x) for x in next_x])
    next_x = opt.ask(n_points=4)
    assert len(next_x) == 4
    opt.tell(next_x, [linalg.norm(x) for x in next_x])
    next_x = opt.ask(n_points=4)
    assert len(next_x) == 4
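

# Illustrative sketch (not collected by pytest): the ask(n_points=...) /
# tell(points, values) pattern above yields a batch of candidates (by
# default, to our understanding, via a constant-liar strategy) that could
# be evaluated in parallel before being reported back in a single tell().
def _demo_batch_ask_tell():
    from skopt.space import Categorical
    space = [Categorical([1, 2, 3]), Categorical([4, 5, 6])]
    opt = Optimizer(space, n_initial_points=4)
    points = opt.ask(n_points=4)                         # one batch of candidates
    values = [float(np.linalg.norm(p)) for p in points]  # evaluated serially here
    return opt.tell(points, values)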