Python sklearn.metrics.explained_variance_score() Examples

The following are 30 code examples of sklearn.metrics.explained_variance_score(), drawn from open-source projects. Each example is attributed to its original project and source file above the code. You may also want to check out all available functions and classes of the module sklearn.metrics.
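Before the project examples, here is a minimal, self-contained sketch of the metric itself, using the toy arrays from the scikit-learn documentation. Note that, unlike r2_score, explained variance does not penalize a constant bias in the predictions; Example #16 below shows y_pred = y_true + 1 still scoring a full 1.0.

from sklearn.metrics import explained_variance_score

y_true = [3.0, -0.5, 2.0, 7.0]
y_pred = [2.5, 0.0, 2.0, 8.0]

# Best possible score is 1.0; lower values indicate a worse fit.
print(explained_variance_score(y_true, y_pred))  # ~0.957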
Example #1
Source File: abstract_regressor.py    From JusticeAI with MIT License
def test(self):
        """
        Tests the regressor using the dataset and writes:
            1- R2 coefficient
            2- explained variance
            3- mean absolute error
            4- mean squared error

        :return: None
        """
        X = np.array([precedent['facts_vector'][self.important_facts_index] for precedent in self.dataset])
        y_pred = self.model.predict(X)
        y_true = np.array([precedent['outcomes_vector'][self.outcome_index]
                           for precedent in self.dataset])
        r2 = metrics.r2_score(y_true, y_pred)
        variance = metrics.explained_variance_score(y_true, y_pred)
        mean_abs_error = metrics.mean_absolute_error(y_true, y_pred)
        mean_squared_error = metrics.mean_squared_error(y_true, y_pred)
        Log.write('R2: {0:.2f}'.format(r2))
        Log.write('Explained Variance: {0:.2f}'.format(variance))
        Log.write('Mean Absolute Error: {0:.2f}'.format(mean_abs_error))
        Log.write('Mean Squared Error: {0:.2f}'.format(mean_squared_error)) 
Example #2
Source File: test_validation.py    From twitter-stock-recommendation with MIT License
def test_cross_val_score_with_score_func_regression():
    X, y = make_regression(n_samples=30, n_features=20, n_informative=5,
                           random_state=0)
    reg = Ridge()

    # Default score of the Ridge regression estimator
    scores = cross_val_score(reg, X, y, cv=5)
    assert_array_almost_equal(scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)

    # R2 score (a.k.a. coefficient of determination) - should be the
    # same as the default estimator score
    r2_scores = cross_val_score(reg, X, y, scoring="r2", cv=5)
    assert_array_almost_equal(r2_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)

    # Mean squared error; this is a loss function, so "scores" are negative
    neg_mse_scores = cross_val_score(reg, X, y, cv=5,
                                     scoring="neg_mean_squared_error")
    expected_neg_mse = np.array([-763.07, -553.16, -274.38, -273.26, -1681.99])
    assert_array_almost_equal(neg_mse_scores, expected_neg_mse, 2)

    # Explained variance
    scoring = make_scorer(explained_variance_score)
    ev_scores = cross_val_score(reg, X, y, cv=5, scoring=scoring)
    assert_array_almost_equal(ev_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2) 
Example #3
Source File: test_regression.py    From twitter-stock-recommendation with MIT License
def test_regression_custom_weights():
    y_true = [[1, 2], [2.5, -1], [4.5, 3], [5, 7]]
    y_pred = [[1, 1], [2, -1], [5, 4], [5, 6.5]]

    msew = mean_squared_error(y_true, y_pred, multioutput=[0.4, 0.6])
    maew = mean_absolute_error(y_true, y_pred, multioutput=[0.4, 0.6])
    rw = r2_score(y_true, y_pred, multioutput=[0.4, 0.6])
    evsw = explained_variance_score(y_true, y_pred, multioutput=[0.4, 0.6])

    assert_almost_equal(msew, 0.39, decimal=2)
    assert_almost_equal(maew, 0.475, decimal=3)
    assert_almost_equal(rw, 0.94, decimal=2)
    assert_almost_equal(evsw, 0.94, decimal=2)

    # Handling msle separately as it does not accept negative inputs.
    y_true = np.array([[0.5, 1], [1, 2], [7, 6]])
    y_pred = np.array([[0.5, 2], [1, 2.5], [8, 8]])
    msle = mean_squared_log_error(y_true, y_pred, multioutput=[0.3, 0.7])
    msle2 = mean_squared_error(np.log(1 + y_true), np.log(1 + y_pred),
                               multioutput=[0.3, 0.7])
    assert_almost_equal(msle, msle2, decimal=2) 
Example #4
Source File: main.py    From TrafficFlowPrediction with MIT License
def eva_regress(y_true, y_pred):
    """Evaluation
    evaluate the predicted resul.

    # Arguments
        y_true: List/ndarray, ture data.
        y_pred: List/ndarray, predicted data.
    """

    mape = MAPE(y_true, y_pred)
    vs = metrics.explained_variance_score(y_true, y_pred)
    mae = metrics.mean_absolute_error(y_true, y_pred)
    mse = metrics.mean_squared_error(y_true, y_pred)
    r2 = metrics.r2_score(y_true, y_pred)
    print('explained_variance_score:%f' % vs)
    print('mape:%f%%' % mape)
    print('mae:%f' % mae)
    print('mse:%f' % mse)
    print('rmse:%f' % math.sqrt(mse))
    print('r2:%f' % r2) 
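MAPE above is a project helper, not part of sklearn.metrics, and its definition is not shown in this excerpt. A common definition, offered only as an assumption about what it computes, is:

import numpy as np

def MAPE(y_true, y_pred):
    """Mean absolute percentage error, in percent; assumes y_true contains no zeros."""
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100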
Example #5
Source File: ABuMLGrid.py    From abu with GNU General Public License v3.0
def grid_search_init_n_components(estimator, x, y, n_components_range=None, cv=10, n_jobs=-1,
                                  scoring=None, show=True):
    """
    封装grid search特定的'n_components'关键字参数最优搜索,
    为AbuMLCreater中_estimators_prarms_best提供callback函数,
    具体阅读AbuMLCreater._estimators_prarms_best()

    :param estimator: 学习器对象
    :param x: 训练集x矩阵,numpy矩阵
    :param y: 训练集y序列,numpy序列
    :param n_components_range: 默认None, None则会使用:
            n_estimators_range = np.arange(2, np.maximum(10, int(x.shape[1]) - 1), 1)

    :param cv: int,GridSearchCV切割训练集测试集参数,默认10
    :param n_jobs: 并行执行的进程任务数量,默认-1, 开启与cpu相同数量的进程数
    :param scoring: 测试集的度量方法,默认为None, None的情况下分类器使用accuracy进行度量,回归器使用
                    回归器使用可释方差值explained_variance_score,使用make_scorer对函数进行score封装
    :param show: 是否进行可视化
    :return: eg: (0.82154882154882158, {'n_components': 10})
    """
    if n_components_range is None:
        n_components_range = np.arange(2, np.maximum(10, int(x.shape[1]) - 1), 1)

    return grid_search_init_kwargs(estimator, x, y, 'n_components', n_components_range,
                                   cv=cv, n_jobs=n_jobs, scoring=scoring, show=show) 
Example #6
Source File: test_metrics.py    From python-dlpy with Apache License 2.0
def test_explained_variance_score(self):

        try:
            from sklearn.metrics import explained_variance_score as skevs
        except:
            unittest.TestCase.skipTest(self, "sklearn is not found in the libraries")

        skevs_score1 = skevs(self.local_reg1.target, self.local_reg1.p_target)
        dlpyevs_score1 = explained_variance_score('target', 'p_target', castable=self.reg_table1)

        self.assertAlmostEqual(skevs_score1, dlpyevs_score1)

        skevs_score2 = skevs(self.local_reg1.target, self.local_reg2.p_target)
        dlpyevs_score2 = explained_variance_score(self.reg_table1.target, self.reg_table2.p_target,
                                                  id_vars='id1')

        self.assertAlmostEqual(skevs_score2, dlpyevs_score2) 
Example #7
Source File: train_fingerprint_model.py    From KerasNeuralFingerprint with MIT License
def eval_metrics_on(predictions, labels):
    '''
    assuming this is a regression task; labels are continuous-valued floats
    
    returns most regression-related scores for the given predictions/targets as a dictionary:
    
        r2, mean_abs_error, mse, rmse, median_absolute_error, explained_variance_score
    '''
    if len(labels[0])==2: #labels is list of data/labels pairs
        labels = np.concatenate([l[1] for l in labels])
    predictions = predictions[:,0]
    
    r2                       = metrics.r2_score(labels, predictions)
    mean_abs_error           = np.abs(predictions - labels).mean()
    mse                      = ((predictions - labels)**2).mean()
    rmse                     = np.sqrt(mse)
    median_absolute_error    = metrics.median_absolute_error(labels, predictions) # robust to outliers
    explained_variance_score = metrics.explained_variance_score(labels, predictions) # best score = 1, lower is worse
    return {'r2':r2, 'mean_abs_error':mean_abs_error, 'mse':mse, 'rmse':rmse, 
            'median_absolute_error':median_absolute_error, 
            'explained_variance_score':explained_variance_score} 
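A hypothetical invocation of the helper above: model and batches are assumed names, with batches a list of (inputs, targets) pairs, which is the case the len(labels[0])==2 branch unpacks; predictions must be 2-D because the function reads column 0.

# Hypothetical usage; `model` and `batches` are assumptions, not names from the source.
import numpy as np

preds = model.predict(np.concatenate([b[0] for b in batches]))  # shape (n_samples, 1)
scores = eval_metrics_on(preds, batches)
print(scores['explained_variance_score'], scores['rmse'])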
Example #8
Source File: test.py    From TrafficFlowPrediction with MIT License
def eva_regress(y_true, y_pred):
    """Evaluation
    evaluate the predicted resul.

    # Arguments
        y_true: List/ndarray, ture data.
        y_pred: List/ndarray, predicted data.
    """

    mape = MAPE(y_true, y_pred)
    vs = metrics.explained_variance_score(y_true, y_pred)
    mae = metrics.mean_absolute_error(y_true, y_pred)
    mse = metrics.mean_squared_error(y_true, y_pred)
    r2 = metrics.r2_score(y_true, y_pred)
    print('explained_variance_score:%f' % vs)
    print('mape:%f%%' % mape)
    print('mae:%f' % mae)
    print('mse:%f' % mse)
    print('rmse:%f' % math.sqrt(mse))
    print('r2:%f' % r2) 
Example #9
Source File: test_validation.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_cross_val_score_with_score_func_regression():
    X, y = make_regression(n_samples=30, n_features=20, n_informative=5,
                           random_state=0)
    reg = Ridge()

    # Default score of the Ridge regression estimator
    scores = cross_val_score(reg, X, y, cv=5)
    assert_array_almost_equal(scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)

    # R2 score (a.k.a. coefficient of determination) - should be the
    # same as the default estimator score
    r2_scores = cross_val_score(reg, X, y, scoring="r2", cv=5)
    assert_array_almost_equal(r2_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)

    # Mean squared error; this is a loss function, so "scores" are negative
    neg_mse_scores = cross_val_score(reg, X, y, cv=5,
                                     scoring="neg_mean_squared_error")
    expected_neg_mse = np.array([-763.07, -553.16, -274.38, -273.26, -1681.99])
    assert_array_almost_equal(neg_mse_scores, expected_neg_mse, 2)

    # Explained variance
    scoring = make_scorer(explained_variance_score)
    ev_scores = cross_val_score(reg, X, y, cv=5, scoring=scoring)
    assert_array_almost_equal(ev_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2) 
Example #10
Source File: test_regression.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_regression_custom_weights():
    y_true = [[1, 2], [2.5, -1], [4.5, 3], [5, 7]]
    y_pred = [[1, 1], [2, -1], [5, 4], [5, 6.5]]

    msew = mean_squared_error(y_true, y_pred, multioutput=[0.4, 0.6])
    maew = mean_absolute_error(y_true, y_pred, multioutput=[0.4, 0.6])
    rw = r2_score(y_true, y_pred, multioutput=[0.4, 0.6])
    evsw = explained_variance_score(y_true, y_pred, multioutput=[0.4, 0.6])

    assert_almost_equal(msew, 0.39, decimal=2)
    assert_almost_equal(maew, 0.475, decimal=3)
    assert_almost_equal(rw, 0.94, decimal=2)
    assert_almost_equal(evsw, 0.94, decimal=2)

    # Handling msle separately as it does not accept negative inputs.
    y_true = np.array([[0.5, 1], [1, 2], [7, 6]])
    y_pred = np.array([[0.5, 2], [1, 2.5], [8, 8]])
    msle = mean_squared_log_error(y_true, y_pred, multioutput=[0.3, 0.7])
    msle2 = mean_squared_error(np.log(1 + y_true), np.log(1 + y_pred),
                               multioutput=[0.3, 0.7])
    assert_almost_equal(msle, msle2, decimal=2) 
Example #11
Source File: test_regression.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_regression_metrics_at_limits():
    assert_almost_equal(mean_squared_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(mean_squared_log_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(mean_absolute_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(median_absolute_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(max_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(explained_variance_score([0.], [0.]), 1.00, 2)
    assert_almost_equal(r2_score([0., 1], [0., 1]), 1.00, 2)
    assert_raises_regex(ValueError, "Mean Squared Logarithmic Error cannot be "
                        "used when targets contain negative values.",
                        mean_squared_log_error, [-1.], [-1.])
    assert_raises_regex(ValueError, "Mean Squared Logarithmic Error cannot be "
                        "used when targets contain negative values.",
                        mean_squared_log_error, [1., 2., 3.], [1., -2., 3.])
    assert_raises_regex(ValueError, "Mean Squared Logarithmic Error cannot be "
                        "used when targets contain negative values.",
                        mean_squared_log_error, [1., -2., 3.], [1., 2., 3.]) 
Example #12
Source File: test_builder.py    From gordo with GNU Affero General Public License v3.0
def test_metrics_from_list():
    """
    Check getting functions from a list of metric names
    """
    default = ModelBuilder.metrics_from_list()
    assert default == [
        metrics.explained_variance_score,
        metrics.r2_score,
        metrics.mean_squared_error,
        metrics.mean_absolute_error,
    ]

    specifics = ModelBuilder.metrics_from_list(
        ["sklearn.metrics.adjusted_mutual_info_score", "sklearn.metrics.r2_score"]
    )
    assert specifics == [metrics.adjusted_mutual_info_score, metrics.r2_score] 
Example #13
Source File: test_sad.py    From basenji with Apache License 2.0
def test_multi(self):
    if os.path.isdir('sad/testrm'):
      shutil.rmtree('sad/testrm')

    sad_opts = '--rc --shifts "0,21"'
    sad_opts += ' -o sad/testrm -q "" -p 4'
    cmd = 'basenji_sad_ref_multi.py %s %s %s %s' % \
        (sad_opts, self.params_file, self.model_file, self.vcf_file)
    subprocess.call(cmd, shell=True)

    saved_h5 = h5py.File('sad/saved/sadr.h5', 'r')
    this_h5 = h5py.File('sad/testrm/sad.h5', 'r')

    saved_keys = sorted(saved_h5.keys())
    this_keys = sorted(this_h5.keys())
    assert(len(saved_keys) == len(this_keys))
    assert(saved_keys == this_keys)

    for key in saved_h5:
      if key[-4:] != '_pct':
        saved_value = saved_h5[key][:]
        this_value = this_h5[key][:]

        if saved_value.dtype.char == 'S':
          np.testing.assert_array_equal(saved_value, this_value)
        else:
          np.testing.assert_allclose(saved_value, this_value, atol=2e-1, rtol=2e-1)
          r2 = explained_variance_score(saved_value.flatten(), this_value.flatten())
          assert(r2 > 0.999)

    saved_h5.close()
    this_h5.close()

    shutil.rmtree('sad/testrm') 
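The assertion above uses explained_variance_score on flattened arrays as a near-equality check between two HDF5 datasets (the variable is named r2, but the value is the explained variance). A toy sketch of the same idea:

import numpy as np
from sklearn.metrics import explained_variance_score

rng = np.random.default_rng(0)
saved_value = rng.normal(size=(10, 4))
this_value = saved_value + 1e-4 * rng.normal(size=(10, 4))

# Nearly identical arrays score just under 1.0.
score = explained_variance_score(saved_value.flatten(), this_value.flatten())
assert score > 0.999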
Example #14
Source File: mssa.py    From pymssa with MIT License
def _calculate_optimal_reconstruction_orders(self,
                                                 timeseries,
                                                 components):
        '''Calculates the optimal component ordering for reconstructing
        each of the timeseries. This is done by simply ranking the components
        in terms of how much variance they explain for each timeseries in the
        original data.
        '''

        optimal_orders = optimal_component_ordering(
            timeseries,
            components
        )

        optimal_orders = optimal_orders.astype(int)

        order_explained_variance = np.zeros_like(optimal_orders).astype(float)
        for ts_idx in range(timeseries.shape[1]):
            ts_comp = components[ts_idx, :, :]
            ts_comp = ts_comp[:, optimal_orders[:, ts_idx]]
            # ts_comp = np.cumsum(ts_comp, axis=1)

            order_explained_variance[:, ts_idx] = np.apply_along_axis(
                partial(explained_variance_score, timeseries[:, ts_idx]),
                0,
                ts_comp
            )

        return optimal_orders, order_explained_variance 
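The partial/np.apply_along_axis idiom above scores every component column against the same target series in one call. A standalone sketch with made-up data:

import numpy as np
from functools import partial
from sklearn.metrics import explained_variance_score

rng = np.random.default_rng(0)
target = rng.normal(size=100)
# Three candidate components: the target itself, a noisy copy, and pure noise.
components = np.column_stack([target,
                              target + 0.1 * rng.normal(size=100),
                              rng.normal(size=100)])

# Apply explained_variance_score(target, column) down each column (axis 0).
ev_per_component = np.apply_along_axis(
    partial(explained_variance_score, target), 0, components)
print(ev_per_component)  # first entry is 1.0, last is near zero or negative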
Example #15
Source File: test_regression.py    From twitter-stock-recommendation with MIT License
def test_regression_metrics_at_limits():
    assert_almost_equal(mean_squared_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(mean_squared_log_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(mean_absolute_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(median_absolute_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(explained_variance_score([0.], [0.]), 1.00, 2)
    assert_almost_equal(r2_score([0., 1], [0., 1]), 1.00, 2)
    assert_raises_regex(ValueError, "Mean Squared Logarithmic Error cannot be "
                        "used when targets contain negative values.",
                        mean_squared_log_error, [-1.], [-1.]) 
Example #16
Source File: test_regression.py    From twitter-stock-recommendation with MIT License
def test_regression_metrics(n_samples=50):
    y_true = np.arange(n_samples)
    y_pred = y_true + 1

    assert_almost_equal(mean_squared_error(y_true, y_pred), 1.)
    assert_almost_equal(mean_squared_log_error(y_true, y_pred),
                        mean_squared_error(np.log(1 + y_true),
                                           np.log(1 + y_pred)))
    assert_almost_equal(mean_absolute_error(y_true, y_pred), 1.)
    assert_almost_equal(median_absolute_error(y_true, y_pred), 1.)
    assert_almost_equal(r2_score(y_true, y_pred),  0.995, 2)
    assert_almost_equal(explained_variance_score(y_true, y_pred), 1.) 
Example #17
Source File: test_builder.py    From gordo with GNU Affero General Public License v3.0
def test_model_builder_metrics_list(metrics_: Optional[List[str]]):
    model_config = {
        "sklearn.multioutput.MultiOutputRegressor": {
            "estimator": "sklearn.linear_model.LinearRegression"
        }
    }
    data_config = get_random_data()

    evaluation_config: Dict[str, Any] = {"cv_mode": "full_build"}
    if metrics_:
        evaluation_config.update({"metrics": metrics_})

    machine = Machine(
        name="model-name",
        dataset=data_config,
        model=model_config,
        evaluation=evaluation_config,
        project_name="test",
    )
    _model, machine = ModelBuilder(machine).build()

    expected_metrics = metrics_ or [
        "sklearn.metrics.explained_variance_score",
        "sklearn.metrics.r2_score",
        "sklearn.metrics.mean_squared_error",
        "sklearn.metrics.mean_absolute_error",
    ]

    assert all(
        metric.split(".")[-1].replace("_", "-")
        in machine.metadata.build_metadata.model.cross_validation.scores
        for metric in expected_metrics
    ) 
Example #18
Source File: 06B_learning_curves.py    From scipy_2015_sklearn_tutorial with Creative Commons Zero v1.0 Universal
def plot_learning_curve(model, err_func=explained_variance_score, N=300, n_runs=10, n_sizes=50, ylim=None):
    sizes = np.linspace(5, N, n_sizes).astype(int)
    train_err = np.zeros((n_runs, n_sizes))
    validation_err = np.zeros((n_runs, n_sizes))
    for i in range(n_runs):
        for j, size in enumerate(sizes):
            xtrain, xtest, ytrain, ytest = train_test_split(
                X, y, train_size=size, random_state=i)
            # Train on only the first `size` points
            model.fit(xtrain, ytrain)
            validation_err[i, j] = err_func(ytest, model.predict(xtest))
            train_err[i, j] = err_func(ytrain, model.predict(xtrain))

    plt.plot(sizes, validation_err.mean(axis=0), lw=2, label='validation')
    plt.plot(sizes, train_err.mean(axis=0), lw=2, label='training')

    plt.xlabel('training set size')
    plt.ylabel(err_func.__name__.replace('_', ' '))
    
    plt.grid(True)
    
    plt.legend(loc=0)
    
    plt.xlim(0, N-1)
    
    if ylim:
        plt.ylim(ylim) 
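Note that plot_learning_curve reads X and y from the enclosing scope rather than taking them as parameters. A hypothetical invocation (the estimator choice is an assumption):

# Hypothetical usage; X and y must already be defined in the enclosing scope.
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

plot_learning_curve(LinearRegression(), N=300, ylim=(0, 1))
plt.show()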
Example #19
Source File: ABuMLGrid.py    From abu with GNU General Public License v3.0
def grid_search_init_n_neighbors(estimator, x, y, n_neighbors_range=None, cv=10, n_jobs=-1,
                                 scoring=None, show=True):
    """
    封装grid search特定的'n_components'关键字参数最优搜索,
    为AbuMLCreater中_estimators_prarms_best提供callback函数

    具体阅读
            AbuMLCreater._estimators_prarms_best()
            + AbuMLCreater.knn_classifier_best()

    :param estimator: 学习器对象
    :param x: 训练集x矩阵,numpy矩阵
    :param y: 训练集y序列,numpy序列
    :param n_neighbors_range: 默认None, None则会使用:
            n_estimators_range = np.arange(2, np.maximum(10, int(x.shape[1]) - 1), 1)

    :param cv: int,GridSearchCV切割训练集测试集参数,默认10
    :param n_jobs: 并行执行的进程任务数量,默认-1, 开启与cpu相同数量的进程数
    :param scoring: 测试集的度量方法,默认为None, None的情况下分类器使用accuracy进行度量,
                    回归器使用可释方差值explained_variance_score,使用make_scorer对函数进行score封装
    :param show: 是否进行可视化
    :return: eg: (0.82154882154882158, {'n_components': 10})
    """

    if n_neighbors_range is None:
        # cap the number of neighbor voters at 1 to np.minimum(26, one third of the sample count)
        n_neighbors_range = np.arange(1, np.minimum(26, int(x.shape[0] / 3)), 1)

    return grid_search_init_kwargs(estimator, x, y, 'n_neighbors', n_neighbors_range,
                                   cv=cv, n_jobs=n_jobs, scoring=scoring, show=show) 
Example #20
Source File: ABuMLGrid.py    From abu with GNU General Public License v3.0
def grid_search_init_max_depth(estimator, x, y, max_depth_range=None, cv=10, n_jobs=-1,
                               scoring=None, show=True):
    """
    封装grid search特定的'n_components'关键字参数最优搜索,
    为AbuMLCreater中_estimators_prarms_best提供callback函数

    具体阅读
            AbuMLCreater._estimators_prarms_best()
            + AbuMLCreater.decision_tree_classifier_best()

    :param estimator: 学习器对象
    :param x: 训练集x矩阵,numpy矩阵
    :param y: 训练集y序列,numpy序列
    :param max_depth_range: 默认None, None则会使用:
            max_depth_range = np.arange(2, np.maximum(10, int(x.shape[1]) - 1), 1)

    :param cv: int,GridSearchCV切割训练集测试集参数,默认10
    :param n_jobs: 并行执行的进程任务数量,默认-1, 开启与cpu相同数量的进程数
    :param scoring: 测试集的度量方法,默认为None, None的情况下分类器使用accuracy进行度量,
                    回归器使用可释方差值explained_variance_score,使用make_scorer对函数进行score封装
    :param show: 是否进行可视化
    :return: eg: (0.82154882154882158, {'max_depth': 3})
    """

    if max_depth_range is None:
        max_depth_range = np.arange(2, np.maximum(10, int(x.shape[1]) - 1), 1)

    return grid_search_init_kwargs(estimator, x, y, 'max_depth', max_depth_range,
                                   cv=cv, n_jobs=n_jobs, scoring=scoring, show=show) 
Example #21
Source File: ABuMLGrid.py    From abu with GNU General Public License v3.0
def grid_search_init_n_estimators(estimator, x, y, n_estimators_range=None, cv=10, n_jobs=-1,
                                  scoring=None, show=True):
    """
    封装grid search特定的'n_estimators'关键字参数最优搜索,
    为AbuMLCreater中_estimators_prarms_best提供callback函数,

    具体阅读
            AbuMLCreater._estimators_prarms_best()
            + AbuMLCreater.random_forest_classifier_best()

    eg:
        from abupy import AbuML, ml
        ttn_abu = AbuML.create_test_more_fiter()
        ttn_abu.estimator.random_forest_classifier()
        ml.grid_search_init_n_estimators(ttn_abu.estimator.clf, ttn_abu.x, ttn_abu.y)

    :param estimator: 学习器对象
    :param x: 训练集x矩阵,numpy矩阵
    :param y: 训练集y序列,numpy序列
    :param n_estimators_range: 默认None, None则会使用 n_estimators_range = np.arange(50, 500, 10)
    :param cv: int,GridSearchCV切割训练集测试集参数,默认10
    :param n_jobs: 并行执行的进程任务数量,默认-1, 开启与cpu相同数量的进程数
    :param scoring: 测试集的度量方法,默认为None, None的情况下分类器使用accuracy进行度量,
                    回归器使用可释方差值explained_variance_score,使用make_scorer对函数进行score封装
    :param show: 是否进行可视化
    :return: eg: (0.82154882154882158, {'n_estimators': 310})
    """

    if n_estimators_range is None:
        n_estimators_range = np.arange(50, 500, 10)

    return grid_search_init_kwargs(estimator, x, y, 'n_estimators', n_estimators_range,
                                   cv=cv, n_jobs=n_jobs, scoring=scoring, show=show) 
Example #22
Source File: ABuMLGrid.py    From abu with GNU General Public License v3.0
def _scoring_grid(estimator, scoring):
    """
    只针对有监督学习过滤无监督学习,对scoring未赋予的情况根据
    学习器分类器使用accuracy进行度量,回归器使用可释方差值explained_variance_score,
    使用make_scorer对函数进行score封装

    :param estimator: 学习器对象
    :param scoring: 度量使用的方法,未赋予的情况根据
                    学习器分类器使用accuracy进行度量,回归器使用explained_variance_score进行度量
    :return: scoring
    """

    if not isinstance(estimator, (ClassifierMixin, RegressorMixin)):
        logging.info('only support supervised learning')
        # TODO scoring metrics and GridSearchCV support for unsupervised learning
        return None

    if scoring is None:
        if isinstance(estimator, ClassifierMixin):
            # classifiers are scored with accuracy
            return 'accuracy'
        elif isinstance(estimator, RegressorMixin):
            # regressors are scored with explained_variance_score,
            # wrapped as a scorer via make_scorer
            """
                make_scorer assigns the sign of the returned value through
                greater_is_better, eg: sign = 1 if greater_is_better else -1
            """
            return make_scorer(explained_variance_score, greater_is_better=True)
        return None
    return scoring 
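A minimal sketch of the regressor branch above in action: the same make_scorer wrapper, fed to cross_val_score on synthetic data. The data matches Examples #2 and #9, so the scores land near [0.94, 0.97, 0.97, 0.99, 0.92].

from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge
from sklearn.metrics import explained_variance_score, make_scorer
from sklearn.model_selection import cross_val_score

X, y = make_regression(n_samples=30, n_features=20, n_informative=5,
                       random_state=0)
scorer = make_scorer(explained_variance_score, greater_is_better=True)
print(cross_val_score(Ridge(), X, y, cv=5, scoring=scorer))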
Example #23
Source File: test_metrics.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License
def test_explained_variance_score(self):
        result = self.df.metrics.explained_variance_score()
        expected = metrics.explained_variance_score(self.target, self.pred)
        self.assertEqual(result, expected) 
Example #24
Source File: models.py    From gordo with GNU Affero General Public License v3.0
def score(
        self,
        X: Union[np.ndarray, pd.DataFrame],
        y: Union[np.ndarray, pd.DataFrame],
        sample_weight: Optional[np.ndarray] = None,
    ) -> float:
        """
        Returns the explained variance score between the autoencoder's input and output

        Parameters
        ----------
        X: Union[np.ndarray, pd.DataFrame]
            Input data to the model
        y: Union[np.ndarray, pd.DataFrame]
            Target
        sample_weight: Optional[np.ndarray]
            sample weights

        Returns
        -------
        score: float
            Returns the explained variance score
        """
        if not hasattr(self, "model"):
            raise NotFittedError(
                f"This {self.__class__.__name__} has not been fitted yet."
            )

        out = self.model.predict(X)

        return explained_variance_score(y, out) 
Example #25
Source File: test_sad.py    From basenji with Apache License 2.0
def test_multi(self):
    if os.path.isdir('sad/testm'):
      shutil.rmtree('sad/testm')

    sad_opts = '--rc --shifts "0,21"'
    sad_opts += ' -o sad/testm -q "" -p 4'

    cmd = 'basenji_sad_multi.py %s %s %s %s' % \
        (sad_opts, self.params_file, self.model_file, self.vcf_file)
    subprocess.call(cmd, shell=True)

    saved_h5 = h5py.File('sad/saved/sad.h5', 'r')
    this_h5 = h5py.File('sad/testm/sad.h5', 'r')

    saved_keys = sorted(saved_h5.keys())
    this_keys = sorted(this_h5.keys())
    assert(len(saved_keys) == len(this_keys))
    assert(saved_keys == this_keys)

    for key in saved_h5:
      if key[-4:] != '_pct':
        saved_value = saved_h5[key][:]
        this_value = this_h5[key][:]

        if saved_value.dtype.char == 'S':
          np.testing.assert_array_equal(saved_value, this_value)
        else:
          np.testing.assert_allclose(saved_value, this_value, atol=1e-1, rtol=5e-2)
          r2 = explained_variance_score(saved_value.flatten(), this_value.flatten())
          assert(r2 > 0.999)

    saved_h5.close()
    this_h5.close()

    shutil.rmtree('sad/testm') 
Example #26
Source File: models.py    From gordo with GNU Affero General Public License v3.0
def score(
        self,
        X: Union[np.ndarray, pd.DataFrame],
        y: Union[np.ndarray, pd.DataFrame],
        sample_weight: Optional[np.ndarray] = None,
    ) -> float:
        """
        Returns the explained variance score between the 1-step-ahead forecast and the
        true input at the next time step (note: for LSTM models, X is offset by `lookback_window`).

        Parameters
        ----------
        X: Union[np.ndarray, pd.DataFrame]
            Input data to the model.
        y: Union[np.ndarray, pd.DataFrame]
            Target
        sample_weight: Optional[np.ndarray]
            Sample weights

        Returns
        -------
        score: float
            Returns the explained variance score.
        """
        if not hasattr(self, "model"):
            raise NotFittedError(
                f"This {self.__class__.__name__} has not been fitted yet."
            )

        out = self.predict(X)

        # Limit y samples to match the offset caused by the LSTM lookback window;
        # i.e., if the lookback window is 5, 'out' has 5 fewer rows than X by now
        return explained_variance_score(y[-len(out) :], out) 
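The y[-len(out):] slice aligns the targets with predictions that are shorter by the lookback window. An illustrative sketch with plain arrays (the window length is an assumption):

import numpy as np
from sklearn.metrics import explained_variance_score

lookback = 5                        # assumed LSTM lookback window
y = np.arange(100, dtype=float)     # full target series
out = y[lookback:] + 0.01           # stand-in predictions: 5 rows shorter than y

# Drop the first `lookback` targets so both arrays line up row for row.
print(explained_variance_score(y[-len(out):], out))  # 1.0 (constant bias is ignored)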
Example #27
Source File: RegressorAnalyzer.py    From CDSS with GNU General Public License v3.0
def _score_explained_variance(self):
        return explained_variance_score(self._y_test, self._y_predicted) 
Example #28
Source File: test_regression.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_regression_metrics(n_samples=50):
    y_true = np.arange(n_samples)
    y_pred = y_true + 1

    assert_almost_equal(mean_squared_error(y_true, y_pred), 1.)
    assert_almost_equal(mean_squared_log_error(y_true, y_pred),
                        mean_squared_error(np.log(1 + y_true),
                                           np.log(1 + y_pred)))
    assert_almost_equal(mean_absolute_error(y_true, y_pred), 1.)
    assert_almost_equal(median_absolute_error(y_true, y_pred), 1.)
    assert_almost_equal(max_error(y_true, y_pred), 1.)
    assert_almost_equal(r2_score(y_true, y_pred),  0.995, 2)
    assert_almost_equal(explained_variance_score(y_true, y_pred), 1.) 
Example #29
Source File: regression_metric.py    From FATE with Apache License 2.0
def compute(labels, pred_scores):
        return explained_variance_score(labels, pred_scores) 
Example #30
Source File: run.py    From KitcheNette with Apache License 2.0
def save_prediction(model, loader, dataset, args):
    model.eval()
    csv_writer = csv.writer(open(args.checkpoint_dir + 'prediction_' +
                                 args.model_name + '.csv', 'w'))
    csv_writer.writerow(['ingr1', 'ingr1_cate', 'ingr2', 'ingr2_cate', 'prediction', 'target'])

    tar_set = []
    pred_set = []

    ingr2category = pickle.load(open(args.ingr2category_dir, 'rb'))

    for d_idx, (d1, d1_r, d1_c, d1_l, d2, d2_r, d2_c, d2_l, score) in enumerate(loader):
        # Run model for getting predictions
        outputs = model(d1_r.cuda(), d1_c.cuda(), d1_l, d2_r.cuda(), d2_c.cuda(), d2_l)
        predictions = outputs[2].data.cpu().numpy()
        targets = score.data.tolist()

        tar_set += list(targets)
        pred_set += list(predictions)

        for a1, a2, a3, a4 in zip(d1, d2, predictions, targets):
            csv_writer.writerow([a1, ingr2category[a1], a2, ingr2category[a2], a3, a4])

        # Print progress
        if d_idx % args.print_step == 0 or d_idx == len(loader) - 1:
            _progress = '{}/{} saving unknown predictions..'.format(
                d_idx + 1, len(loader))
            LOGGER.info(_progress)

    mse = mean_squared_error(tar_set, pred_set)
    rmse = sqrt(mse)
    mae = mean_absolute_error(tar_set, pred_set)
    mae2 = median_absolute_error(tar_set, pred_set)
    corr = np.corrcoef(tar_set, pred_set)[0][1]
    ev = explained_variance_score(tar_set, pred_set)
    r2 = r2_score(tar_set, pred_set)

    LOGGER.info('Loss\tMSE\tMAE\tMAE2\tCorr\tEV\t\tR2')
    LOGGER.info('{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}'.format(
        rmse, mse, mae, mae2, corr, ev, r2))

