Python sklearn.metrics.explained_variance_score() Examples

The following are 30 code examples showing how to use sklearn.metrics.explained_variance_score(). These examples are extracted from open-source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module sklearn.metrics, or try the search function.

Example 1
Project: gordo   Author: equinor   File: test_builder.py    License: GNU Affero General Public License v3.0 6 votes vote down vote up
def test_metrics_from_list():
    """
    Verify that ``ModelBuilder.metrics_from_list`` maps metric names to functions.

    Without arguments it must return the four default regression metrics in a
    fixed order; given dotted metric paths it must return the matching
    callables in the order requested.
    """
    expected_defaults = [
        metrics.explained_variance_score,
        metrics.r2_score,
        metrics.mean_squared_error,
        metrics.mean_absolute_error,
    ]
    assert ModelBuilder.metrics_from_list() == expected_defaults

    requested_names = [
        "sklearn.metrics.adjusted_mutual_info_score",
        "sklearn.metrics.r2_score",
    ]
    expected_specifics = [metrics.adjusted_mutual_info_score, metrics.r2_score]
    assert ModelBuilder.metrics_from_list(requested_names) == expected_specifics
Example 2
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_regression.py    License: MIT License 6 votes vote down vote up
def test_regression_metrics_at_limits():
    """Check regression metrics on trivial inputs and MSLE on negative values."""
    # A perfect single-sample prediction: each error metric must be 0.
    error_metrics = (
        mean_squared_error,
        mean_squared_log_error,
        mean_absolute_error,
        median_absolute_error,
        max_error,
    )
    for error_metric in error_metrics:
        assert_almost_equal(error_metric([0.], [0.]), 0.00, 2)

    # Perfect predictions: both score metrics must be 1.
    assert_almost_equal(explained_variance_score([0.], [0.]), 1.00, 2)
    assert_almost_equal(r2_score([0., 1], [0., 1]), 1.00, 2)

    # MSLE must raise when either y_true or y_pred holds a negative value.
    msle_message = ("Mean Squared Logarithmic Error cannot be "
                    "used when targets contain negative values.")
    negative_inputs = (
        ([-1.], [-1.]),
        ([1., 2., 3.], [1., -2., 3.]),
        ([1., -2., 3.], [1., 2., 3.]),
    )
    for y_true, y_pred in negative_inputs:
        assert_raises_regex(ValueError, msle_message,
                            mean_squared_log_error, y_true, y_pred)
Example 3
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_regression.py    License: MIT License 6 votes vote down vote up
def test_regression_custom_weights():
    """Check multioutput regression metrics when per-output weights are given."""
    y_true = [[1, 2], [2.5, -1], [4.5, 3], [5, 7]]
    y_pred = [[1, 1], [2, -1], [5, 4], [5, 6.5]]

    # (metric, expected weighted value, decimal places to compare)
    weighted_cases = (
        (mean_squared_error, 0.39, 2),
        (mean_absolute_error, 0.475, 3),
        (r2_score, 0.94, 2),
        (explained_variance_score, 0.94, 2),
    )
    for metric, expected, places in weighted_cases:
        weighted = metric(y_true, y_pred, multioutput=[0.4, 0.6])
        assert_almost_equal(weighted, expected, decimal=places)

    # MSLE is checked separately because it does not accept negative inputs.
    y_true = np.array([[0.5, 1], [1, 2], [7, 6]])
    y_pred = np.array([[0.5, 2], [1, 2.5], [8, 8]])
    direct_msle = mean_squared_log_error(y_true, y_pred,
                                         multioutput=[0.3, 0.7])
    mse_of_logs = mean_squared_error(np.log(1 + y_true), np.log(1 + y_pred),
                                     multioutput=[0.3, 0.7])
    assert_almost_equal(direct_msle, mse_of_logs, decimal=2)
Example 4
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_validation.py    License: MIT License 6 votes vote down vote up
def test_cross_val_score_with_score_func_regression():
    """Check ``cross_val_score`` on a Ridge regressor under several scorers."""
    X, y = make_regression(n_samples=30, n_features=20, n_informative=5,
                           random_state=0)
    reg = Ridge()
    expected_r2_like = [0.94, 0.97, 0.97, 0.99, 0.92]

    # No scoring argument: the Ridge estimator's default score is used.
    default_scores = cross_val_score(reg, X, y, cv=5)
    assert_array_almost_equal(default_scores, expected_r2_like, 2)

    # Explicit "r2" (coefficient of determination) should match the
    # default estimator score.
    explicit_r2 = cross_val_score(reg, X, y, scoring="r2", cv=5)
    assert_array_almost_equal(explicit_r2, expected_r2_like, 2)

    # Mean squared error is a loss function, so these "scores" are negative.
    expected_neg_mse = np.array([-763.07, -553.16, -274.38, -273.26, -1681.99])
    neg_mse = cross_val_score(reg, X, y, cv=5,
                              scoring="neg_mean_squared_error")
    assert_array_almost_equal(neg_mse, expected_neg_mse, 2)

    # Explained variance, wrapped into a scorer with make_scorer.
    ev_scorer = make_scorer(explained_variance_score)
    ev = cross_val_score(reg, X, y, cv=5, scoring=ev_scorer)
    assert_array_almost_equal(ev, expected_r2_like, 2)
Example 5
Project: TrafficFlowPrediction   Author: zbj6633   File: test.py    License: MIT License 6 votes vote down vote up
def eva_regress(y_true, y_pred):
    """Evaluation
    Evaluate the predicted result and print each metric on its own line.

    # Arguments
        y_true: List/ndarray, true data.
        y_pred: List/ndarray, predicted data.
    """
    # Compute every metric up front, then print them in a fixed order.
    mape_value = MAPE(y_true, y_pred)
    evs_value = metrics.explained_variance_score(y_true, y_pred)
    mae_value = metrics.mean_absolute_error(y_true, y_pred)
    mse_value = metrics.mean_squared_error(y_true, y_pred)
    r2_value = metrics.r2_score(y_true, y_pred)

    report = (
        ('explained_variance_score:%f', evs_value),
        ('mape:%f%%', mape_value),
        ('mae:%f', mae_value),
        ('mse:%f', mse_value),
        ('rmse:%f', math.sqrt(mse_value)),
        ('r2:%f', r2_value),
    )
    for template, value in report:
        print(template % value)
Example 6
Project: KerasNeuralFingerprint   Author: GUR9000   File: train_fingerprint_model.py    License: MIT License 6 votes vote down vote up
def eval_metrics_on(predictions, labels):
    '''
    Compute common regression scores for a set of predictions.

    Assumes a regression task with continuous-valued float labels. When the
    first element of `labels` has length 2, `labels` is treated as a list of
    data/labels pairs and the label parts are concatenated. Only column 0 of
    `predictions` is scored.

    returns a dictionary with the keys:

        r2, mean_abs_error, mse, rmse, median_absolute_error, explained_variance_score
    '''
    labels_are_pairs = len(labels[0]) == 2
    if labels_are_pairs:
        labels = np.concatenate([pair[1] for pair in labels])
    predictions = predictions[:, 0]

    scores = {'r2': metrics.r2_score(labels, predictions)}

    residuals = predictions - labels
    scores['mean_abs_error'] = np.abs(residuals).mean()
    scores['mse'] = (residuals ** 2).mean()
    scores['rmse'] = np.sqrt(scores['mse'])

    # Median absolute error is robust to outliers.
    scores['median_absolute_error'] = metrics.median_absolute_error(labels, predictions)
    # Best explained variance score is 1; lower is worse.
    scores['explained_variance_score'] = metrics.explained_variance_score(labels, predictions)
    return scores
Example 7
Project: JusticeAI   Author: Cyberjusticelab   File: abstract_regressor.py    License: MIT License 6 votes vote down vote up
def test(self):
        """
        Evaluates the regressor on ``self.dataset`` and logs, in order:
            1- coefficient r2
            2- explained variance
            3- mean absolute error
            4- mean squared error

        Each value is written through ``Log.write`` with two decimals.

        :return: None
        """
        facts = [precedent['facts_vector'][self.important_facts_index]
                 for precedent in self.dataset]
        predicted = self.model.predict(np.array(facts))

        outcomes = [precedent['outcomes_vector'][self.outcome_index]
                    for precedent in self.dataset]
        actual = np.array(outcomes)

        # All scores are computed before anything is logged.
        report = (
            ('R2: {0:.2f}', metrics.r2_score(actual, predicted)),
            ('Explained Variance: {0:.2f}',
             metrics.explained_variance_score(actual, predicted)),
            ('Mean Absolute Error: {0:.2f}',
             metrics.mean_absolute_error(actual, predicted)),
            ('Mean Squared Error: {0:.2f}',
             metrics.mean_squared_error(actual, predicted)),
        )
        for template, value in report:
            Log.write(template.format(value))
Example 8
Project: python-dlpy   Author: sassoftware   File: test_metrics.py    License: Apache License 2.0 6 votes vote down vote up
def test_explained_variance_score(self):
        """
        Compare dlpy's ``explained_variance_score`` with scikit-learn's.

        Two call styles are checked: column names together with ``castable``,
        and column objects from two tables joined through ``id_vars``. The
        test is skipped when scikit-learn cannot be imported.
        """
        try:
            from sklearn.metrics import explained_variance_score as skevs
        except ImportError:
            # Only a missing scikit-learn should skip the test; any other
            # failure is a real error and must surface.
            self.skipTest("sklearn is not found in the libraries")

        # Columns given by name, resolved against a single table.
        skevs_score1 = skevs(self.local_reg1.target, self.local_reg1.p_target)
        dlpyevs_score1 = explained_variance_score('target', 'p_target', castable=self.reg_table1)

        self.assertAlmostEqual(skevs_score1, dlpyevs_score1)

        # Columns given as objects taken from two different tables.
        skevs_score2 = skevs(self.local_reg1.target, self.local_reg2.p_target)
        dlpyevs_score2 = explained_variance_score(self.reg_table1.target, self.reg_table2.p_target,
                                                  id_vars='id1')

        self.assertAlmostEqual(skevs_score2, dlpyevs_score2)
Example 9
Project: abu   Author: bbfamily   File: ABuMLGrid.py    License: GNU General Public License v3.0 6 votes vote down vote up
def grid_search_init_n_components(estimator, x, y, n_components_range=None, cv=10, n_jobs=-1,
                                  scoring=None, show=True):
    """
    Grid-search wrapper dedicated to the 'n_components' keyword parameter.
    Serves as a callback for AbuMLCreater._estimators_prarms_best; see
    AbuMLCreater._estimators_prarms_best() for details.

    :param estimator: estimator (learner) object
    :param x: training feature matrix, numpy matrix
    :param y: training target sequence, numpy sequence
    :param n_components_range: default None; when None the range used is:
            n_components_range = np.arange(2, np.maximum(10, int(x.shape[1]) - 1), 1)

    :param cv: int, GridSearchCV train/test splitting parameter, default 10
    :param n_jobs: number of parallel worker processes, default -1, which starts
                   as many processes as there are CPUs
    :param scoring: metric for the test set, default None; with None, classifiers
                    are scored with accuracy and regressors with
                    explained_variance_score wrapped by make_scorer
    :param show: whether to visualize the search
    :return: eg: (0.82154882154882158, {'n_components': 10})
    """
    candidates = n_components_range
    if candidates is None:
        upper_bound = np.maximum(10, int(x.shape[1]) - 1)
        candidates = np.arange(2, upper_bound, 1)

    return grid_search_init_kwargs(
        estimator, x, y, 'n_components', candidates,
        cv=cv, n_jobs=n_jobs, scoring=scoring, show=show,
    )
Example 10
Project: TrafficFlowPrediction   Author: xiaochus   File: main.py    License: MIT License 6 votes vote down vote up
def eva_regress(y_true, y_pred):
    """Evaluation
    Evaluate the predicted result and print each metric on its own line.

    # Arguments
        y_true: List/ndarray, true data.
        y_pred: List/ndarray, predicted data.
    """
    # Compute every metric up front, then print them in a fixed order.
    mape_value = MAPE(y_true, y_pred)
    evs_value = metrics.explained_variance_score(y_true, y_pred)
    mae_value = metrics.mean_absolute_error(y_true, y_pred)
    mse_value = metrics.mean_squared_error(y_true, y_pred)
    r2_value = metrics.r2_score(y_true, y_pred)

    report = (
        ('explained_variance_score:%f', evs_value),
        ('mape:%f%%', mape_value),
        ('mae:%f', mae_value),
        ('mse:%f', mse_value),
        ('rmse:%f', math.sqrt(mse_value)),
        ('r2:%f', r2_value),
    )
    for template, value in report:
        print(template % value)
Example 11
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_regression.py    License: MIT License 6 votes vote down vote up
def test_regression_custom_weights():
    """Check multioutput regression metrics when per-output weights are given."""
    y_true = [[1, 2], [2.5, -1], [4.5, 3], [5, 7]]
    y_pred = [[1, 1], [2, -1], [5, 4], [5, 6.5]]

    # (metric, expected weighted value, decimal places to compare)
    weighted_cases = (
        (mean_squared_error, 0.39, 2),
        (mean_absolute_error, 0.475, 3),
        (r2_score, 0.94, 2),
        (explained_variance_score, 0.94, 2),
    )
    for metric, expected, places in weighted_cases:
        weighted = metric(y_true, y_pred, multioutput=[0.4, 0.6])
        assert_almost_equal(weighted, expected, decimal=places)

    # MSLE is checked separately because it does not accept negative inputs.
    y_true = np.array([[0.5, 1], [1, 2], [7, 6]])
    y_pred = np.array([[0.5, 2], [1, 2.5], [8, 8]])
    direct_msle = mean_squared_log_error(y_true, y_pred,
                                         multioutput=[0.3, 0.7])
    mse_of_logs = mean_squared_error(np.log(1 + y_true), np.log(1 + y_pred),
                                     multioutput=[0.3, 0.7])
    assert_almost_equal(direct_msle, mse_of_logs, decimal=2)
Example 12
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_validation.py    License: MIT License 6 votes vote down vote up
def test_cross_val_score_with_score_func_regression():
    """Check ``cross_val_score`` on a Ridge regressor under several scorers."""
    X, y = make_regression(n_samples=30, n_features=20, n_informative=5,
                           random_state=0)
    reg = Ridge()
    expected_r2_like = [0.94, 0.97, 0.97, 0.99, 0.92]

    # No scoring argument: the Ridge estimator's default score is used.
    default_scores = cross_val_score(reg, X, y, cv=5)
    assert_array_almost_equal(default_scores, expected_r2_like, 2)

    # Explicit "r2" (coefficient of determination) should match the
    # default estimator score.
    explicit_r2 = cross_val_score(reg, X, y, scoring="r2", cv=5)
    assert_array_almost_equal(explicit_r2, expected_r2_like, 2)

    # Mean squared error is a loss function, so these "scores" are negative.
    expected_neg_mse = np.array([-763.07, -553.16, -274.38, -273.26, -1681.99])
    neg_mse = cross_val_score(reg, X, y, cv=5,
                              scoring="neg_mean_squared_error")
    assert_array_almost_equal(neg_mse, expected_neg_mse, 2)

    # Explained variance, wrapped into a scorer with make_scorer.
    ev_scorer = make_scorer(explained_variance_score)
    ev = cross_val_score(reg, X, y, cv=5, scoring=ev_scorer)
    assert_array_almost_equal(ev, expected_r2_like, 2)
Example 13
Project: basenji   Author: calico   File: test_sad.py    License: Apache License 2.0 5 votes vote down vote up
def test_multi(self):
    """Run basenji_sad_multi.py and compare its HDF5 output with the saved one.

    The script writes to 'sad/testm'; every dataset whose key does not end in
    '_pct' is compared against 'sad/saved/sad.h5'. String datasets must match
    exactly, numeric ones within tolerance and with an explained variance
    score above 0.999. The output directory is removed when all checks pass.
    """
    # Start from a clean output directory.
    if os.path.isdir('sad/testm'):
      shutil.rmtree('sad/testm')

    sad_opts = '--rc --shifts "0,21"'
    sad_opts += ' -o sad/testm -q "" -p 4'

    cmd = 'basenji_sad_multi.py %s %s %s %s' % \
        (sad_opts, self.params_file, self.model_file, self.vcf_file)
    subprocess.call(cmd, shell=True)

    # Context managers close both files even if an assertion below fails
    # or the second file cannot be opened.
    with h5py.File('sad/saved/sad.h5', 'r') as saved_h5, \
         h5py.File('sad/testm/sad.h5', 'r') as this_h5:
      saved_keys = sorted(saved_h5.keys())
      this_keys = sorted(this_h5.keys())
      assert len(saved_keys) == len(this_keys)
      assert saved_keys == this_keys

      for key in saved_h5:
        if key[-4:] == '_pct':
          continue

        saved_value = saved_h5[key][:]
        this_value = this_h5[key][:]

        if saved_value.dtype.char == 'S':
          # Byte-string datasets must match exactly.
          np.testing.assert_array_equal(saved_value, this_value)
        else:
          np.testing.assert_allclose(saved_value, this_value, atol=1e-1, rtol=5e-2)
          evs = explained_variance_score(saved_value.flatten(), this_value.flatten())
          assert evs > 0.999

    shutil.rmtree('sad/testm')
Example 14
Project: basenji   Author: calico   File: test_sad.py    License: Apache License 2.0 5 votes vote down vote up
def test_multi(self):
    """Run basenji_sad_ref_multi.py and compare its HDF5 output with the saved one.

    The script writes to 'sad/testrm'; every dataset whose key does not end in
    '_pct' is compared against 'sad/saved/sadr.h5'. String datasets must match
    exactly, numeric ones within tolerance and with an explained variance
    score above 0.999. The output directory is removed when all checks pass.
    """
    # Start from a clean output directory.
    if os.path.isdir('sad/testrm'):
      shutil.rmtree('sad/testrm')

    sad_opts = '--rc --shifts "0,21"'
    sad_opts += ' -o sad/testrm -q "" -p 4'
    cmd = 'basenji_sad_ref_multi.py %s %s %s %s' % \
        (sad_opts, self.params_file, self.model_file, self.vcf_file)
    subprocess.call(cmd, shell=True)

    # Context managers close both files even if an assertion below fails
    # or the second file cannot be opened.
    with h5py.File('sad/saved/sadr.h5', 'r') as saved_h5, \
         h5py.File('sad/testrm/sad.h5', 'r') as this_h5:
      saved_keys = sorted(saved_h5.keys())
      this_keys = sorted(this_h5.keys())
      assert len(saved_keys) == len(this_keys)
      assert saved_keys == this_keys

      for key in saved_h5:
        if key[-4:] == '_pct':
          continue

        saved_value = saved_h5[key][:]
        this_value = this_h5[key][:]

        if saved_value.dtype.char == 'S':
          # Byte-string datasets must match exactly.
          np.testing.assert_array_equal(saved_value, this_value)
        else:
          np.testing.assert_allclose(saved_value, this_value, atol=2e-1, rtol=2e-1)
          evs = explained_variance_score(saved_value.flatten(), this_value.flatten())
          assert evs > 0.999

    shutil.rmtree('sad/testrm')
Example 15
Project: pymssa   Author: kieferk   File: mssa.py    License: MIT License 5 votes vote down vote up
def _calculate_optimal_reconstruction_orders(self,
                                                 timeseries,
                                                 components):
        '''Calculates the optimal component ordering for reconstructing
        each of the timeseries, plus the explained variance of each ranked
        component.

        The ordering comes from `optimal_component_ordering`. For every
        timeseries column, each component (taken individually, in ranked
        order) is scored against that column with `explained_variance_score`.

        Returns the integer ordering array and a float array of the same
        shape holding the scores.
        '''
        optimal_orders = optimal_component_ordering(
            timeseries, components
        ).astype(int)

        order_explained_variance = np.zeros_like(optimal_orders).astype(float)

        # NOTE(review): indexing implies `components` is laid out as
        # (timeseries, time, component) -- confirm against the caller.
        for ts_idx in range(timeseries.shape[1]):
            ranking = optimal_orders[:, ts_idx]
            ranked_components = components[ts_idx, :, :][:, ranking]

            # Fix y_true to this timeseries; each column becomes y_pred.
            score_against_series = partial(
                explained_variance_score, timeseries[:, ts_idx]
            )
            order_explained_variance[:, ts_idx] = np.apply_along_axis(
                score_against_series, 0, ranked_components
            )

        return optimal_orders, order_explained_variance
Example 16
Project: gordo   Author: equinor   File: test_builder.py    License: GNU Affero General Public License v3.0 5 votes vote down vote up
def test_model_builder_metrics_list(metrics_: Optional[List[str]]):
    """
    Build a model with an optional metrics list and check the recorded scores.

    When ``metrics_`` is falsy the four default sklearn metrics are expected.
    Each expected metric must appear in the cross-validation scores under its
    function name with underscores replaced by dashes.
    """
    model_config = {
        "sklearn.multioutput.MultiOutputRegressor": {
            "estimator": "sklearn.linear_model.LinearRegression"
        }
    }
    data_config = get_random_data()

    evaluation_config: Dict[str, Any] = {"cv_mode": "full_build"}
    if metrics_:
        evaluation_config["metrics"] = metrics_

    machine = Machine(
        name="model-name",
        dataset=data_config,
        model=model_config,
        evaluation=evaluation_config,
        project_name="test",
    )
    _model, machine = ModelBuilder(machine).build()

    default_metrics = [
        "sklearn.metrics.explained_variance_score",
        "sklearn.metrics.r2_score",
        "sklearn.metrics.mean_squared_error",
        "sklearn.metrics.mean_absolute_error",
    ]
    recorded_scores = machine.metadata.build_metadata.model.cross_validation.scores

    for metric_path in metrics_ or default_metrics:
        # "pkg.mod.some_metric" -> "some-metric"
        score_key = metric_path.split(".")[-1].replace("_", "-")
        assert score_key in recorded_scores
Example 17
Project: gordo   Author: equinor   File: models.py    License: GNU Affero General Public License v3.0 5 votes vote down vote up
def score(
        self,
        X: Union[np.ndarray, pd.DataFrame],
        y: Union[np.ndarray, pd.DataFrame],
        sample_weight: Optional[np.ndarray] = None,
    ) -> float:
        """
        Returns the explained variance score between auto encoder's input vs output

        Parameters
        ----------
        X: Union[np.ndarray, pd.DataFrame]
            Input data to the model
        y: Union[np.ndarray, pd.DataFrame]
            Target
        sample_weight: Optional[np.ndarray]
            Sample weights, forwarded to ``explained_variance_score``.
            ``None`` (the default) scores all samples equally.

        Returns
        -------
        score: float
            Returns the explained variance score

        Raises
        ------
        NotFittedError
            If the instance has no ``model`` attribute yet.
        """
        if not hasattr(self, "model"):
            raise NotFittedError(
                f"This {self.__class__.__name__} has not been fitted yet."
            )

        out = self.model.predict(X)

        # Forward the weights instead of silently dropping them.
        return explained_variance_score(y, out, sample_weight=sample_weight)
Example 18
Project: gordo   Author: equinor   File: models.py    License: GNU Affero General Public License v3.0 5 votes vote down vote up
def score(
        self,
        X: Union[np.ndarray, pd.DataFrame],
        y: Union[np.ndarray, pd.DataFrame],
        sample_weight: Optional[np.ndarray] = None,
    ) -> float:
        """
        Returns the explained variance score between 1 step forecasted input and true
        input at next time step (note: for LSTM X is offset by `lookback_window`).

        Parameters
        ----------
        X: Union[np.ndarray, pd.DataFrame]
            Input data to the model.
        y: Union[np.ndarray, pd.DataFrame]
            Target
        sample_weight: Optional[np.ndarray]
            Sample weights. When given, they are trimmed to the last
            ``len(prediction)`` entries, like ``y``, and forwarded to
            ``explained_variance_score``.

        Returns
        -------
        score: float
            Returns the explained variance score.

        Raises
        ------
        NotFittedError
            If the instance has no ``model`` attribute yet.
        """
        if not hasattr(self, "model"):
            raise NotFittedError(
                f"This {self.__class__.__name__} has not been fitted yet."
            )

        out = self.predict(X)
        n_scored = len(out)

        # The prediction is shorter than y because of the LSTM lookback
        # window (ie, with a lookback window of 5, 'out' has 5 rows fewer),
        # so only the trailing rows of y -- and of the weights -- are scored.
        if sample_weight is not None:
            sample_weight = sample_weight[-n_scored:]

        return explained_variance_score(
            y[-n_scored:], out, sample_weight=sample_weight
        )
Example 19
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_regression.py    License: MIT License 5 votes vote down vote up
def test_regression_metrics(n_samples=50):
    """Check regression metrics when every prediction is off by exactly one."""
    y_true = np.arange(n_samples)
    y_pred = y_true + 1

    assert_almost_equal(mean_squared_error(y_true, y_pred), 1.)

    # MSLE must equal the MSE computed on log(1 + y).
    mse_of_logs = mean_squared_error(np.log(1 + y_true), np.log(1 + y_pred))
    assert_almost_equal(mean_squared_log_error(y_true, y_pred), mse_of_logs)

    # The remaining error metrics are all expected to be 1.
    for unit_error_metric in (mean_absolute_error, median_absolute_error,
                              max_error):
        assert_almost_equal(unit_error_metric(y_true, y_pred), 1.)

    assert_almost_equal(r2_score(y_true, y_pred),  0.995, 2)
    assert_almost_equal(explained_variance_score(y_true, y_pred), 1.)
Example 20
Project: FATE   Author: FederatedAI   File: regression_metric.py    License: Apache License 2.0 5 votes vote down vote up
def compute(labels, pred_scores):
        """Return ``explained_variance_score(labels, pred_scores)``."""
        score = explained_variance_score(labels, pred_scores)
        return score
Example 21
Project: CDSS   Author: HealthRex   File: RegressorAnalyzer.py    License: GNU General Public License v3.0 5 votes vote down vote up
def _score_explained_variance(self):
        """Return the explained variance score of ``self._y_predicted`` against ``self._y_test``."""
        y_true, y_pred = self._y_test, self._y_predicted
        return explained_variance_score(y_true, y_pred)
Example 22
Project: pandas-ml   Author: pandas-ml   File: test_metrics.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_explained_variance_score(self):
        """The frame's metrics accessor must give the same value as sklearn called directly."""
        via_accessor = self.df.metrics.explained_variance_score()
        via_sklearn = metrics.explained_variance_score(self.target, self.pred)
        self.assertEqual(via_accessor, via_sklearn)
Example 23
Project: abu   Author: bbfamily   File: ABuMLGrid.py    License: GNU General Public License v3.0 5 votes vote down vote up
def _scoring_grid(estimator, scoring):
    """
    Resolve the scoring to use for a supervised estimator; unsupervised
    estimators are filtered out. When scoring is not given, classifiers are
    measured with accuracy and regressors with explained_variance_score,
    wrapped into a scorer with make_scorer.

    :param estimator: estimator (learner) object
    :param scoring: metric to use; when not given, classifiers use accuracy
                    and regressors use explained_variance_score
    :return: scoring, or None for an estimator that is neither a classifier
             nor a regressor
    """
    if not isinstance(estimator, (ClassifierMixin, RegressorMixin)):
        logging.info('only support supervised learning')
        # TODO: scoring and GridSearchCV support for unsupervised learning
        return None

    # An explicitly supplied scoring is returned untouched.
    if scoring is not None:
        return scoring

    if isinstance(estimator, ClassifierMixin):
        # Classifiers use accuracy.
        return 'accuracy'

    if isinstance(estimator, RegressorMixin):
        # Regressors use explained_variance_score wrapped by make_scorer.
        # make_scorer assigns the sign of the returned value through
        # greater_is_better, eg: sign = 1 if greater_is_better else -1
        return make_scorer(explained_variance_score, greater_is_better=True)

    return None
Example 24
Project: abu   Author: bbfamily   File: ABuMLGrid.py    License: GNU General Public License v3.0 5 votes vote down vote up
def grid_search_init_n_estimators(estimator, x, y, n_estimators_range=None, cv=10, n_jobs=-1,
                                  scoring=None, show=True):
    """
    Grid-search wrapper dedicated to the 'n_estimators' keyword parameter.
    Serves as a callback for AbuMLCreater._estimators_prarms_best.

    For details see
            AbuMLCreater._estimators_prarms_best()
            + AbuMLCreater.random_forest_classifier_best()

    eg:
        from abupy import AbuML, ml
        ttn_abu = AbuML.create_test_more_fiter()
        ttn_abu.estimator.random_forest_classifier()
        ml.grid_search_init_n_estimators(ttn_abu.estimator.clf, ttn_abu.x, ttn_abu.y)

    :param estimator: estimator (learner) object
    :param x: training feature matrix, numpy matrix
    :param y: training target sequence, numpy sequence
    :param n_estimators_range: default None; when None the range used is
                               n_estimators_range = np.arange(50, 500, 10)
    :param cv: int, GridSearchCV train/test splitting parameter, default 10
    :param n_jobs: number of parallel worker processes, default -1, which starts
                   as many processes as there are CPUs
    :param scoring: metric for the test set, default None; with None, classifiers
                    are scored with accuracy and regressors with
                    explained_variance_score wrapped by make_scorer
    :param show: whether to visualize the search
    :return: eg: (0.82154882154882158, {'n_estimators': 310})
    """
    candidates = n_estimators_range
    if candidates is None:
        candidates = np.arange(50, 500, 10)

    return grid_search_init_kwargs(
        estimator, x, y, 'n_estimators', candidates,
        cv=cv, n_jobs=n_jobs, scoring=scoring, show=show,
    )
Example 25
Project: abu   Author: bbfamily   File: ABuMLGrid.py    License: GNU General Public License v3.0 5 votes vote down vote up
def grid_search_init_max_depth(estimator, x, y, max_depth_range=None, cv=10, n_jobs=-1,
                               scoring=None, show=True):
    """
    Grid-search wrapper dedicated to the 'max_depth' keyword parameter.
    Serves as a callback for AbuMLCreater._estimators_prarms_best.

    For details see
            AbuMLCreater._estimators_prarms_best()
            + AbuMLCreater.decision_tree_classifier_best()

    :param estimator: estimator (learner) object
    :param x: training feature matrix, numpy matrix
    :param y: training target sequence, numpy sequence
    :param max_depth_range: default None; when None the range used is:
            max_depth_range = np.arange(2, np.maximum(10, int(x.shape[1]) - 1), 1)

    :param cv: int, GridSearchCV train/test splitting parameter, default 10
    :param n_jobs: number of parallel worker processes, default -1, which starts
                   as many processes as there are CPUs
    :param scoring: metric for the test set, default None; with None, classifiers
                    are scored with accuracy and regressors with
                    explained_variance_score wrapped by make_scorer
    :param show: whether to visualize the search
    :return: eg: (0.82154882154882158, {'max_depth': 3})
    """
    candidates = max_depth_range
    if candidates is None:
        upper_bound = np.maximum(10, int(x.shape[1]) - 1)
        candidates = np.arange(2, upper_bound, 1)

    return grid_search_init_kwargs(
        estimator, x, y, 'max_depth', candidates,
        cv=cv, n_jobs=n_jobs, scoring=scoring, show=show,
    )
Example 26
Project: abu   Author: bbfamily   File: ABuMLGrid.py    License: GNU General Public License v3.0 5 votes vote down vote up
def grid_search_init_n_neighbors(estimator, x, y, n_neighbors_range=None, cv=10, n_jobs=-1,
                                 scoring=None, show=True):
    """
    Grid-search wrapper dedicated to the 'n_neighbors' keyword parameter.
    Serves as a callback for AbuMLCreater._estimators_prarms_best.

    For details see
            AbuMLCreater._estimators_prarms_best()
            + AbuMLCreater.knn_classifier_best()

    :param estimator: estimator (learner) object
    :param x: training feature matrix, numpy matrix
    :param y: training target sequence, numpy sequence
    :param n_neighbors_range: default None; when None the range used is:
            n_neighbors_range = np.arange(1, np.minimum(26, int(x.shape[0] / 3)), 1)

    :param cv: int, GridSearchCV train/test splitting parameter, default 10
    :param n_jobs: number of parallel worker processes, default -1, which starts
                   as many processes as there are CPUs
    :param scoring: metric for the test set, default None; with None, classifiers
                    are scored with accuracy and regressors with
                    explained_variance_score wrapped by make_scorer
    :param show: whether to visualize the search
    :return: best score and best parameters,
             eg: (0.82154882154882158, {'n_neighbors': 10})
    """
    candidates = n_neighbors_range
    if candidates is None:
        # Keep the number of voting neighbors between 1 and
        # np.minimum(26, one third of the number of samples).
        upper_bound = np.minimum(26, int(x.shape[0] / 3))
        candidates = np.arange(1, upper_bound, 1)

    return grid_search_init_kwargs(
        estimator, x, y, 'n_neighbors', candidates,
        cv=cv, n_jobs=n_jobs, scoring=scoring, show=show,
    )
Example 27
def plot_learning_curve(model, err_func=explained_variance_score, N=300, n_runs=10, n_sizes=50, ylim=None):
    """Plot mean training and validation scores against training-set size.

    The data comes from the module-level ``X`` and ``y``. For each of
    ``n_runs`` runs and each of ``n_sizes`` training sizes evenly spaced
    between 5 and ``N``, the data is split with ``train_test_split``, the
    model is refitted, and ``err_func`` is evaluated on both parts. The
    curves show the mean over runs.

    Parameters
    ----------
    model : estimator exposing ``fit`` and ``predict``; refitted in place.
    err_func : callable ``err_func(y_true, y_pred)``; its ``__name__`` is
        used as the y-axis label.
    N : largest training-set size.
    n_runs : number of repeated splits per size (the run index is the seed).
    n_sizes : number of training-set sizes to evaluate.
    ylim : optional y-axis limits passed to ``plt.ylim``; skipped when falsy.
    """
    sizes = np.linspace(5, N, n_sizes).astype(int)
    train_err = np.zeros((n_runs, n_sizes))
    validation_err = np.zeros((n_runs, n_sizes))
    for i in range(n_runs):
        for j, size in enumerate(sizes):
            # Split off `size` training points; the run index seeds the
            # split so each run sees a different but reproducible sample.
            xtrain, xtest, ytrain, ytest = train_test_split(
                X, y, train_size=size, random_state=i)
            model.fit(xtrain, ytrain)
            validation_err[i, j] = err_func(ytest, model.predict(xtest))
            train_err[i, j] = err_func(ytrain, model.predict(xtrain))

    plt.plot(sizes, validation_err.mean(axis=0), lw=2, label='validation')
    plt.plot(sizes, train_err.mean(axis=0), lw=2, label='training')

    plt.xlabel('training set size')
    plt.ylabel(err_func.__name__.replace('_', ' '))

    plt.grid(True)

    plt.legend(loc=0)

    plt.xlim(0, N-1)

    if ylim:
        plt.ylim(ylim)
Example 28
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_regression.py    License: MIT License 5 votes vote down vote up
def test_regression_metrics(n_samples=50):
    """Check regression metrics when every prediction is off by exactly one."""
    y_true = np.arange(n_samples)
    y_pred = y_true + 1

    assert_almost_equal(mean_squared_error(y_true, y_pred), 1.)

    # MSLE must equal the MSE computed on log(1 + y).
    mse_of_logs = mean_squared_error(np.log(1 + y_true), np.log(1 + y_pred))
    assert_almost_equal(mean_squared_log_error(y_true, y_pred), mse_of_logs)

    # The remaining error metrics are both expected to be 1.
    for unit_error_metric in (mean_absolute_error, median_absolute_error):
        assert_almost_equal(unit_error_metric(y_true, y_pred), 1.)

    assert_almost_equal(r2_score(y_true, y_pred),  0.995, 2)
    assert_almost_equal(explained_variance_score(y_true, y_pred), 1.)
Example 29
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_regression.py    License: MIT License 5 votes vote down vote up
def test_regression_metrics_at_limits():
    """Check regression metrics on trivial inputs and MSLE on negative values."""
    # A perfect single-sample prediction: each error metric must be 0.
    error_metrics = (
        mean_squared_error,
        mean_squared_log_error,
        mean_absolute_error,
        median_absolute_error,
    )
    for error_metric in error_metrics:
        assert_almost_equal(error_metric([0.], [0.]), 0.00, 2)

    # Perfect predictions: both score metrics must be 1.
    assert_almost_equal(explained_variance_score([0.], [0.]), 1.00, 2)
    assert_almost_equal(r2_score([0., 1], [0., 1]), 1.00, 2)

    # MSLE must raise when the inputs hold negative values.
    msle_message = ("Mean Squared Logarithmic Error cannot be "
                    "used when targets contain negative values.")
    assert_raises_regex(ValueError, msle_message,
                        mean_squared_log_error, [-1.], [-1.])
Example 30
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_regression.py    License: MIT License 4 votes vote down vote up
def test_regression_multioutput_array():
    """Check the per-output (``multioutput='raw_values'``) regression metrics."""
    y_true = [[1, 2], [2.5, -1], [4.5, 3], [5, 7]]
    y_pred = [[1, 1], [2, -1], [5, 4], [5, 6.5]]

    # (metric, expected value per output column)
    per_output_cases = (
        (mean_squared_error, [0.125, 0.5625]),
        (mean_absolute_error, [0.25, 0.625]),
        (r2_score, [0.95, 0.93]),
        (explained_variance_score, [0.95, 0.93]),
    )
    for metric, expected in per_output_cases:
        raw = metric(y_true, y_pred, multioutput='raw_values')
        assert_array_almost_equal(raw, expected, decimal=2)

    # mean_absolute_error and mean_squared_error are equal because
    # it is a binary problem.
    y_true = [[0, 0]]*4
    y_pred = [[1, 1]]*4
    binary_cases = (
        (mean_squared_error, [1., 1.]),
        (mean_absolute_error, [1., 1.]),
        (r2_score, [0., 0.]),
    )
    for metric, expected in binary_cases:
        raw = metric(y_true, y_pred, multioutput='raw_values')
        assert_array_almost_equal(raw, expected, decimal=2)

    # The raw r2 values must average to the 'uniform_average' result.
    y_true = [[0, -1], [0, 1]]
    y_pred = [[2, 2], [1, 1]]
    raw_r2 = r2_score(y_true, y_pred, multioutput='raw_values')
    assert_array_almost_equal(raw_r2, [0, -3.5], decimal=2)
    assert_equal(np.mean(raw_r2),
                 r2_score(y_true, y_pred, multioutput='uniform_average'))
    raw_evs = explained_variance_score(y_true, y_pred,
                                       multioutput='raw_values')
    assert_array_almost_equal(raw_evs, [0, -1.25], decimal=2)

    # Checking for the condition in which both numerator and denominator
    # are zero.
    y_true = [[1, 3], [-1, 2]]
    y_pred = [[1, 4], [-1, 1]]
    raw_r2 = r2_score(y_true, y_pred, multioutput='raw_values')
    assert_array_almost_equal(raw_r2, [1., -3.], decimal=2)
    assert_equal(np.mean(raw_r2),
                 r2_score(y_true, y_pred, multioutput='uniform_average'))
    raw_evs = explained_variance_score(y_true, y_pred,
                                       multioutput='raw_values')
    assert_array_almost_equal(raw_evs, [1., -3.], decimal=2)
    assert_equal(np.mean(raw_evs), explained_variance_score(y_true, y_pred))

    # MSLE is handled separately as it does not accept negative inputs.
    y_true = np.array([[0.5, 1], [1, 2], [7, 6]])
    y_pred = np.array([[0.5, 2], [1, 2.5], [8, 8]])
    direct_msle = mean_squared_log_error(y_true, y_pred,
                                         multioutput='raw_values')
    mse_of_logs = mean_squared_error(np.log(1 + y_true), np.log(1 + y_pred),
                                     multioutput='raw_values')
    assert_array_almost_equal(direct_msle, mse_of_logs, decimal=2)