Python sklearn.datasets.load_iris() Examples

The following are 30 code examples showing how to use sklearn.datasets.load_iris(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module sklearn.datasets, or try the search function.

Example 1
Project: pandas-feature-union   Author: marrrcin   File: 1_problem.py    License: MIT License 7 votes vote down vote up
def main():
    """Select two iris columns through a FeatureUnion and print their means.

    The iris measurements are loaded into a DataFrame, each branch of the
    union picks one column with a FunctionTransformer, and the mean of each
    column is printed from the combined result.
    """
    iris = load_iris()
    frame = pd.DataFrame(iris["data"], columns=iris["feature_names"])

    length_branch = make_pipeline(
        FunctionTransformer(lambda X: X.loc[:, ["sepal length (cm)"]]),
        # other transformations
    )
    width_branch = make_pipeline(
        FunctionTransformer(lambda X: X.loc[:, ["sepal width (cm)"]]),
        # other transformations
    )
    union = FeatureUnion([("1", length_branch), ("2", width_branch)])

    combined = union.fit_transform(frame)
    # NOTE(review): the result is indexed by column label below; whether that
    # works depends on what FeatureUnion returns here -- confirm.
    for column in ("sepal length (cm)", "sepal width (cm)"):
        print(combined[column].mean())
Example 2
Project: pandas-feature-union   Author: marrrcin   File: 2_transform_solution.py    License: MIT License 7 votes vote down vote up
def main():
    """Run two PandasTransform branches inside a FeatureUnion and print column means.

    The iris measurements plus a ``class`` column (the targets) are placed in
    a DataFrame; each union branch selects a single column.
    """
    iris = load_iris()
    frame = pd.DataFrame(iris["data"], columns=iris["feature_names"])
    frame.loc[:, "class"] = iris["target"]

    branches = [
        ("1", make_pipeline(
            PandasTransform(lambda X: X.loc[:, ["sepal length (cm)"]]),
            # other transformations
        )),
        ("2", make_pipeline(
            PandasTransform(lambda X: X.loc[:, ["sepal width (cm)"]]),
            # other transformations
        )),
    ]
    union = FeatureUnion(branches)

    combined = union.fit_transform(frame)
    for column in ("sepal length (cm)", "sepal width (cm)"):
        print(combined[column].mean())
Example 3
Project: fylearn   Author: sorend   File: test_nfpc.py    License: MIT License 7 votes vote down vote up
def test_build_meowa_factory():
    """Cross-validate a FuzzyPatternClassifier using MEOWAFactory on scaled iris.

    Passes when the mean of the 10-fold scores is above 0.80.
    """
    from sklearn.model_selection import cross_val_score
    from sklearn.preprocessing import MinMaxScaler

    iris = datasets.load_iris()
    features = MinMaxScaler().fit_transform(iris.data)
    labels = iris.target

    classifier = nfpc.FuzzyPatternClassifier(
        membership_factory=t_factory,
        aggregation_factory=nfpc.MEOWAFactory(),
    )

    fold_scores = cross_val_score(classifier, features, labels, cv=10)

    assert np.mean(fold_scores) > 0.80
Example 4
Project: differential-privacy-library   Author: IBM   File: test_LogisticRegression.py    License: MIT License 6 votes vote down vote up
def test_different_results(self):
        """Two fits of the module's LogisticRegression must not predict identically.

        Also checks that scikit-learn's LogisticRegression does not match
        both of those prediction vectors at once.
        """
        from sklearn import datasets, linear_model
        from sklearn.model_selection import train_test_split

        iris = datasets.load_iris()
        X_train, X_test, y_train, _ = train_test_split(iris.data, iris.target, test_size=0.2)

        def fit_and_predict(model):
            # Fit on the shared training split and predict the held-out rows.
            model.fit(X_train, y_train)
            return model.predict(X_test)

        first_run = fit_and_predict(LogisticRegression(data_norm=12))
        second_run = fit_and_predict(LogisticRegression(data_norm=12))
        sklearn_run = fit_and_predict(
            linear_model.LogisticRegression(solver="lbfgs", multi_class="ovr"))

        self.assertFalse(np.all(first_run == second_run))
        self.assertFalse(np.all(sklearn_run == first_run) and np.all(sklearn_run == second_run))
Example 5
Project: differential-privacy-library   Author: IBM   File: test_LogisticRegression.py    License: MIT License 6 votes vote down vote up
def test_same_results(self):
        """With ``epsilon=inf`` the module's LogisticRegression must match scikit-learn's.

        Both models are fitted on the same random 80/20 split of iris and
        their test-set predictions are compared element-wise.
        """
        from sklearn import datasets, linear_model
        from sklearn.model_selection import train_test_split

        iris = datasets.load_iris()
        X_train, X_test, y_train, _ = train_test_split(iris.data, iris.target, test_size=0.2)

        local_model = LogisticRegression(data_norm=12, epsilon=float("inf"))
        local_model.fit(X_train, y_train)
        local_predictions = local_model.predict(X_test)

        reference_model = linear_model.LogisticRegression(solver="lbfgs", multi_class="ovr")
        reference_model.fit(X_train, y_train)
        reference_predictions = reference_model.predict(X_test)

        self.assertTrue(np.all(local_predictions == reference_predictions))
Example 6
Project: differential-privacy-library   Author: IBM   File: test_LinearRegression.py    License: MIT License 6 votes vote down vote up
def test_different_results(self):
        """Two fits of the module's LinearRegression must not predict identically.

        Also checks that scikit-learn's LinearRegression does not match both
        of those prediction vectors at once.
        """
        from sklearn import datasets, linear_model
        from sklearn.model_selection import train_test_split

        iris = datasets.load_iris()
        X_train, X_test, y_train, _ = train_test_split(iris.data, iris.target, test_size=0.2)

        def bounded_model():
            # Build a fresh estimator (and fresh bound lists) for every run.
            return LinearRegression(data_norm=12,
                                    bounds_X=([4.3, 2.0, 1.1, 0.1], [7.9, 4.4, 6.9, 2.5]),
                                    bounds_y=(0, 2))

        def fit_and_predict(model):
            model.fit(X_train, y_train)
            return model.predict(X_test)

        first_run = fit_and_predict(bounded_model())
        second_run = fit_and_predict(bounded_model())
        sklearn_run = fit_and_predict(linear_model.LinearRegression())

        self.assertFalse(np.all(first_run == second_run))
        self.assertFalse(np.all(sklearn_run == first_run) and np.all(sklearn_run == second_run))
Example 7
Project: differential-privacy-library   Author: IBM   File: test_LinearRegression.py    License: MIT License 6 votes vote down vote up
def test_same_results(self):
        """With ``epsilon=inf`` the module's LinearRegression must match scikit-learn's.

        Both models are fitted on the same random 80/20 split of iris and
        their test-set predictions are compared with ``np.allclose``.
        """
        from sklearn import datasets
        from sklearn.model_selection import train_test_split
        from sklearn import linear_model

        dataset = datasets.load_iris()
        X_train, X_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size=0.2)

        clf = LinearRegression(data_norm=12, epsilon=float("inf"),
                               bounds_X=([4.3, 2.0, 1.0, 0.1], [7.9, 4.4, 6.9, 2.5]), bounds_y=(0, 2))
        clf.fit(X_train, y_train)

        predict1 = clf.predict(X_test)

        # ``normalize`` was removed from scikit-learn's LinearRegression in 1.2;
        # False was its default, so omitting it keeps the same model on older
        # releases while no longer raising TypeError on newer ones.
        clf = linear_model.LinearRegression()
        clf.fit(X_train, y_train)

        predict2 = clf.predict(X_test)

        self.assertTrue(np.allclose(predict1, predict2))
Example 8
Project: differential-privacy-library   Author: IBM   File: test_GaussianNB.py    License: MIT License 6 votes vote down vote up
def test_different_results(self):
        """The module's GaussianNB (epsilon=1.0) must not reproduce scikit-learn's predictions.

        Both classifiers are fitted on the same seeded split of iris.
        """
        from sklearn.naive_bayes import GaussianNB as sk_nb
        from sklearn import datasets

        global_seed(12345)
        iris = datasets.load_iris()

        x_train, x_test, y_train, _ = train_test_split(iris.data, iris.target, test_size=.2)

        feature_bounds = ([4.3, 2.0, 1.0, 0.1], [7.9, 4.4, 6.9, 2.5])

        private_model = GaussianNB(epsilon=1.0, bounds=feature_bounds)
        reference_model = sk_nb()

        # Fit in the same order as before: module model first, then sklearn's.
        private_model.fit(x_train, y_train)
        reference_model.fit(x_train, y_train)

        agreement = private_model.predict(x_test) == reference_model.predict(x_test)

        self.assertFalse(np.all(agreement))
Example 9
Project: differential-privacy-library   Author: IBM   File: test_GaussianNB.py    License: MIT License 6 votes vote down vote up
def test_with_iris(self):
        """Fit GaussianNB on iris, check accuracy, then check partial_fit doubles the counts.

        After the initial fit the test accuracy must exceed 0.5. A following
        ``partial_fit`` on the same training data must make the summed
        ``class_count_`` exactly twice the earlier sum.
        """
        global_seed(12345)
        from sklearn import datasets
        iris = datasets.load_iris()

        x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=.2)

        feature_bounds = ([4.3, 2.0, 1.0, 0.1], [7.9, 4.4, 6.9, 2.5])

        model = GaussianNB(epsilon=5.0, bounds=feature_bounds)
        model.fit(x_train, y_train)

        test_accuracy = model.score(x_test, y_test)
        # Snapshot the counts before partial_fit touches the estimator.
        counts_before = model.class_count_.copy()
        self.assertGreater(test_accuracy, 0.5)

        model.partial_fit(x_train, y_train)
        counts_after = model.class_count_
        self.assertEqual(np.sum(counts_after), np.sum(counts_before) * 2)
Example 10
Project: keras2pmml   Author: vaclavcadek   File: sequential.py    License: MIT License 6 votes vote down vote up
def setUp(self):
        """Train a small two-layer Keras model on iris and export it with keras2pmml.

        Stores the model, the generated PMML, the input/output sizes, the
        layer count and the generated feature / class names on ``self``.
        """
        iris = load_iris()

        theano.config.floatX = 'float32'
        inputs = iris.data.astype(theano.config.floatX)
        labels = iris.target.astype(np.int32)
        one_hot = np_utils.to_categorical(labels)

        network = Sequential()
        network.add(Dense(input_dim=inputs.shape[1], output_dim=5, activation='tanh'))
        network.add(Dense(input_dim=5, output_dim=one_hot.shape[1], activation='sigmoid'))
        network.compile(loss='categorical_crossentropy', optimizer='sgd')
        network.fit(inputs, one_hot, nb_epoch=10, batch_size=1, verbose=3, validation_data=None)

        self.model = network
        self.pmml = keras2pmml(self.model, copyright='Václav Čadek', model_name='Iris Model')

        self.num_inputs = self.model.input_shape[1]
        self.num_outputs = self.model.output_shape[1]
        self.num_connection_layers = len(self.model.layers)

        # Synthetic names: x0..x{n-1} for inputs, y0..y{m-1} for outputs.
        self.features = ['x{}'.format(idx) for idx in range(self.num_inputs)]
        self.class_values = ['y{}'.format(idx) for idx in range(self.num_outputs)]
Example 11
Project: skutil   Author: tgsmith61591   File: test_impute.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_bagged_imputer_classification():
    """Check that BaggedCategoricalImputer fills every missing ``species`` value.

    Iris is loaded into a DataFrame with the target as a ``species`` column,
    shuffled, and roughly 15% of the ``species`` entries are set to NaN. The
    imputer is run with its default estimator and with a
    RandomForestClassifier; in both cases no nulls may remain.
    """
    iris = load_iris()

    # make DF, add species col
    X = pd.DataFrame.from_records(data=iris.data, columns=iris.feature_names)
    X['species'] = iris.target

    # shuffle...
    X = shuffle_dataframe(X)

    # set random indices to be null.. 15% should be good
    rands = np.random.rand(X.shape[0])
    mask = rands > 0.85
    # Write through a single .loc call: the chained form
    # ``X['species'].iloc[mask] = ...`` may assign into a temporary copy and
    # leave X untouched, which would let the assertions below pass trivially.
    # The column is cast to float first so that storing NaN is not an
    # incompatible-dtype assignment into an integer column.
    X['species'] = X['species'].astype(float)
    X.loc[mask, 'species'] = np.nan

    # define imputer, assert no missing
    imputer = BaggedCategoricalImputer(cols=['species'])
    y = imputer.fit_transform(X)
    assert y['species'].isnull().sum() == 0, 'expected no null...'

    # now test with a different estimator
    imputer = BaggedCategoricalImputer(cols=['species'], base_estimator=RandomForestClassifier())
    y = imputer.fit_transform(X)
    assert y['species'].isnull().sum() == 0, 'expected no null...'
Example 12
Project: few   Author: lacava   File: test_few.py    License: GNU General Public License v3.0 6 votes vote down vote up
def test_few_classification():
    """test_few.py: tests default classification settings"""
    np.random.seed(42)
    X, y = load_iris(return_X_y=True)
    train_idx, test_idx = train_test_split(np.arange(X.shape[0]),
                                           train_size=0.75, test_size=0.25)
    X_train, y_train = X[train_idx], y[train_idx]
    X_test, y_test = X[test_idx], y[test_idx]

    # default classification run
    model = FEW(classification=True, population_size='1x', generations=10)
    model.fit(X_train, y_train)

    print('train score:', model.score(X_train, y_train))
    print('test score:', model.score(X_test, y_test))

    # test boolean output
    model = FEW(classification=True, otype='b', population_size='2x',
                seed_with_ml=False, generations=10)
    np.random.seed(42)
    model.fit(X_train, y_train)

    print('train score:', model.score(X_train, y_train))
    print('test score:', model.score(X_test, y_test))
    model.print_model()
Example 13
Project: pandas-feature-union   Author: marrrcin   File: 3_feature_union_solution.py    License: MIT License 6 votes vote down vote up
def main():
    """Run two PandasTransform branches inside a PandasFeatureUnion and print column means.

    The iris measurements plus a ``class`` column (the targets) are placed in
    a DataFrame; each union branch selects a single column, and the mean of
    each selected column is printed from the combined output.
    """
    iris = load_iris()
    frame = pd.DataFrame(iris["data"], columns=iris["feature_names"])
    frame.loc[:, "class"] = iris["target"]

    length_branch = make_pipeline(
        PandasTransform(lambda X: X.loc[:, ["sepal length (cm)"]]),
        # other transformations
    )
    width_branch = make_pipeline(
        PandasTransform(lambda X: X.loc[:, ["sepal width (cm)"]]),
        # other transformations
    )
    union = PandasFeatureUnion([("1", length_branch), ("2", width_branch)])

    combined = union.fit_transform(frame)
    for column in ("sepal length (cm)", "sepal width (cm)"):
        print(combined[column].mean())
Example 14
Project: fylearn   Author: sorend   File: test_nfpc.py    License: MIT License 6 votes vote down vote up
def test_build_ps_owa_factory():
    """Cross-validate a FuzzyPatternClassifier using GAOWAFactory with ps_owa_optimizer.

    Runs 10-fold cross-validation on min-max scaled iris, prints the mean
    score and requires it to be above 0.92.
    """
    from sklearn.model_selection import cross_val_score
    from sklearn.preprocessing import MinMaxScaler

    iris = datasets.load_iris()
    features = MinMaxScaler().fit_transform(iris.data)
    labels = iris.target

    aggregation = nfpc.GAOWAFactory(optimizer=nfpc.ps_owa_optimizer())
    classifier = nfpc.FuzzyPatternClassifier(
        membership_factory=t_factory,
        aggregation_factory=aggregation,
    )

    mean_score = np.mean(cross_val_score(classifier, features, labels, cv=10))

    print("mean", mean_score)

    assert mean_score > 0.92
Example 15
Project: fylearn   Author: sorend   File: test_fpcga.py    License: MIT License 6 votes vote down vote up
def test_classifier_iris():
    """Cross-validate FuzzyPatternClassifierGA on min-max scaled iris.

    Expects ten fold scores, a mean above 0.6, and a mean within a relative
    tolerance of 0.01 of 0.92.
    """
    from sklearn.model_selection import cross_val_score
    from sklearn.preprocessing import MinMaxScaler

    iris = load_iris()
    features = MinMaxScaler().fit_transform(iris.data)
    labels = iris.target

    classifier = fpcga.FuzzyPatternClassifierGA(iterations=100, random_state=1)

    fold_scores = cross_val_score(classifier, features, labels, cv=10)
    assert len(fold_scores) == 10

    mean_score = np.mean(fold_scores)
    assert mean_score > 0.6

    print("mean", mean_score)

    assert 0.92 == pytest.approx(mean_score, 0.01)
Example 16
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_graphical_lasso.py    License: MIT License 6 votes vote down vote up
def test_graphical_lasso_iris():
    """Compare graphical_lasso on iris (alpha=1.0) with hard-coded reference values."""
    # Hard-coded solution from R glasso package for alpha=1.0
    # (need to set penalize.diagonal to FALSE)
    expected_cov = np.array([
        [0.68112222, 0.0000000, 0.265820, 0.02464314],
        [0.00000000, 0.1887129, 0.000000, 0.00000000],
        [0.26582000, 0.0000000, 3.095503, 0.28697200],
        [0.02464314, 0.0000000, 0.286972, 0.57713289],
    ])
    expected_icov = np.array([
        [1.5190747, 0.000000, -0.1304475, 0.0000000],
        [0.0000000, 5.299055, 0.0000000, 0.0000000],
        [-0.1304475, 0.000000, 0.3498624, -0.1683946],
        [0.0000000, 0.000000, -0.1683946, 1.8164353],
    ])
    emp_cov = empirical_covariance(datasets.load_iris().data)

    # Both solver modes must reproduce the same reference matrices.
    for solver_mode in ('cd', 'lars'):
        estimated = graphical_lasso(emp_cov, alpha=1.0, return_costs=False,
                                    mode=solver_mode)
        cov, icov = estimated
        assert_array_almost_equal(cov, expected_cov)
        assert_array_almost_equal(icov, expected_icov)
Example 17
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_graphical_lasso.py    License: MIT License 6 votes vote down vote up
def test_graphical_lasso_iris_singular():
    """Compare graphical_lasso on three iris rows (alpha=0.01) with reference values."""
    # Small subset of rows to test the rank-deficient case
    # Need to choose samples such that none of the variances are zero
    row_subset = np.arange(10, 13)

    # Hard-coded solution from R glasso package for alpha=0.01
    expected_cov = np.array([
        [0.08, 0.056666662595, 0.00229729713223, 0.00153153142149],
        [0.056666662595, 0.082222222222, 0.00333333333333, 0.00222222222222],
        [0.002297297132, 0.003333333333, 0.00666666666667, 0.00009009009009],
        [0.001531531421, 0.002222222222, 0.00009009009009, 0.00222222222222],
    ])
    expected_icov = np.array([
        [24.42244057, -16.831679593, 0.0, 0.0],
        [-16.83168201, 24.351841681, -6.206896552, -12.5],
        [0.0, -6.206896171, 153.103448276, 0.0],
        [0.0, -12.499999143, 0.0, 462.5],
    ])
    emp_cov = empirical_covariance(datasets.load_iris().data[row_subset, :])

    for solver_mode in ('cd', 'lars'):
        estimated = graphical_lasso(emp_cov, alpha=0.01, return_costs=False,
                                    mode=solver_mode)
        cov, icov = estimated
        assert_array_almost_equal(cov, expected_cov, decimal=5)
        assert_array_almost_equal(icov, expected_icov, decimal=5)
Example 18
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_graph_lasso.py    License: MIT License 6 votes vote down vote up
def test_graph_lasso_iris():
    """Compare graph_lasso on iris (alpha=1.0) with hard-coded reference values."""
    # Hard-coded solution from R glasso package for alpha=1.0
    # (need to set penalize.diagonal to FALSE)
    expected_cov = np.array([
        [0.68112222, 0.0000000, 0.265820, 0.02464314],
        [0.00000000, 0.1887129, 0.000000, 0.00000000],
        [0.26582000, 0.0000000, 3.095503, 0.28697200],
        [0.02464314, 0.0000000, 0.286972, 0.57713289],
    ])
    expected_icov = np.array([
        [1.5190747, 0.000000, -0.1304475, 0.0000000],
        [0.0000000, 5.299055, 0.0000000, 0.0000000],
        [-0.1304475, 0.000000, 0.3498624, -0.1683946],
        [0.0000000, 0.000000, -0.1683946, 1.8164353],
    ])
    emp_cov = empirical_covariance(datasets.load_iris().data)

    # Both solver modes must reproduce the same reference matrices.
    for solver_mode in ('cd', 'lars'):
        estimated = graph_lasso(emp_cov, alpha=1.0, return_costs=False,
                                mode=solver_mode)
        cov, icov = estimated
        assert_array_almost_equal(cov, expected_cov)
        assert_array_almost_equal(icov, expected_icov)
Example 19
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_graph_lasso.py    License: MIT License 6 votes vote down vote up
def test_graph_lasso_iris_singular():
    """Compare graph_lasso on three iris rows (alpha=0.01) with reference values."""
    # Small subset of rows to test the rank-deficient case
    # Need to choose samples such that none of the variances are zero
    row_subset = np.arange(10, 13)

    # Hard-coded solution from R glasso package for alpha=0.01
    expected_cov = np.array([
        [0.08, 0.056666662595, 0.00229729713223, 0.00153153142149],
        [0.056666662595, 0.082222222222, 0.00333333333333, 0.00222222222222],
        [0.002297297132, 0.003333333333, 0.00666666666667, 0.00009009009009],
        [0.001531531421, 0.002222222222, 0.00009009009009, 0.00222222222222],
    ])
    expected_icov = np.array([
        [24.42244057, -16.831679593, 0.0, 0.0],
        [-16.83168201, 24.351841681, -6.206896552, -12.5],
        [0.0, -6.206896171, 153.103448276, 0.0],
        [0.0, -12.499999143, 0.0, 462.5],
    ])
    emp_cov = empirical_covariance(datasets.load_iris().data[row_subset, :])

    for solver_mode in ('cd', 'lars'):
        estimated = graph_lasso(emp_cov, alpha=0.01, return_costs=False,
                                mode=solver_mode)
        cov, icov = estimated
        assert_array_almost_equal(cov, expected_cov, decimal=5)
        assert_array_almost_equal(icov, expected_icov, decimal=5)
Example 20
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_classification.py    License: MIT License 6 votes vote down vote up
def test_classification_report_multiclass_with_label_detection():
    """classification_report without explicit labels must match the stored text."""
    # print classification report with label detection
    expected_report = """\
              precision    recall  f1-score   support

           0       0.83      0.79      0.81        24
           1       0.33      0.10      0.15        31
           2       0.42      0.90      0.57        20

    accuracy                           0.53        75
   macro avg       0.53      0.60      0.51        75
weighted avg       0.51      0.53      0.47        75
"""
    prediction = make_prediction(dataset=datasets.load_iris(), binary=False)
    y_true, y_pred = prediction[0], prediction[1]

    assert_equal(classification_report(y_true, y_pred), expected_report)
Example 21
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_classification.py    License: MIT License 6 votes vote down vote up
def test_classification_report_multiclass_with_digits():
    """classification_report with class names and digits=5 must match the stored text."""
    # Test performance report with added digits in floating point values
    iris = datasets.load_iris()
    prediction = make_prediction(dataset=iris, binary=False)
    y_true, y_pred = prediction[0], prediction[1]

    # print classification report with class names
    expected_report = """\
              precision    recall  f1-score   support

      setosa    0.82609   0.79167   0.80851        24
  versicolor    0.33333   0.09677   0.15000        31
   virginica    0.41860   0.90000   0.57143        20

    accuracy                        0.53333        75
   macro avg    0.52601   0.59615   0.50998        75
weighted avg    0.51375   0.53333   0.47310        75
"""
    class_names = iris.target_names
    actual_report = classification_report(
        y_true, y_pred, labels=np.arange(len(class_names)),
        target_names=class_names, digits=5)
    assert_equal(actual_report, expected_report)
Example 22
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_unsupervised.py    License: MIT License 6 votes vote down vote up
def test_correct_labelsize():
    """silhouette_score must raise ValueError when the label count is out of range."""
    # Assert 1 < n_labels < n_samples
    X = datasets.load_iris().data
    n_samples = X.shape[0]

    message = (r'Number of labels is %d\. Valid values are 2 '
               r'to n_samples - 1 \(inclusive\)')

    invalid_labelings = (
        np.arange(n_samples),   # n_labels = n_samples
        np.zeros(n_samples),    # n_labels = 1
    )
    for y in invalid_labelings:
        assert_raises_regexp(ValueError,
                             message % len(np.unique(y)),
                             silhouette_score, X, y)
Example 23
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_multiclass.py    License: MIT License 6 votes vote down vote up
def test_safe_split_with_precomputed_kernel():
    """_safe_split on a precomputed kernel must agree with splitting the raw data.

    The kernel slices returned for the precomputed-kernel SVC are compared
    with dot products of the corresponding raw-data slices.
    """
    plain_svc = SVC()
    kernel_svc = SVC(kernel="precomputed")

    iris = datasets.load_iris()
    X, y = iris.data, iris.target
    gram = np.dot(X, X.T)

    splitter = ShuffleSplit(test_size=0.25, random_state=0)
    train, test = list(splitter.split(X))[0]

    # Training portion: kernel rows and columns are both the training samples.
    X_train, y_train = _safe_split(plain_svc, X, y, train)
    gram_train, y_train_kernel = _safe_split(kernel_svc, gram, y, train)
    assert_array_almost_equal(gram_train, np.dot(X_train, X_train.T))
    assert_array_almost_equal(y_train, y_train_kernel)

    # Test portion: kernel between test samples and training samples.
    X_test, y_test = _safe_split(plain_svc, X, y, test, train)
    gram_test, y_test_kernel = _safe_split(kernel_svc, gram, y, test, train)
    assert_array_almost_equal(gram_test, np.dot(X_test, X_train.T))
    assert_array_almost_equal(y_test, y_test_kernel)
Example 24
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_rfe.py    License: MIT License 6 votes vote down vote up
def test_rfe_features_importance():
    """RFE with a random forest must select the same features as RFE with a linear SVC.

    Six columns drawn from a seeded normal generator are appended to the
    iris features before running both selectors.
    """
    rng = check_random_state(0)
    iris = load_iris()
    noise = rng.normal(size=(len(iris.data), 6))
    X = np.c_[iris.data, noise]
    y = iris.target

    forest = RandomForestClassifier(n_estimators=20,
                                    random_state=rng, max_depth=2)
    forest_rfe = RFE(estimator=forest, n_features_to_select=4, step=0.1)
    forest_rfe.fit(X, y)
    assert_equal(len(forest_rfe.ranking_), X.shape[1])

    svc_rfe = RFE(estimator=SVC(kernel="linear"), n_features_to_select=4, step=0.1)
    svc_rfe.fit(X, y)

    # Check if the supports are equal
    assert_array_equal(forest_rfe.get_support(), svc_rfe.get_support())
Example 25
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_rfe.py    License: MIT License 6 votes vote down vote up
def test_rfecv_verbose_output():
    """Check that fitting RFECV with ``verbose=1`` writes something to stdout.

    Standard output is captured only for the duration of ``fit`` and is
    restored afterwards, so later tests and the test runner keep their
    normal output stream even if ``fit`` raises.
    """
    # Check verbose=1 is producing an output.
    from contextlib import redirect_stdout
    from io import StringIO

    generator = check_random_state(0)
    iris = load_iris()
    X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))]
    y = list(iris.target)

    rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, cv=5, verbose=1)

    # redirect_stdout swaps sys.stdout and puts the original back on exit;
    # the previous code replaced sys.stdout and never restored it.
    verbose_output = StringIO()
    with redirect_stdout(verbose_output):
        rfecv.fit(X, y)

    verbose_output.seek(0)
    assert_greater(len(verbose_output.readline()), 0)
Example 26
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_rfe.py    License: MIT License 6 votes vote down vote up
def test_rfecv_grid_scores_size():
    """RFECV must report the expected number of grid scores for each step setting.

    For every (step, min_features_to_select) pair the length of
    ``grid_scores_`` is compared with
    ``ceil((n_features - min_features_to_select) / step) + 1``.
    """
    rng = check_random_state(0)
    iris = load_iris()
    X = np.c_[iris.data, rng.normal(size=(len(iris.data), 6))]
    y = list(iris.target)   # regression test: list should be supported
    n_features = X.shape[1]

    # Non-regression test for varying combinations of step and
    # min_features_to_select.
    settings = ((2, 1), (2, 2), (3, 3))
    for step, min_features_to_select in settings:
        selector = RFECV(estimator=MockClassifier(), step=step,
                         min_features_to_select=min_features_to_select, cv=5)
        selector.fit(X, y)

        expected_len = np.ceil(
            (n_features - min_features_to_select) / step) + 1
        assert len(selector.grid_scores_) == expected_len
        assert len(selector.ranking_) == n_features
        assert selector.n_features_ >= min_features_to_select
Example 27
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_validation.py    License: MIT License 6 votes vote down vote up
def test_cross_val_score_mask():
    """Compare cross_val_score with a KFold object against an explicit list of splits.

    The scores from passing ``KFold(5)`` directly must equal the scores from
    passing the list of splits collected from a second ``KFold(5)``.
    """
    # test that cross_val_score works with boolean masks
    svm = SVC(kernel="linear")
    iris = load_iris()
    X, y = iris.data, iris.target
    kfold = KFold(5)
    scores_indices = cross_val_score(svm, X, y, cv=kfold)
    kfold = KFold(5)
    cv_masks = []
    for train, test in kfold.split(X, y):
        # Use the builtin ``bool``: the ``np.bool`` alias was deprecated in
        # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
        mask_train = np.zeros(len(y), dtype=bool)
        mask_test = np.zeros(len(y), dtype=bool)
        mask_train[train] = 1
        mask_test[test] = 1
        # NOTE(review): the index arrays, not mask_train/mask_test, are what
        # gets appended, so boolean masks are never passed to
        # cross_val_score. Kept as is to preserve behaviour -- confirm intent.
        cv_masks.append((train, test))
    scores_masks = cross_val_score(svm, X, y, cv=cv_masks)
    assert_array_equal(scores_indices, scores_masks)
Example 28
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_validation.py    License: MIT License 6 votes vote down vote up
def test_cross_val_score_precomputed():
    """cross_val_score must agree across precomputed, linear and callable kernels.

    Also checks that a ValueError is raised for a non-square input with a
    precomputed kernel, and for a precomputed kernel passed as a plain list.
    """
    iris = load_iris()
    X, y = iris.data, iris.target
    gram = np.dot(X, X.T)

    # test for svm with precomputed kernel
    precomputed_svc = SVC(kernel="precomputed")
    score_precomputed = cross_val_score(precomputed_svc, gram, y)
    score_linear = cross_val_score(SVC(kernel="linear"), X, y)
    assert_array_almost_equal(score_precomputed, score_linear)

    # test with callable
    callable_svc = SVC(gamma='scale', kernel=lambda x, y: np.dot(x, y.T))
    score_callable = cross_val_score(callable_svc, X, y)
    assert_array_almost_equal(score_precomputed, score_callable)

    # Error raised for non-square X
    precomputed_svc = SVC(kernel="precomputed")
    assert_raises(ValueError, cross_val_score, precomputed_svc, X, y)

    # test error is raised when the precomputed kernel is not array-like
    # or sparse
    assert_raises(ValueError, cross_val_score, precomputed_svc,
                  gram.tolist(), y)
Example 29
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_validation.py    License: MIT License 6 votes vote down vote up
def test_cross_val_score_with_score_func_classification():
    """5-fold scores of a linear SVC on iris must match the stored values.

    The default scorer, ``scoring="accuracy"`` and ``scoring="f1_weighted"``
    are each compared with the same expected list to two decimals.
    """
    iris = load_iris()
    X, y = iris.data, iris.target
    clf = SVC(kernel='linear')
    expected_scores = [0.97, 1., 0.97, 0.97, 1.]

    # Default score (should be the accuracy score)
    default_scores = cross_val_score(clf, X, y, cv=5)
    assert_array_almost_equal(default_scores, expected_scores, 2)

    # Correct classification score (aka. zero / one score) - should be the
    # same as the default estimator score
    accuracy_scores = cross_val_score(clf, X, y, scoring="accuracy", cv=5)
    assert_array_almost_equal(accuracy_scores, expected_scores, 2)

    # F1 score (class are balanced so f1_score should be equal to zero/one
    # score
    weighted_f1_scores = cross_val_score(clf, X, y, scoring="f1_weighted", cv=5)
    assert_array_almost_equal(weighted_f1_scores, expected_scores, 2)
Example 30
Project: differential-privacy-library   Author: IBM   File: test_clip_to_norm.py    License: MIT License 5 votes vote down vote up
def test_iris(self):
        """Clip iris rows to a norm between the first two row norms and check the result.

        The threshold is the average of the norms of rows 0 and 1. After
        clipping, neither of those rows may have grown, and at least one of
        them must have a norm close to the threshold.
        """
        from sklearn import datasets
        iris = datasets.load_iris()

        samples = iris.data

        original_norms = np.linalg.norm(samples, axis=1)
        first_norm, second_norm = original_norms[0], original_norms[1]
        threshold = (first_norm + second_norm) / 2

        clipped_norms = np.linalg.norm(clip_to_norm(samples, threshold), axis=1)
        self.assertLessEqual(clipped_norms[0], first_norm)
        self.assertLessEqual(clipped_norms[1], second_norm)
        self.assertTrue(np.isclose(clipped_norms[0], threshold) or np.isclose(clipped_norms[1], threshold))