Python sklearn.datasets.load_breast_cancer() Examples

The following are 30 code examples of sklearn.datasets.load_breast_cancer(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.datasets, or try the search function.
Example #1
Source File: test_classifier_comb.py    From combo with BSD 2-Clause "Simplified" License 7 votes vote down vote up
def setUp(self):
        """Prepare the data split and an averaging ensemble for the tests.

        Loads the breast cancer data, holds out 40% of it as a test split
        (seeded with 42) and builds a ``SimpleClassifierAggregator`` with
        ``method='average'`` over five base classifiers. The aggregator is
        only constructed here; ``fit`` is not called.
        """
        seed = 42
        self.roc_floor = 0.9
        self.accuracy_floor = 0.9

        features, labels = load_breast_cancer(return_X_y=True)
        split = train_test_split(features, labels, test_size=0.4,
                                 random_state=seed)
        self.X_train, self.X_test, self.y_train, self.y_test = split

        base_estimators = [
            DecisionTreeClassifier(random_state=seed),
            LogisticRegression(random_state=seed),
            KNeighborsClassifier(),
            RandomForestClassifier(random_state=seed),
            GradientBoostingClassifier(random_state=seed),
        ]
        self.clf = SimpleClassifierAggregator(base_estimators,
                                              method='average')
Example #2
Source File: test_Feature_Binarizer_From_Trees.py    From AIX360 with Apache License 2.0 7 votes vote down vote up
def setUp(self) -> None:
        """Build a mixed-type feature frame from the breast cancer data and split it.

        The dataset's own columns are kept as ``col_ordinal``; four synthetic
        columns are appended (two three-valued, two two-valued, each in a
        string and an integer flavour) and recorded in ``col_categorical``
        and ``col_binary``. The frame and target are then split 60/40.
        """
        self.random_state = 0
        d: dict = load_breast_cancer()
        X: DataFrame = DataFrame(d['data'], columns=d['feature_names'])
        # Record the dataset's own column names before synthetic ones are added.
        self.col_ordinal = X.columns.to_list()
        # Seed the global NumPy RNG; the four draws below consume it in this
        # exact order, so reordering them changes the generated columns.
        np.random.seed(self.random_state)
        # Three-valued columns: one with string labels, one with integers 0..2.
        s = np.array(['a', 'b', 'c'])
        X['cat alpha'] = s[np.random.randint(0, 3, len(X))]
        X['cat num'] = np.random.randint(0, 3, len(X))
        self.col_categorical = ['cat alpha', 'cat num']
        # Two-valued columns: one with string labels, one with integers 0..1.
        s = np.array(['a', 'b'])
        X['bin alpha'] = s[np.random.randint(0, 2, len(X))]
        X['bin num'] = np.random.randint(0, 2, len(X))
        self.col_binary = ['bin alpha', 'bin num']
        self.X = X
        self.y: ndarray = d['target']
        # Hold out 40% of the rows for testing, seeded for reproducibility.
        self.X_train, self.X_test, self.y_train, self.y_test = \
            train_test_split(self.X, self.y, test_size=0.4, random_state=self.random_state)
Example #3
Source File: test_classifier_stacking.py    From combo with BSD 2-Clause "Simplified" License 7 votes vote down vote up
def setUp(self):
        """Fit a four-fold ``Stacking`` ensemble on the breast cancer data.

        Holds out 40% of the data as a test split (seeded with 42) and fits
        the ensemble, built over five base classifiers, on the remainder.
        """
        seed = 42
        self.roc_floor = 0.9
        self.accuracy_floor = 0.9

        features, labels = load_breast_cancer(return_X_y=True)
        split = train_test_split(features, labels, test_size=0.4,
                                 random_state=seed)
        self.X_train, self.X_test, self.y_train, self.y_test = split

        base_estimators = [
            DecisionTreeClassifier(random_state=seed),
            LogisticRegression(random_state=seed),
            KNeighborsClassifier(),
            RandomForestClassifier(random_state=seed),
            GradientBoostingClassifier(random_state=seed),
        ]
        self.clf = Stacking(base_estimators, n_folds=4)
        self.clf.fit(self.X_train, self.y_train)
Example #4
Source File: test_sklearn_feature_selection_converters.py    From sklearn-onnx with MIT License 6 votes vote down vote up
def test_select_fwe_int(self):
        """Convert a fitted ``SelectFwe`` to ONNX with an int64 input and dump it."""
        X, y = load_breast_cancer(return_X_y=True)
        selector = SelectFwe()
        selector.fit(X, y)

        n_features = X.shape[1]
        input_types = [("input", Int64TensorType([None, n_features]))]
        onnx_model = convert_sklearn(selector, "select fwe", input_types)
        self.assertTrue(onnx_model is not None)

        # Version-check expression handed to dump_data_and_model as a string;
        # presumably evaluated there to decide whether a failure is tolerated
        # -- confirm against the helper.
        failure_condition = (
            "StrictVersion(onnx.__version__)"
            " < StrictVersion('1.2') or "
            "StrictVersion(onnxruntime.__version__)"
            " <= StrictVersion('0.2.1')"
        )
        dump_data_and_model(
            X.astype(np.int64),
            selector,
            onnx_model,
            basename="SklearnSelectFwe",
            allow_failure=failure_condition,
        )
Example #5
Source File: main_nearest_neighbor.py    From wisconsin-breast-cancer with Apache License 2.0 6 votes vote down vote up
def main():
    """Run the ``NearestNeighbor`` model on standardized breast cancer data.

    Standardizes the features, makes a stratified 70/30 train/test split
    (unseeded), builds the model from the training part and calls its
    ``predict`` on the test part with a fixed result path.
    """
    dataset = datasets.load_breast_cancer()
    labels = dataset.target
    num_features = dataset.data.shape[1]

    features = StandardScaler().fit_transform(dataset.data)

    split = train_test_split(features, labels, test_size=0.3, stratify=labels)
    train_features, test_features, train_labels, test_labels = split

    model = NearestNeighbor(train_features, train_labels, num_features)
    model.predict(
        test_features, test_labels, result_path="./results/nearest_neighbor/"
    )
Example #6
Source File: test_classifier_des.py    From combo with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def setUp(self):
        """Fit a ``DES_LA`` ensemble (``local_region_size=30``) on the training split.

        Holds out 40% of the breast cancer data as a test split (seeded
        with 42) and fits the ensemble, built over five base classifiers,
        on the remainder.
        """
        seed = 42
        self.roc_floor = 0.9
        self.accuracy_floor = 0.9

        features, labels = load_breast_cancer(return_X_y=True)
        split = train_test_split(features, labels, test_size=0.4,
                                 random_state=seed)
        self.X_train, self.X_test, self.y_train, self.y_test = split

        base_estimators = [
            DecisionTreeClassifier(random_state=seed),
            LogisticRegression(random_state=seed),
            KNeighborsClassifier(),
            RandomForestClassifier(random_state=seed),
            GradientBoostingClassifier(random_state=seed),
        ]
        self.clf = DES_LA(base_estimators, local_region_size=30)
        self.clf.fit(self.X_train, self.y_train)
Example #7
Source File: test_gridsearch.py    From dislib with Apache License 2.0 6 votes vote down vote up
def test_fit_2(self):
        """Tests GridSearchCV fit() with different data."""
        raw_x, raw_y = datasets.load_breast_cancer(return_X_y=True)
        x = StandardScaler().fit_transform(
            ds.array(raw_x, block_size=(100, 10)))
        y = ds.array(raw_y.reshape(-1, 1), block_size=(100, 1))

        # Single-point grid: one value for each searched parameter.
        grid = {'c': [0.1], 'gamma': [0.1]}
        searcher = GridSearchCV(CascadeSVM(), grid, cv=5)
        searcher.fit(x, y)

        # After fitting, the searcher must expose each of these attributes.
        expected_attributes = ('best_estimator_', 'best_score_',
                               'best_params_', 'best_index_', 'scorer_')
        for attribute in expected_attributes:
            self.assertTrue(hasattr(searcher, attribute))
        self.assertEqual(searcher.n_splits_, 5)
Example #8
Source File: test_classifier_comb.py    From combo with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def setUp(self):
        """Fit an averaging ``SimpleClassifierAggregator`` on the training split.

        Holds out 40% of the breast cancer data as a test split (seeded
        with 42) and fits the aggregator, built over five base classifiers,
        on the remainder.
        """
        seed = 42
        self.roc_floor = 0.9
        self.accuracy_floor = 0.9

        features, labels = load_breast_cancer(return_X_y=True)
        split = train_test_split(features, labels, test_size=0.4,
                                 random_state=seed)
        self.X_train, self.X_test, self.y_train, self.y_test = split

        base_estimators = [
            DecisionTreeClassifier(random_state=seed),
            LogisticRegression(random_state=seed),
            KNeighborsClassifier(),
            RandomForestClassifier(random_state=seed),
            GradientBoostingClassifier(random_state=seed),
        ]
        self.clf = SimpleClassifierAggregator(base_estimators,
                                              method='average')
        self.clf.fit(self.X_train, self.y_train)
Example #9
Source File: test_classifier_comb.py    From combo with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def setUp(self):
        """Fit a weighted-average ``SimpleClassifierAggregator`` on the training split.

        Holds out 40% of the breast cancer data as a test split (seeded
        with 42). One weight is supplied for each of the five base
        classifiers, in the order the classifiers are listed.
        """
        seed = 42
        self.roc_floor = 0.9
        self.accuracy_floor = 0.9

        features, labels = load_breast_cancer(return_X_y=True)
        split = train_test_split(features, labels, test_size=0.4,
                                 random_state=seed)
        self.X_train, self.X_test, self.y_train, self.y_test = split

        estimator_weights = np.array([0.1, 0.4, 0.1, 0.2, 0.2])
        base_estimators = [
            DecisionTreeClassifier(random_state=seed),
            LogisticRegression(random_state=seed),
            KNeighborsClassifier(),
            RandomForestClassifier(random_state=seed),
            GradientBoostingClassifier(random_state=seed),
        ]

        self.clf = SimpleClassifierAggregator(
            base_estimators, method='average', weights=estimator_weights)
        self.clf.fit(self.X_train, self.y_train)
Example #10
Source File: test_classifier_comb.py    From combo with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def setUp(self):
        """Fit a ``method='maximization'`` aggregator on the training split.

        Holds out 40% of the breast cancer data as a test split (seeded
        with 42) and fits the aggregator, built over five base classifiers,
        on the remainder.
        """
        seed = 42
        self.roc_floor = 0.9
        self.accuracy_floor = 0.9

        features, labels = load_breast_cancer(return_X_y=True)
        split = train_test_split(features, labels, test_size=0.4,
                                 random_state=seed)
        self.X_train, self.X_test, self.y_train, self.y_test = split

        base_estimators = [
            DecisionTreeClassifier(random_state=seed),
            LogisticRegression(random_state=seed),
            KNeighborsClassifier(),
            RandomForestClassifier(random_state=seed),
            GradientBoostingClassifier(random_state=seed),
        ]
        self.clf = SimpleClassifierAggregator(base_estimators,
                                              method='maximization')
        self.clf.fit(self.X_train, self.y_train)
Example #11
Source File: test_pyfms.py    From pyfms with MIT License 6 votes vote down vote up
def test_save_load_classifier(self):
        """Check that weights saved to disk reload into an equivalent classifier.

        Trains a classifier, saves its weights under ``self.workspace``,
        loads them into a fresh classifier and asserts that both the weight
        arrays and the test accuracy are unchanged.
        """
        features, labels = datasets.load_breast_cancer(return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(
            features, labels, test_size=0.2)
        n_features = features.shape[1]
        k = 4

        trained = pyfms.Classifier(n_features, k=k)
        trained.fit(X_train, y_train, nb_epoch=1000)
        weights_before = trained.get_weights()
        accuracy_before = accuracy_score(y_test, trained.predict(X_test))

        weights_path = os.path.join(self.workspace, 'classifier.fm')
        trained.save_weights(weights_path)

        # The restored classifier is built without ``k``, as in the original.
        restored = pyfms.Classifier(n_features)
        restored.load_weights(weights_path)
        weights_after = restored.get_weights()
        accuracy_after = accuracy_score(y_test, restored.predict(X_test))

        for before, after in zip(weights_before, weights_after):
            np.testing.assert_array_equal(before, after)
        self.assertEqual(accuracy_before, accuracy_after)
Example #12
Source File: test_classifier_comb.py    From combo with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def setUp(self):
        """Fit a ``method='median'`` aggregator on the training split.

        Holds out 40% of the breast cancer data as a test split (seeded
        with 42) and fits the aggregator, built over five base classifiers,
        on the remainder.
        """
        seed = 42
        self.roc_floor = 0.9
        self.accuracy_floor = 0.9

        features, labels = load_breast_cancer(return_X_y=True)
        split = train_test_split(features, labels, test_size=0.4,
                                 random_state=seed)
        self.X_train, self.X_test, self.y_train, self.y_test = split

        base_estimators = [
            DecisionTreeClassifier(random_state=seed),
            LogisticRegression(random_state=seed),
            KNeighborsClassifier(),
            RandomForestClassifier(random_state=seed),
            GradientBoostingClassifier(random_state=seed),
        ]
        self.clf = SimpleClassifierAggregator(base_estimators,
                                              method='median')
        self.clf.fit(self.X_train, self.y_train)
Example #13
Source File: test_cluster_comb.py    From combo with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def setUp(self):
        """Fit three clusterers and collect their labels column by column.

        ``self.original_labels`` ends up with one row per sample and one
        column per estimator, holding each fitted estimator's ``labels_``.
        """
        self.n_clusters = 5
        self.n_estimators = 3
        self.X, self.y = load_breast_cancer(return_X_y=True)

        # The base clusterers, each asked for the same number of clusters.
        clusterers = [
            KMeans(n_clusters=self.n_clusters),
            MiniBatchKMeans(n_clusters=self.n_clusters),
            AgglomerativeClustering(n_clusters=self.n_clusters),
        ]

        # Label matrix filled directly, without going through an ensemble class.
        n_samples = self.X.shape[0]
        self.original_labels = np.zeros([n_samples, self.n_estimators])
        for column, clusterer in enumerate(clusterers):
            clusterer.fit(self.X)
            self.original_labels[:, column] = clusterer.labels_
Example #14
Source File: test_sklearn_feature_selection_converters.py    From sklearn-onnx with MIT License 6 votes vote down vote up
def test_select_fdr_int(self):
        """Convert a fitted ``SelectFdr`` to ONNX with an int64 input and dump it."""
        X, y = load_breast_cancer(return_X_y=True)
        selector = SelectFdr()
        selector.fit(X, y)

        n_features = X.shape[1]
        input_types = [("input", Int64TensorType([None, n_features]))]
        onnx_model = convert_sklearn(selector, "select fdr", input_types)
        self.assertTrue(onnx_model is not None)

        # Version-check expression handed to dump_data_and_model as a string;
        # presumably evaluated there to decide whether a failure is tolerated
        # -- confirm against the helper.
        failure_condition = (
            "StrictVersion(onnx.__version__)"
            " < StrictVersion('1.2') or "
            "StrictVersion(onnxruntime.__version__)"
            " <= StrictVersion('0.2.1')"
        )
        dump_data_and_model(
            X.astype(np.int64),
            selector,
            onnx_model,
            basename="SklearnSelectFdr",
            allow_failure=failure_condition,
        )
Example #15
Source File: test_sklearn_feature_selection_converters.py    From sklearn-onnx with MIT License 6 votes vote down vote up
def test_select_fdr_float(self):
        """Convert a fitted ``SelectFdr`` to ONNX with a float32 input and dump it."""
        X, y = load_breast_cancer(return_X_y=True)
        selector = SelectFdr()
        selector.fit(X, y)

        n_features = X.shape[1]
        input_types = [("input", FloatTensorType([None, n_features]))]
        onnx_model = convert_sklearn(selector, "select fdr", input_types)
        self.assertTrue(onnx_model is not None)

        # Version-check expression handed to dump_data_and_model as a string;
        # presumably evaluated there to decide whether a failure is tolerated
        # -- confirm against the helper.
        failure_condition = (
            "StrictVersion(onnx.__version__)"
            " < StrictVersion('1.2') or "
            "StrictVersion(onnxruntime.__version__)"
            " <= StrictVersion('0.2.1')"
        )
        dump_data_and_model(
            X.astype(np.float32),
            selector,
            onnx_model,
            basename="SklearnSelectFdr",
            allow_failure=failure_condition,
        )
Example #16
Source File: test_sklearn_feature_selection_converters.py    From sklearn-onnx with MIT License 6 votes vote down vote up
def test_select_fwe_float(self):
        """Convert a fitted ``SelectFwe`` to ONNX with a float32 input and dump it."""
        X, y = load_breast_cancer(return_X_y=True)
        selector = SelectFwe()
        selector.fit(X, y)

        n_features = X.shape[1]
        input_types = [("input", FloatTensorType([None, n_features]))]
        onnx_model = convert_sklearn(selector, "select fwe", input_types)
        self.assertTrue(onnx_model is not None)

        # Version-check expression handed to dump_data_and_model as a string;
        # presumably evaluated there to decide whether a failure is tolerated
        # -- confirm against the helper.
        failure_condition = (
            "StrictVersion(onnx.__version__)"
            " < StrictVersion('1.2') or "
            "StrictVersion(onnxruntime.__version__)"
            " <= StrictVersion('0.2.1')"
        )
        dump_data_and_model(
            X.astype(np.float32),
            selector,
            onnx_model,
            basename="SklearnSelectFwe",
            allow_failure=failure_condition,
        )
Example #17
Source File: test_logistic.py    From h2o4gpu with Apache License 2.0 6 votes vote down vote up
def test_not_labels():
    """Check that h2o4gpu and scikit-learn logistic regression agree on accuracy.

    Both models are fitted on the same 75/25 split (seeded with 42) of the
    breast cancer data; their test accuracies must be close.
    """
    bunch = load_breast_cancer()
    X, y = bunch.data, bunch.target

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=42)

    # Reference model from scikit-learn.
    reference = linear_model.LogisticRegression()
    reference.fit(X_train, y_train)
    reference_pred = reference.predict(X_test)

    # Model under test from h2o4gpu.
    candidate = h2o4gpu.LogisticRegression()
    candidate.fit(X_train, y_train)
    candidate_pred = candidate.predict(X_test)

    reference_accuracy = accuracy_score(y_test, reference_pred)
    # The h2o4gpu predictions are squeezed before scoring.
    candidate_accuracy = accuracy_score(y_test, candidate_pred.squeeze())
    assert np.allclose(reference_accuracy, candidate_accuracy)
Example #18
Source File: test_des_integration.py    From DESlib with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def load_dataset(encode_labels, rng):
    """Load, split and standardize the breast cancer data for DS tests.

    Parameters
    ----------
    encode_labels : array-like or None
        When not None, each target value is replaced by
        ``np.take(encode_labels, y)``, i.e. the target is used as an index
        into this sequence.
    rng : object
        Passed as ``random_state`` to both ``train_test_split`` calls.

    Returns
    -------
    tuple
        ``(X_dsel, X_test, X_train, y_dsel, y_test, y_train)``.
    """
    bunch = load_breast_cancer()
    X, y = bunch.data, bunch.target
    if encode_labels is not None:
        y = np.take(encode_labels, y)

    # First split: 67% training, 33% test.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=rng)

    # Standardize using statistics fitted on the training part only.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Second split: halve the training part into training and DSEL sets.
    X_train, X_dsel, y_train, y_dsel = train_test_split(
        X_train, y_train, test_size=0.5, random_state=rng)

    return X_dsel, X_test, X_train, y_dsel, y_test, y_train
Example #19
Source File: test_des_integration.py    From DESlib with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_meta_no_pool_of_classifiers(knn_methods):
    """Check METADES's score when no pool of classifiers is passed in.

    ``knn_methods`` is forwarded as ``knn_classifier``. The expected score
    is a pinned literal, so the statements below must keep their order.
    """
    # One RandomState is shared by the split and by METADES; the pinned score
    # below presumably depends on the order in which it is consumed.
    rng = np.random.RandomState(123456)

    data = load_breast_cancer()
    X = data.data
    y = data.target

    # split the data into training and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
                                                        random_state=rng)
    # Scale the variables to have 0 mean and unit variance
    scalar = StandardScaler()
    X_train = scalar.fit_transform(X_train)
    X_test = scalar.transform(X_test)

    # No pool of classifiers is given to the constructor.
    meta_des = METADES(knn_classifier=knn_methods, random_state=rng,
                       DSEL_perc=0.5)
    meta_des.fit(X_train, y_train)
    assert np.isclose(meta_des.score(X_test, y_test), 0.9095744680851063)
Example #20
Source File: test_utils.py    From pyDML with GNU General Public License v3.0 5 votes vote down vote up
def breast_cancer():
    """Return ``Xy_dataset`` applied to the ``load_breast_cancer`` loader.

    The loader function itself is passed, not its result.
    """
    dataset = Xy_dataset(load_breast_cancer)
    return dataset
Example #21
Source File: custom_objective.py    From autogbt-alt with MIT License 5 votes vote down vote up
def main():
    """Train an ``AutoGBTClassifier`` with a custom objective and print AUCs.

    Uses an unseeded 90/10 train/validation split of the breast cancer
    data, then prints the validation AUC and the model's ``best_score``.
    """
    features, labels = load_breast_cancer(return_X_y=True)
    train_X, valid_X, train_y, valid_y = train_test_split(
        features, labels, test_size=0.1)

    model = AutoGBTClassifier(n_trials=5, objective=CustomObjective())
    model.fit(train_X, train_y)

    valid_auc = roc_auc_score(valid_y, model.predict(valid_X))
    print('valid AUC: %.3f' % valid_auc)
    print('CV AUC: %.3f' % model.best_score)
Example #22
Source File: sofm_heatmap_visualization.py    From neupy with MIT License 5 votes vote down vote up
def load_data():
    """Return the breast cancer features, min-max scaled, and the targets.

    Returns
    -------
    tuple
        ``(data, target)``, where ``data`` is the output of
        ``MinMaxScaler().fit_transform`` and ``target`` is unchanged.
    """
    features, labels = datasets.load_breast_cancer(return_X_y=True)
    scaled_features = preprocessing.MinMaxScaler().fit_transform(features)
    return scaled_features, labels
Example #23
Source File: common_utils.py    From interpret-community with MIT License 5 votes vote down vote up
def create_scikit_cancer_data():
    """Return an 80/20 split of the breast cancer data plus its metadata.

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test, feature_names, classes)``,
        where ``feature_names`` is the dataset's ``feature_names`` attribute
        and ``classes`` is ``target_names`` converted to a list.
    """
    breast_cancer_data = load_breast_cancer()

    # Split data into train and test (seeded so the split is reproducible)
    x_train, x_test, y_train, y_test = train_test_split(breast_cancer_data.data,
                                                        breast_cancer_data.target,
                                                        test_size=0.2,
                                                        random_state=0)
    feature_names = breast_cancer_data.feature_names
    # Computed once; the original assigned the same value twice.
    classes = breast_cancer_data.target_names.tolist()
    return x_train, x_test, y_train, y_test, feature_names, classes
Example #24
Source File: test_distns.py    From ngboost with Apache License 2.0 5 votes vote down vote up
def cls_data(self):
        """Return an unseeded 80/20 train/test split of the breast cancer data.

        Returns
        -------
        list
            The result of ``train_test_split(X, Y, test_size=0.2)``.
        """
        # ``return_X_y`` is keyword-only in current scikit-learn; passing it
        # positionally raises a TypeError there.
        X, Y = load_breast_cancer(return_X_y=True)
        return train_test_split(X, Y, test_size=0.2)
Example #25
Source File: __init__.py    From skoot with MIT License 5 votes vote down vote up
def load_breast_cancer_df(include_tgt=True, tgt_name="target", names=None):
    """Load the breast cancer dataset as a dataframe.

    The sklearn bunch is handed to ``_load_from_bunch`` together with the
    arguments below; the target, when included, is stored under
    ``tgt_name``.

    Parameters
    ----------
    include_tgt : bool, optional (default=True)
        Whether the target should be part of the result.

    tgt_name : str, optional (default="target")
        Name to give the target feature.

    names : iterable or None
        Column names for the dataframe. When not defined, the
        ``feature_names`` attribute of the sklearn bunch is used.

    Returns
    -------
    X : pd.DataFrame, shape=(n_samples, n_features)
        The loaded breast cancer dataset
    """
    # Imported locally, as in the original, so sklearn is only needed on call.
    from sklearn.datasets import load_breast_cancer

    bunch = load_breast_cancer()
    return _load_from_bunch(bunch, include_tgt, tgt_name, names)
Example #26
Source File: dominance.py    From dominance-analysis with MIT License 5 votes vote down vote up
def get_breast_cancer(cls):
		"""Return the breast cancer data as a DataFrame of features plus ``target``.

		Prints two informational messages, then builds a DataFrame whose
		columns are the dataset's feature names followed by a ``target``
		column.

		The original loaded the dataset five times. It also built a
		``target_names`` column and immediately dropped it again with
		``iloc[:, :-1]``. The dataset is now loaded once and that dead
		column is no longer created; the returned columns are unchanged.
		"""
		print("""The copy of UCI ML Breast Cancer Wisconsin (Diagnostic) dataset is downloaded from: https://goo.gl/U2Uwz2""")
		print("""Internally using load_breast_cancer function from sklearn.datasets """)
		# Load the bunch once and read every field from the same object.
		bunch = load_breast_cancer()
		breast_cancer_data = pd.DataFrame(data=bunch['data'], columns=bunch['feature_names'])
		breast_cancer_data['target'] = bunch['target']
		return breast_cancer_data
Example #27
Source File: conftest.py    From python-sasctl with Apache License 2.0 5 votes vote down vote up
def cancer_dataset():
    """Binary classification dataset.

    Returns a DataFrame with the breast cancer features plus a categorical
    ``Type`` column whose categories are the dataset's target names. The
    calling test is skipped when sklearn or pandas cannot be imported.
    """
    pytest.importorskip('sklearn')
    pd = pytest.importorskip('pandas')
    from sklearn import datasets

    raw = datasets.load_breast_cancer()
    df = pd.DataFrame(raw.data, columns=raw.feature_names)
    df['Type'] = raw.target
    df.Type = df.Type.astype('category')
    # Assigning to ``.cat.categories`` was removed in pandas 2.0;
    # ``rename_categories`` relabels the categories positionally and also
    # exists in older pandas releases.
    df['Type'] = df['Type'].cat.rename_categories(raw.target_names)
    return df
Example #28
Source File: test_environment.py    From hyperparameter_hunter with MIT License 5 votes vote down vote up
def get_breast_cancer_data():
    """Return the breast cancer data as a DataFrame with a ``diagnosis`` column.

    The feature columns are named after the dataset's ``feature_names``;
    the target is appended as the last column, ``diagnosis``.
    """
    bunch = load_breast_cancer()
    frame = pd.DataFrame(data=bunch.data, columns=bunch.feature_names)
    frame["diagnosis"] = bunch.target
    return frame
Example #29
Source File: test_cc.py    From pycobra with MIT License 5 votes vote down vote up
def setUp(self):
        """Fit a ``ClassifierCobra`` on all but the last 20 samples.

        The last 20 rows of the breast cancer features are kept aside as
        ``self.test_data``; the remaining rows and targets are used to fit
        the classifier (``random_state=0``).
        """
        # The original also created an unused ``RandomState(42)`` here; it
        # had no effect on the fixture and has been removed.
        bc = datasets.load_breast_cancer()
        self.X = bc.data[:-20]
        self.y = bc.target[:-20]
        self.test_data = bc.data[-20:]
        self.cc = ClassifierCobra(random_state=0).fit(self.X, self.y)
Example #30
Source File: test_plots.py    From scikit-optimize with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_plots_work_without_cat():
    """Basic smoke tests to make sure plotting doesn't crash.

    Minimizes the negated mean cross-validation score of a decision tree
    over four integer dimensions, then calls each plotting function on the
    optimization result.
    """
    SPACE = [
        Integer(1, 20, name='max_depth'),
        Integer(2, 100, name='min_samples_split'),
        Integer(5, 30, name='min_samples_leaf'),
        Integer(1, 30, name='max_features'),
    ]

    # Load the data once rather than on every objective evaluation.
    # ``return_X_y`` is keyword-only in current scikit-learn; passing it
    # positionally raises a TypeError there.
    X, y = load_breast_cancer(return_X_y=True)

    def objective(params):
        """Return the negated mean CV score for one parameter vector."""
        clf = DecisionTreeClassifier(random_state=3,
                                     **{dim.name: val
                                        for dim, val in zip(SPACE, params)
                                        if dim.name != 'dummy'})
        return -np.mean(cross_val_score(clf, X, y))

    res = gp_minimize(objective, SPACE, n_calls=10, random_state=3)
    plots.plot_convergence(res)
    plots.plot_evaluations(res)
    plots.plot_objective(res)
    plots.plot_objective(res,
                         minimum='expected_minimum')
    plots.plot_objective(res,
                         sample_source='expected_minimum',
                         n_minimum_search=10)
    plots.plot_objective(res, sample_source='result')
    plots.plot_regret(res)

    # TODO: Compare plots to known good results?
    # Look into how matplotlib does this.