Python sklearn.datasets.load_breast_cancer() Examples

The following are 30 code examples showing how to use sklearn.datasets.load_breast_cancer(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module sklearn.datasets, or try the search function.

Example 1
Project: AIX360   Author: IBM   File: test_Feature_Binarizer_From_Trees.py    License: Apache License 2.0 7 votes vote down vote up
def setUp(self) -> None:
    """Build a mixed-type breast-cancer DataFrame (ordinal, categorical,
    and binary columns) and split it into train/test partitions."""
    self.random_state = 0
    data: dict = load_breast_cancer()
    frame: DataFrame = DataFrame(data['data'], columns=data['feature_names'])
    self.col_ordinal = frame.columns.to_list()

    # Seed once; the randint calls below happen in a fixed order, so the
    # synthetic columns are reproducible.
    np.random.seed(self.random_state)
    n_rows = len(frame)

    # Synthetic categorical columns (three levels each).
    levels3 = np.array(['a', 'b', 'c'])
    frame['cat alpha'] = levels3[np.random.randint(0, 3, n_rows)]
    frame['cat num'] = np.random.randint(0, 3, n_rows)
    self.col_categorical = ['cat alpha', 'cat num']

    # Synthetic binary columns (two levels each).
    levels2 = np.array(['a', 'b'])
    frame['bin alpha'] = levels2[np.random.randint(0, 2, n_rows)]
    frame['bin num'] = np.random.randint(0, 2, n_rows)
    self.col_binary = ['bin alpha', 'bin num']

    self.X = frame
    self.y: ndarray = data['target']
    split = train_test_split(self.X, self.y, test_size=0.4,
                             random_state=self.random_state)
    self.X_train, self.X_test, self.y_train, self.y_test = split
Example 2
Project: combo   Author: yzhao062   File: test_classifier_des.py    License: BSD 2-Clause "Simplified" License 6 votes vote down vote up
def setUp(self):
    """Fit a DES_LA ensemble of five base classifiers on a breast-cancer
    train split and record quality floors for the assertions."""
    self.roc_floor = 0.9
    self.accuracy_floor = 0.9

    seed = 42
    features, labels = load_breast_cancer(return_X_y=True)
    (self.X_train, self.X_test,
     self.y_train, self.y_test) = train_test_split(
        features, labels, test_size=0.4, random_state=seed)

    base_estimators = [
        DecisionTreeClassifier(random_state=seed),
        LogisticRegression(random_state=seed),
        KNeighborsClassifier(),
        RandomForestClassifier(random_state=seed),
        GradientBoostingClassifier(random_state=seed),
    ]

    self.clf = DES_LA(base_estimators, local_region_size=30)
    self.clf.fit(self.X_train, self.y_train)
Example 3
Project: combo   Author: yzhao062   File: test_cluster_comb.py    License: BSD 2-Clause "Simplified" License 6 votes vote down vote up
def setUp(self):
    """Cluster the breast-cancer data with three estimators and store one
    label column per estimator for ensemble-combination tests."""
    self.X, self.y = load_breast_cancer(return_X_y=True)

    self.n_clusters = 5
    self.n_estimators = 3

    # Initialize a set of estimators.
    base_clusterers = [
        KMeans(n_clusters=self.n_clusters),
        MiniBatchKMeans(n_clusters=self.n_clusters),
        AgglomerativeClustering(n_clusters=self.n_clusters),
    ]

    # Clusterer ensemble without initializing a new class: one label
    # column per fitted estimator.
    self.original_labels = np.zeros([self.X.shape[0], self.n_estimators])
    for col, clusterer in enumerate(base_clusterers):
        clusterer.fit(self.X)
        self.original_labels[:, col] = clusterer.labels_
Example 4
Project: combo   Author: yzhao062   File: test_classifier_stacking.py    License: BSD 2-Clause "Simplified" License 6 votes vote down vote up
def setUp(self):
    """Fit a 4-fold Stacking ensemble of five base classifiers on a
    breast-cancer train split."""
    self.roc_floor = 0.9
    self.accuracy_floor = 0.9

    seed = 42
    features, labels = load_breast_cancer(return_X_y=True)
    (self.X_train, self.X_test,
     self.y_train, self.y_test) = train_test_split(
        features, labels, test_size=0.4, random_state=seed)

    base_estimators = [
        DecisionTreeClassifier(random_state=seed),
        LogisticRegression(random_state=seed),
        KNeighborsClassifier(),
        RandomForestClassifier(random_state=seed),
        GradientBoostingClassifier(random_state=seed),
    ]

    self.clf = Stacking(base_estimators, n_folds=4)
    self.clf.fit(self.X_train, self.y_train)
Example 5
Project: combo   Author: yzhao062   File: test_classifier_comb.py    License: BSD 2-Clause "Simplified" License 6 votes vote down vote up
def setUp(self):
    """Construct an (unfitted) average aggregator over five base
    classifiers; fitting is left to the individual tests."""
    self.roc_floor = 0.9
    self.accuracy_floor = 0.9

    seed = 42
    features, labels = load_breast_cancer(return_X_y=True)
    (self.X_train, self.X_test,
     self.y_train, self.y_test) = train_test_split(
        features, labels, test_size=0.4, random_state=seed)

    base_estimators = [
        DecisionTreeClassifier(random_state=seed),
        LogisticRegression(random_state=seed),
        KNeighborsClassifier(),
        RandomForestClassifier(random_state=seed),
        GradientBoostingClassifier(random_state=seed),
    ]

    self.clf = SimpleClassifierAggregator(base_estimators, method='average')
Example 6
Project: combo   Author: yzhao062   File: test_classifier_comb.py    License: BSD 2-Clause "Simplified" License 6 votes vote down vote up
def setUp(self):
    """Fit an average aggregator over five base classifiers on a
    breast-cancer train split."""
    self.roc_floor = 0.9
    self.accuracy_floor = 0.9

    seed = 42
    features, labels = load_breast_cancer(return_X_y=True)
    (self.X_train, self.X_test,
     self.y_train, self.y_test) = train_test_split(
        features, labels, test_size=0.4, random_state=seed)

    base_estimators = [
        DecisionTreeClassifier(random_state=seed),
        LogisticRegression(random_state=seed),
        KNeighborsClassifier(),
        RandomForestClassifier(random_state=seed),
        GradientBoostingClassifier(random_state=seed),
    ]

    self.clf = SimpleClassifierAggregator(base_estimators, method='average')
    self.clf.fit(self.X_train, self.y_train)
Example 7
Project: combo   Author: yzhao062   File: test_classifier_comb.py    License: BSD 2-Clause "Simplified" License 6 votes vote down vote up
def setUp(self):
    """Fit a weighted-average aggregator (one weight per base classifier)
    on a breast-cancer train split."""
    self.roc_floor = 0.9
    self.accuracy_floor = 0.9

    seed = 42
    features, labels = load_breast_cancer(return_X_y=True)
    (self.X_train, self.X_test,
     self.y_train, self.y_test) = train_test_split(
        features, labels, test_size=0.4, random_state=seed)

    # One weight per classifier below; weights sum to 1.
    clf_weights = np.array([0.1, 0.4, 0.1, 0.2, 0.2])

    base_estimators = [
        DecisionTreeClassifier(random_state=seed),
        LogisticRegression(random_state=seed),
        KNeighborsClassifier(),
        RandomForestClassifier(random_state=seed),
        GradientBoostingClassifier(random_state=seed),
    ]

    self.clf = SimpleClassifierAggregator(base_estimators, method='average',
                                          weights=clf_weights)
    self.clf.fit(self.X_train, self.y_train)
Example 8
Project: combo   Author: yzhao062   File: test_classifier_comb.py    License: BSD 2-Clause "Simplified" License 6 votes vote down vote up
def setUp(self):
    """Fit a maximization aggregator over five base classifiers on a
    breast-cancer train split."""
    self.roc_floor = 0.9
    self.accuracy_floor = 0.9

    seed = 42
    features, labels = load_breast_cancer(return_X_y=True)
    (self.X_train, self.X_test,
     self.y_train, self.y_test) = train_test_split(
        features, labels, test_size=0.4, random_state=seed)

    base_estimators = [
        DecisionTreeClassifier(random_state=seed),
        LogisticRegression(random_state=seed),
        KNeighborsClassifier(),
        RandomForestClassifier(random_state=seed),
        GradientBoostingClassifier(random_state=seed),
    ]

    self.clf = SimpleClassifierAggregator(base_estimators,
                                          method='maximization')
    self.clf.fit(self.X_train, self.y_train)
Example 9
Project: combo   Author: yzhao062   File: test_classifier_comb.py    License: BSD 2-Clause "Simplified" License 6 votes vote down vote up
def setUp(self):
    """Fit a median aggregator over five base classifiers on a
    breast-cancer train split."""
    self.roc_floor = 0.9
    self.accuracy_floor = 0.9

    seed = 42
    features, labels = load_breast_cancer(return_X_y=True)
    (self.X_train, self.X_test,
     self.y_train, self.y_test) = train_test_split(
        features, labels, test_size=0.4, random_state=seed)

    base_estimators = [
        DecisionTreeClassifier(random_state=seed),
        LogisticRegression(random_state=seed),
        KNeighborsClassifier(),
        RandomForestClassifier(random_state=seed),
        GradientBoostingClassifier(random_state=seed),
    ]

    self.clf = SimpleClassifierAggregator(base_estimators,
                                          method='median')
    self.clf.fit(self.X_train, self.y_train)
Example 10
Project: wisconsin-breast-cancer   Author: AFAgarap   File: main_nearest_neighbor.py    License: Apache License 2.0 6 votes vote down vote up
def main():
    """Train a nearest-neighbor model on the standardized breast-cancer
    dataset and write prediction results to disk."""
    dataset = datasets.load_breast_cancer()
    labels = dataset.target

    # Standardize features; scaling preserves the feature count.
    features = StandardScaler().fit_transform(dataset.data)
    num_features = features.shape[1]

    # Stratified 70/30 split keeps the class balance in both partitions.
    split = train_test_split(features, labels, test_size=0.3, stratify=labels)
    train_features, test_features, train_labels, test_labels = split

    model = NearestNeighbor(train_features, train_labels, num_features)
    model.predict(test_features, test_labels,
                  result_path="./results/nearest_neighbor/")
Example 11
Project: dislib   Author: bsc-wdc   File: test_gridsearch.py    License: Apache License 2.0 6 votes vote down vote up
def test_fit_2(self):
    """Tests GridSearchCV fit() with different data."""
    X_np, y_np = datasets.load_breast_cancer(return_X_y=True)
    x = StandardScaler().fit_transform(ds.array(X_np, block_size=(100, 10)))
    y = ds.array(y_np.reshape(-1, 1), block_size=(100, 1))

    param_grid = {'c': [0.1], 'gamma': [0.1]}
    searcher = GridSearchCV(CascadeSVM(), param_grid, cv=5)
    searcher.fit(x, y)

    # After a successful fit the searcher exposes the fitted attributes.
    for attr in ('best_estimator_', 'best_score_', 'best_params_',
                 'best_index_', 'scorer_'):
        self.assertTrue(hasattr(searcher, attr))
    self.assertEqual(searcher.n_splits_, 5)
Example 12
Project: pyfms   Author: dstein64   File: test_pyfms.py    License: MIT License 6 votes vote down vote up
def test_save_load_classifier(self):
    """Saving weights to disk and re-loading them must reproduce both the
    raw weights and the held-out accuracy exactly."""
    X, y = datasets.load_breast_cancer(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    n_factors = 4

    original = pyfms.Classifier(X.shape[1], k=n_factors)
    original.fit(X_train, y_train, nb_epoch=1000)
    weights_before = original.get_weights()
    accuracy_before = accuracy_score(y_test, original.predict(X_test))

    # Round-trip the weights through a file in the test workspace.
    weights_path = os.path.join(self.workspace, 'classifier.fm')
    original.save_weights(weights_path)

    restored = pyfms.Classifier(X.shape[1])
    restored.load_weights(weights_path)
    weights_after = restored.get_weights()
    accuracy_after = accuracy_score(y_test, restored.predict(X_test))

    for before, after in zip(weights_before, weights_after):
        np.testing.assert_array_equal(before, after)
    self.assertEqual(accuracy_before, accuracy_after)
Example 13
Project: sklearn-onnx   Author: onnx   File: test_sklearn_feature_selection_converters.py    License: MIT License 6 votes vote down vote up
def test_select_fdr_int(self):
    """Convert a fitted SelectFdr to ONNX with int64 inputs and dump it."""
    X, y = load_breast_cancer(return_X_y=True)
    model = SelectFdr().fit(X, y)
    inputs = [("input", Int64TensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(model, "select fdr", inputs)
    self.assertTrue(model_onnx is not None)
    # Skip execution on runtime/onnx versions known to lack support.
    skip_condition = ("StrictVersion(onnx.__version__)"
                      " < StrictVersion('1.2') or "
                      "StrictVersion(onnxruntime.__version__)"
                      " <= StrictVersion('0.2.1')")
    dump_data_and_model(X.astype(np.int64), model, model_onnx,
                        basename="SklearnSelectFdr",
                        allow_failure=skip_condition)
Example 14
Project: sklearn-onnx   Author: onnx   File: test_sklearn_feature_selection_converters.py    License: MIT License 6 votes vote down vote up
def test_select_fwe_int(self):
    """Convert a fitted SelectFwe to ONNX with int64 inputs and dump it."""
    X, y = load_breast_cancer(return_X_y=True)
    model = SelectFwe().fit(X, y)
    inputs = [("input", Int64TensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(model, "select fwe", inputs)
    self.assertTrue(model_onnx is not None)
    # Skip execution on runtime/onnx versions known to lack support.
    skip_condition = ("StrictVersion(onnx.__version__)"
                      " < StrictVersion('1.2') or "
                      "StrictVersion(onnxruntime.__version__)"
                      " <= StrictVersion('0.2.1')")
    dump_data_and_model(X.astype(np.int64), model, model_onnx,
                        basename="SklearnSelectFwe",
                        allow_failure=skip_condition)
Example 15
Project: sklearn-onnx   Author: onnx   File: test_sklearn_feature_selection_converters.py    License: MIT License 6 votes vote down vote up
def test_select_fdr_float(self):
    """Convert a fitted SelectFdr to ONNX with float32 inputs and dump it."""
    X, y = load_breast_cancer(return_X_y=True)
    model = SelectFdr().fit(X, y)
    inputs = [("input", FloatTensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(model, "select fdr", inputs)
    self.assertTrue(model_onnx is not None)
    # Skip execution on runtime/onnx versions known to lack support.
    skip_condition = ("StrictVersion(onnx.__version__)"
                      " < StrictVersion('1.2') or "
                      "StrictVersion(onnxruntime.__version__)"
                      " <= StrictVersion('0.2.1')")
    dump_data_and_model(X.astype(np.float32), model, model_onnx,
                        basename="SklearnSelectFdr",
                        allow_failure=skip_condition)
Example 16
Project: sklearn-onnx   Author: onnx   File: test_sklearn_feature_selection_converters.py    License: MIT License 6 votes vote down vote up
def test_select_fwe_float(self):
    """Convert a fitted SelectFwe to ONNX with float32 inputs and dump it."""
    X, y = load_breast_cancer(return_X_y=True)
    model = SelectFwe().fit(X, y)
    inputs = [("input", FloatTensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(model, "select fwe", inputs)
    self.assertTrue(model_onnx is not None)
    # Skip execution on runtime/onnx versions known to lack support.
    skip_condition = ("StrictVersion(onnx.__version__)"
                      " < StrictVersion('1.2') or "
                      "StrictVersion(onnxruntime.__version__)"
                      " <= StrictVersion('0.2.1')")
    dump_data_and_model(X.astype(np.float32), model, model_onnx,
                        basename="SklearnSelectFwe",
                        allow_failure=skip_condition)
Example 17
Project: h2o4gpu   Author: h2oai   File: test_logistic.py    License: Apache License 2.0 6 votes vote down vote up
def test_not_labels():
    """h2o4gpu's LogisticRegression should match sklearn's accuracy on the
    breast cancer dataset."""
    X, y = load_breast_cancer(return_X_y=True)

    # Splitting data into train and test (fixed seed for reproducibility).
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=42)

    # Reference predictions: scikit-learn.
    y_pred_sklearn = linear_model.LogisticRegression().fit(
        X_train, y_train).predict(X_test)

    # Candidate predictions: h2o4gpu.
    y_pred_h2o = h2o4gpu.LogisticRegression().fit(
        X_train, y_train).predict(X_test)

    assert np.allclose(accuracy_score(y_test, y_pred_sklearn),
                       accuracy_score(y_test, y_pred_h2o.squeeze()))
Example 18
Project: DESlib   Author: scikit-learn-contrib   File: test_des_integration.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def load_dataset(encode_labels, rng):
    """Load breast cancer, optionally re-encode the labels, standardize the
    features, and split into train / DSEL / test partitions.

    Returns ``(X_dsel, X_test, X_train, y_dsel, y_test, y_train)``.
    """
    X, y = load_breast_cancer(return_X_y=True)
    if encode_labels is not None:
        y = np.take(encode_labels, y)

    # Hold out a test set first.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=rng)

    # Scale to zero mean / unit variance using training statistics only.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Carve a DSEL partition out of the training data for DS techniques.
    X_train, X_dsel, y_train, y_dsel = train_test_split(
        X_train, y_train, test_size=0.5, random_state=rng)

    # Callers typically build a pool of 10 calibrated perceptrons on top.
    return X_dsel, X_test, X_train, y_dsel, y_test, y_train
Example 19
Project: DESlib   Author: scikit-learn-contrib   File: test_des_integration.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_meta_no_pool_of_classifiers(knn_methods):
    """METADES fitted without an explicit classifier pool should hit the
    expected accuracy on the held-out breast-cancer data."""
    rng = np.random.RandomState(123456)
    X, y = load_breast_cancer(return_X_y=True)

    # Split the data into training and test partitions.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=rng)

    # Scale to zero mean / unit variance using training statistics only.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    meta_des = METADES(knn_classifier=knn_methods, random_state=rng,
                       DSEL_perc=0.5)
    meta_des.fit(X_train, y_train)
    assert np.isclose(meta_des.score(X_test, y_test), 0.9095744680851063)
Example 20
Project: xcessiv   Author: reiinakano   File: functions.py    License: Apache License 2.0 5 votes vote down vote up
def get_sample_dataset(dataset_properties):
    """Return a sample dataset described by ``dataset_properties``.

    Args:
        dataset_properties (dict): Dictionary corresponding to the properties
            of the dataset used to verify the estimator and metric generators.
            Must contain a ``'type'`` key; for ``'multiclass'`` the remaining
            keys are forwarded to ``datasets.make_classification``.

    Returns:
        X (array-like): Features array

        y (array-like): Labels array

        splits (iterator): This is an iterator that returns train test splits
            for cross-validation purposes on ``X`` and ``y``.

    Raises:
        exceptions.UserError: If the dataset type is unknown, or if synthetic
            dataset generation fails for the ``'multiclass'`` type.
    """
    kwargs = dataset_properties.copy()
    data_type = kwargs.pop('type')
    # NOTE: the (Stratified)KFold splitters below use their default
    # shuffle=False, so a random_state would be ignored -- and passing one
    # anyway raises a ValueError on scikit-learn >= 0.24. It is therefore
    # omitted; split order is deterministic without it.
    if data_type == 'multiclass':
        try:
            X, y = datasets.make_classification(random_state=8, **kwargs)
            splits = model_selection.StratifiedKFold(n_splits=2).split(X, y)
        except Exception as e:
            raise exceptions.UserError(repr(e))
    elif data_type == 'iris':
        X, y = datasets.load_iris(return_X_y=True)
        splits = model_selection.StratifiedKFold(n_splits=2).split(X, y)
    elif data_type == 'mnist':
        X, y = datasets.load_digits(return_X_y=True)
        splits = model_selection.StratifiedKFold(n_splits=2).split(X, y)
    elif data_type == 'breast_cancer':
        X, y = datasets.load_breast_cancer(return_X_y=True)
        splits = model_selection.StratifiedKFold(n_splits=2).split(X, y)
    elif data_type == 'boston':
        X, y = datasets.load_boston(return_X_y=True)
        splits = model_selection.KFold(n_splits=2).split(X)
    elif data_type == 'diabetes':
        X, y = datasets.load_diabetes(return_X_y=True)
        splits = model_selection.KFold(n_splits=2).split(X)
    else:
        raise exceptions.UserError('Unknown dataset type {}'.format(dataset_properties['type']))
    return X, y, splits
Example 21
Project: MKLpy   Author: IvanoLauriola   File: unit_tests.py    License: GNU General Public License v3.0 5 votes vote down vote up
def setUp(self):
    """Prepare numeric tensors and string samples for kernel tests."""
    data = load_breast_cancer()
    train_mat, test_mat = train_test_split(data.data, shuffle=True,
                                           test_size=.2)
    self.Xtr = torch.tensor(train_mat)
    self.Xte = torch.tensor(test_mat)
    # Toy string datasets for string-kernel tests.
    self.Str = ['aaba', 'bac', 'abac', 'waibba', 'aaiicaaac']
    self.Ste = ['aaa', 'bac', 'bababbwa']
Example 22
Project: MKLpy   Author: IvanoLauriola   File: unit_tests.py    License: GNU General Public License v3.0 5 votes vote down vote up
def setUp(self):
    """Build normalized train/test splits plus homogeneous polynomial
    kernel lists (degrees 1-5) and their generator counterparts."""
    data = load_breast_cancer()
    self.Xtr, self.Xte, self.Ytr, self.Yte = train_test_split(
        data.data, data.target, shuffle=True, train_size=50)
    self.Xtr = preprocessing.normalization(self.Xtr)
    self.Xte = preprocessing.normalization(self.Xte)

    degrees = range(1, 6)
    self.KLtr = [pairwise_mk.homogeneous_polynomial_kernel(self.Xtr, degree=d)
                 for d in degrees]
    self.KLte = [pairwise_mk.homogeneous_polynomial_kernel(self.Xte, self.Xtr,
                                                           degree=d)
                 for d in degrees]
    self.KLtr_g = HPK_generator(self.Xtr, degrees=degrees)
    self.KLte_g = HPK_generator(self.Xte, self.Xtr, degrees=degrees)
Example 23
Project: MKLpy   Author: IvanoLauriola   File: unit_tests.py    License: GNU General Public License v3.0 5 votes vote down vote up
def setUp(self):
    """Create a shuffled 50-sample training split of the breast cancer data."""
    data = load_breast_cancer()
    split = train_test_split(data.data, data.target, shuffle=True,
                             train_size=50)
    self.Xtr, self.Xte, self.Ytr, self.Yte = split
Example 24
Project: go-ml-transpiler   Author: znly   File: transpile.py    License: Apache License 2.0 5 votes vote down vote up
def main(export_dir):
    """Train XGBoost and random-forest classifiers on the dataset and
    transpile each fitted model into ``export_dir/<package>``."""
    ## load dataset
    x, y = load_dataset(return_X_y=True)

    def _export(model, package):
        # Fit, transpile, and write one model under export_dir/<package>.
        model.fit(x, y)
        target = os.path.join(export_dir, package)
        os.mkdir(target)
        transpiler = Transpiler(model)
        transpiler.transpile(package_name=package, method_name="predict",
                             export_method=True)
        transpiler.write(target)

    ## train + transpile xgb
    _export(xgb.XGBClassifier(n_estimators=100, max_depth=7), "xgb")
    print("xgb done.")

    ## train + transpile rfc
    _export(RFC(n_estimators=100, max_depth=7), "rfc")
    print("rfc done.")
Example 25
Project: AIX360   Author: IBM   File: feature_binarizer_from_trees_demo.py    License: Apache License 2.0 5 votes vote down vote up
def fbt_vs_fb_cancer(iterations=30, treeNum=1, treeDepth=4, numThresh=9, filename=None):
    """Run the FBT-vs-FB comparison on the breast cancer dataset after
    dropping highly correlated feature columns."""
    dataset = load_breast_cancer()
    features = pd.DataFrame(dataset.data, columns=dataset.feature_names)
    target = dataset.target
    # Remove correlated columns before comparing the binarizers.
    features.drop(columns=get_corr_columns(features), inplace=True)
    return fbt_vs_fb(features, target, iterations=iterations, treeNum=treeNum,
                     treeDepth=treeDepth, numThresh=numThresh,
                     filename=filename)
Example 26
Project: AIX360   Author: IBM   File: test_Logistic_Rule_Regression.py    License: Apache License 2.0 5 votes vote down vote up
def setUpClass(cls):
        # Load the breast cancer dataset once and share it across all tests
        # in this class (stored as the sklearn Bunch returned by the loader).
        cls.bc = load_breast_cancer()
Example 27
Project: AIX360   Author: IBM   File: test_Boolean_Rule_CG.py    License: Apache License 2.0 5 votes vote down vote up
def setUpClass(cls):
        # Load the breast cancer dataset once and share it across all tests
        # in this class (stored as the sklearn Bunch returned by the loader).
        cls.bc = load_breast_cancer()
Example 28
Project: sacred   Author: IDSIA   File: 07_magic.py    License: MIT License 5 votes vote down vote up
def run():
    """Train the injected model on a breast-cancer split and return its
    held-out accuracy."""
    X, y = datasets.load_breast_cancer(return_X_y=True)
    holdout = model_selection.train_test_split(X, y, test_size=0.2)
    X_train, X_test, y_train, y_test = holdout
    model = get_model()  # Parameters are injected automatically.
    model.fit(X_train, y_train)
    return model.score(X_test, y_test)
Example 29
Project: sacred   Author: IDSIA   File: 08_less_magic.py    License: MIT License 5 votes vote down vote up
def run(_config):
    """Train a model built from ``_config`` on a breast-cancer split and
    return its held-out accuracy."""
    X, y = datasets.load_breast_cancer(return_X_y=True)
    holdout = model_selection.train_test_split(X, y, test_size=0.2)
    X_train, X_test, y_train, y_test = holdout
    # Parameters are passed explicitly.
    model = get_model(_config["C"], _config["gamma"], _config["kernel"])
    model.fit(X_train, y_train)
    return model.score(X_test, y_test)
Example 30
Project: skutil   Author: tgsmith61591   File: util.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def load_breast_cancer_df(include_tgt=True, tgt_name="target", shuffle=False):
    """Load the breast cancer dataset into a DataFrame, optionally
    appending the target column and/or shuffling the rows.

    Parameters
    ----------

    include_tgt : bool, optional (default=True)
        Whether to include the target

    tgt_name : str, optional (default="target")
        The name of the target feature

    shuffle : bool, optional (default=False)
        Whether to shuffle the rows


    Returns
    -------

    X : pd.DataFrame, shape=(n_samples, n_features)
        The loaded dataset
    """
    bc = load_breast_cancer()
    frame = pd.DataFrame.from_records(data=bc.data, columns=bc.feature_names)

    if include_tgt:
        frame[tgt_name] = bc.target

    if shuffle:
        return shuffle_dataframe(frame)
    return frame