Python sklearn.datasets.load_digits() Examples

The following are 30 code examples of sklearn.datasets.load_digits(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.datasets , or try the search function

Example #1

Source File: label_digits.py From libact with BSD 2-Clause "Simplified" License

7 votes

def split_train_test(n_classes):
    from sklearn.datasets import load_digits

    n_labeled = 5
    digits = load_digits(n_class=n_classes)  # consider binary case
    X = digits.data
    y = digits.target
    print(np.shape(X))

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
    while len(np.unique(y_train[:n_labeled])) < n_classes:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.33)

    trn_ds = Dataset(X_train, np.concatenate(
        [y_train[:n_labeled], [None] * (len(y_train) - n_labeled)]))
    tst_ds = Dataset(X_test, y_test)

    return trn_ds, tst_ds, digits

Example #2

Source File: train_model.py From production-tools with BSD 3-Clause "New" or "Revised" License

6 votes

def get_mnist_data():
    """Loads the MNIST data set into memory.

    Returns
    -------
    X : array-like, shape=[n_samples, n_features]
        Training data for the MNIST data set.
        
    y : array-like, shape=[n_samples,]
        Labels for the MNIST data set.
    """
    digits = load_digits()
    X, y = digits.data, digits.target
    y = LabelBinarizer().fit_transform(y)

    return X, y

Example #3

Source File: test_pca.py From mars with Apache License 2.0

6 votes

def test_pca_score_with_different_solvers(self):
        digits = datasets.load_digits()
        X_digits = mt.tensor(digits.data)

        pca_dict = {svd_solver: PCA(n_components=30, svd_solver=svd_solver,
                                    random_state=0)
                    for svd_solver in self.solver_list}

        for pca in pca_dict.values():
            pca.fit(X_digits)
            # Sanity check for the noise_variance_. For more details see
            # https://github.com/scikit-learn/scikit-learn/issues/7568
            # https://github.com/scikit-learn/scikit-learn/issues/8541
            # https://github.com/scikit-learn/scikit-learn/issues/8544
            assert mt.all((pca.explained_variance_ - pca.noise_variance_) >= 0).to_numpy()

        # Compare scores with different svd_solvers
        score_dict = {svd_solver: pca.score(X_digits).to_numpy()
                      for svd_solver, pca in pca_dict.items()}
        assert_almost_equal(score_dict['full'], score_dict['randomized'],
                            decimal=3)

Example #4

Source File: random_forest.py From ML-From-Scratch with MIT License

6 votes

def main():
    data = datasets.load_digits()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=2)

    clf = RandomForest(n_estimators=100)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    Plot().plot_in_2d(X_test, y_pred, title="Random Forest", accuracy=accuracy, legend_labels=data.target_names)

Example #5

Source File: multilayer_perceptron.py From ML-From-Scratch with MIT License

6 votes

def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    # Convert the nominal y values to binary
    y = to_categorical(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=1)

    # MLP
    clf = MultilayerPerceptron(n_hidden=16,
        n_iterations=1000,
        learning_rate=0.01)

    clf.fit(X_train, y_train)
    y_pred = np.argmax(clf.predict(X_test), axis=1)
    y_test = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_test, y_pred)
    print ("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Multilayer Perceptron", accuracy=accuracy, legend_labels=np.unique(y))

Example #6

Source File: naive_bayes.py From ML-From-Scratch with MIT License

6 votes

def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = NaiveBayes()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Naive Bayes", accuracy=accuracy, legend_labels=data.target_names)

Example #7

Source File: test_model_selection.py From pandas-ml with BSD 3-Clause "New" or "Revised" License

6 votes

def test_train_test_split(self):

        df = pdml.ModelFrame(datasets.load_digits())
        self.assertIsInstance(df, pdml.ModelFrame)

        train_df, test_df = df.model_selection.train_test_split()
        tm.assert_index_equal(df.columns, train_df.columns)
        tm.assert_index_equal(df.columns, test_df.columns)

        self.assertEqual(len(df), len(train_df) + len(test_df))
        self.assertEqual(df.shape[1], train_df.shape[1])
        self.assertEqual(df.shape[1], test_df.shape[1])

        tm.assert_index_equal(df.columns, train_df.columns)
        tm.assert_index_equal(df.columns, test_df.columns)

        df = pdml.ModelFrame(datasets.load_digits())
        df.target_name = 'xxx'

        train_df, test_df = df.model_selection.train_test_split()
        tm.assert_index_equal(df.columns, train_df.columns)
        tm.assert_index_equal(df.columns, test_df.columns)
        self.assertEqual(train_df.target_name, 'xxx')
        self.assertEqual(test_df.target_name, 'xxx')

Example #8

Source File: test_model_selection.py From pandas-ml with BSD 3-Clause "New" or "Revised" License

6 votes

def test_validation_curve(self):
        digits = datasets.load_digits()
        df = pdml.ModelFrame(digits)

        param_range = np.logspace(-2, -1, 2)

        svc = df.svm.SVC(random_state=self.random_state)
        result = df.model_selection.validation_curve(svc, 'gamma',
                                                     param_range)
        expected = ms.validation_curve(svm.SVC(random_state=self.random_state),
                                       digits.data, digits.target,
                                       'gamma', param_range)

        self.assertEqual(len(result), 2)
        self.assert_numpy_array_almost_equal(result[0], expected[0])
        self.assert_numpy_array_almost_equal(result[1], expected[1])

Example #9

Source File: test_metrics.py From pandas-ml with BSD 3-Clause "New" or "Revised" License

6 votes

def setup_method(self):
        import sklearn.svm as svm
        digits = datasets.load_digits()
        self.data = digits.data
        self.target = digits.target
        self.df = pdml.ModelFrame(digits)

        estimator1 = self.df.svm.LinearSVC(C=1.0, random_state=self.random_state)
        self.df.fit(estimator1)

        estimator2 = svm.LinearSVC(C=1.0, random_state=self.random_state)
        estimator2.fit(self.data, self.target)
        self.pred = estimator2.predict(self.data)
        self.decision = estimator2.decision_function(self.data)

        # argument for classification reports
        self.labels = np.array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

Example #10

Source File: test_sklearn_feature_union.py From sklearn-onnx with MIT License

6 votes

def test_feature_union_transformer_weights_2(self):
        data = load_digits()
        X, y = data.data, data.target
        X = X.astype(np.float32)
        X_train, X_test, *_ = train_test_split(X, y, test_size=0.5,
                                               random_state=42)
        model = FeatureUnion([('pca', PCA()),
                              ('svd', TruncatedSVD())],
                             transformer_weights={'pca1': 10, 'svd2': 3}
                             ).fit(X_train)
        model_onnx = convert_sklearn(
            model, 'feature union',
            [('input', FloatTensorType([None, X_test.shape[1]]))])
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            X_test,
            model,
            model_onnx,
            basename="SklearnFeatureUnionTransformerWeights2-Dec4",
            allow_failure="StrictVersion("
            "onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')",
        )

Example #11

Source File: test_sklearn_feature_union.py From sklearn-onnx with MIT License

6 votes

def test_feature_union_transformer_weights_1(self):
        data = load_digits()
        X, y = data.data, data.target
        X = X.astype(np.int64)
        X_train, X_test, *_ = train_test_split(X, y, test_size=0.5,
                                               random_state=42)
        model = FeatureUnion([('pca', PCA()),
                              ('svd', TruncatedSVD())],
                             transformer_weights={'pca': 10, 'svd': 3}
                             ).fit(X_train)
        model_onnx = convert_sklearn(
            model, 'feature union',
            [('input', Int64TensorType([None, X_test.shape[1]]))])
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            X_test,
            model,
            model_onnx,
            basename="SklearnFeatureUnionTransformerWeights1-Dec4",
            allow_failure="StrictVersion("
            "onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')",
        )

Example #12

Source File: test_sklearn_calibrated_classifier_cv_converter.py From sklearn-onnx with MIT License

6 votes

def test_model_calibrated_classifier_cv_int(self):
        data = load_digits()
        X, y = data.data, data.target
        clf = MultinomialNB().fit(X, y)
        model = CalibratedClassifierCV(clf, cv=2, method="sigmoid").fit(X, y)
        model_onnx = convert_sklearn(
            model,
            "scikit-learn CalibratedClassifierCVMNB",
            [("input", Int64TensorType([None, X.shape[1]]))],
            target_opset=TARGET_OPSET
        )
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            X.astype(np.int64),
            model,
            model_onnx,
            basename="SklearnCalibratedClassifierCVInt-Dec4",
            allow_failure="StrictVersion(onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')",
        )

Example #13

Source File: test_sklearn_k_means_converter.py From sklearn-onnx with MIT License

6 votes

def test_batchkmeans_clustering_int(self):
        data = load_digits()
        X = data.data
        model = MiniBatchKMeans(n_clusters=4)
        model.fit(X)
        model_onnx = convert_sklearn(model, "kmeans",
                                     [("input", Int64TensorType([None,
                                      X.shape[1]]))],
                                     target_opset=TARGET_OPSET)
        self.assertIsNotNone(model_onnx)
        dump_data_and_model(
            X.astype(numpy.int64)[40:60],
            model,
            model_onnx,
            basename="SklearnBatchKMeansInt-Dec4",
            allow_failure="StrictVersion(onnx.__version__)"
                          " < StrictVersion('1.2') or "
                          "StrictVersion(onnxruntime.__version__) "
                          "<= StrictVersion('0.2.1')",
        )

Example #14

Source File: test_sklearn_k_means_converter.py From sklearn-onnx with MIT License

6 votes

def test_kmeans_clustering_int(self):
        data = load_digits()
        X = data.data
        model = KMeans(n_clusters=4)
        model.fit(X)
        model_onnx = convert_sklearn(model, "kmeans",
                                     [("input", Int64TensorType([None,
                                      X.shape[1]]))],
                                     target_opset=TARGET_OPSET)
        self.assertIsNotNone(model_onnx)
        dump_data_and_model(
            X.astype(numpy.int64)[40:60],
            model,
            model_onnx,
            basename="SklearnKMeansInt-Dec4",
            # Operator gemm is not implemented in onnxruntime
            allow_failure="StrictVersion(onnx.__version__)"
                          " < StrictVersion('1.2') or "
                          "StrictVersion(onnxruntime.__version__) "
                          "<= StrictVersion('0.2.1')",
        )

Example #15

Source File: test_topology_prune.py From sklearn-onnx with MIT License

6 votes

def test_dummy_identity(self):

        digits = datasets.load_digits(n_class=6)
        Xd = digits.data[:20]
        yd = digits.target[:20]
        n_samples, n_features = Xd.shape

        idtr = make_pipeline(IdentityTransformer(), identity())
        idtr.fit(Xd, yd)

        update_registered_converter(IdentityTransformer, "IdentityTransformer",
                                    dummy_shape_calculator, dummy_converter)
        update_registered_converter(identity, "identity",
                                    dummy_shape_calculator, dummy_converter)

        model_onnx = convert_sklearn(
            idtr,
            "idtr",
            [("input", FloatTensorType([None, Xd.shape[1]]))],
            target_opset=TARGET_OPSET)

        idnode = [node for node in model_onnx.graph.node
                  if node.op_type == "Identity"]
        assert len(idnode) == 2

Example #16

Source File: test_sklearn_pca_converter.py From sklearn-onnx with MIT License

6 votes

def test_pca_default_int_randomised(self):
        data = load_digits()
        X_train, X_test, *_ = train_test_split(
            data.data, data.target, test_size=0.2, random_state=42)
        model = PCA(random_state=42, svd_solver='randomized',
                    iterated_power=3).fit(X_train)
        model_onnx = convert_sklearn(
            model,
            initial_types=[("input",
                            Int64TensorType([None, X_test.shape[1]]))],
        )
        self.assertTrue(model_onnx is not None)
        dump_data_and_model(
            X_test.astype(np.int64),
            model,
            model_onnx,
            basename="SklearnPCADefaultIntRandomised",
            allow_failure="StrictVersion("
            "onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')",
        )

Example #17

Source File: test_rpforest.py From rpforest with Apache License 2.0

6 votes

def _get_mnist_data(seed=None):

    digits = load_digits()["images"]

    if seed is not None:
        rnd = np.random.RandomState(seed=seed)
    else:
        rnd = np.random.RandomState()

    no_img, rows, cols = digits.shape
    X = digits.reshape((no_img, rows * cols))
    X = np.ascontiguousarray(X)
    rnd.shuffle(X)

    X_test = X[:100]
    X_train = X[100:]

    return X_train, X_test

Example #18

Source File: datasets.py From pyDML with GNU General Public License v3.0

6 votes

def digits_reduced():
    data=load_digits()
    XX = data['data']
    y = data['target']
    nn,dd = XX.shape
    XX = XX.reshape([nn,8,8])

    X = np.empty([nn,3])
    for i in xrange(nn):
        X[i,0] = simetria_hor(XX[i,:,:])
        X[i,1] = simetria_ver(XX[i,:,:])
        X[i,2] = np.mean(XX[i,:])
    
    return X,y

### ARFF dataframes ###

Example #19

Source File: datasets.py From pyDML with GNU General Public License v3.0

6 votes

def digits_reduced():
    data=load_digits()
    XX = data['data']
    y = data['target']
    nn,dd = XX.shape
    XX = XX.reshape([nn,8,8])

    X = np.empty([nn,3])
    for i in xrange(nn):
        X[i,0] = simetria_hor(XX[i,:,:])
        X[i,1] = simetria_ver(XX[i,:,:])
        X[i,2] = np.mean(XX[i,:])
    
    return X,y

### ARFF dataframes ###

Example #20

Source File: test_model_selection.py From pandas-ml with BSD 3-Clause "New" or "Revised" License

5 votes

def test_cross_val_score(self):
        import sklearn.svm as svm
        digits = datasets.load_digits()

        df = pdml.ModelFrame(digits)
        clf = svm.SVC(kernel=str('linear'), C=1)
        result = df.model_selection.cross_val_score(clf, cv=5)
        expected = ms.cross_val_score(clf, X=digits.data, y=digits.target, cv=5)
        self.assert_numpy_array_almost_equal(result, expected)

Example #21

Source File: datasets.py From pyDML with GNU General Public License v3.0

5 votes

def digits(numbers=None):
    data=load_digits()     # DIGITS
    X=data['data']
    y=data['target']
    
    if numbers is None:
        numbers=[0,1,2,3,4,5,6,7,8,9]
        
    selected = np.where(np.isin(y,numbers))[0]
    return X[selected,:], y[selected]

    return X,y

Example #22

Source File: nn.py From L2L with GNU General Public License v3.0

5 votes

def main():
    from sklearn.datasets import load_digits, fetch_mldata

    SMALL_MNIST = False

    if SMALL_MNIST:
        mnist_digits = load_digits()
        n_input = np.prod(mnist_digits.images.shape[1:])
        n_images = len(mnist_digits.images)  # 1797
        data_images = mnist_digits.images.reshape(n_images, -1) / 16.  # -> 1797 x 64
        data_targets = mnist_digits.target
        # im_size_x, im_size_y = 8, 8
    else:
        mnist_digits = fetch_mldata('MNIST original')
        n_input = np.prod(mnist_digits.data.shape[1:])
        data_images = mnist_digits.data / 255.  # -> 70000 x 284
        data_targets = mnist_digits.target
        # im_size_x, im_size_y = 28, 28

    n_hidden, n_output = 5, 10
    nn = NeuralNetworkClassifier(n_input, n_hidden, n_output)
    weight_shapes = nn.get_weights_shapes()
    weights = []
    for weight_shape in weight_shapes:
        weights.append(np.random.randn(*weight_shape))
    nn.set_weights(*weights)
    score = nn.score(data_images, data_targets)
    print("Score is: ", score)

Example #23

Source File: optimizee.py From L2L with GNU General Public License v3.0

5 votes

def __init__(self, traj, parameters):
        super().__init__(traj)

        if parameters.use_small_mnist:
            # 8 x 8 images
            mnist_digits = load_digits()
            n_input = np.prod(mnist_digits.images.shape[1:])
            n_images = len(mnist_digits.images)  # 1797
            data_images = mnist_digits.images.reshape(n_images, -1) / 16.  # -> 1797 x 64
            data_targets = mnist_digits.target
        else:
            # 28 x 28 images
            mnist_digits = fetch_mldata('MNIST original')
            n_input = np.prod(mnist_digits.data.shape[1:])
            data_images = mnist_digits.data / 255.  # -> 70000 x 284
            n_images = len(data_images)
            data_targets = mnist_digits.target

        self.n_images = n_images
        self.data_images, self.data_targets = data_images, data_targets

        seed = parameters.seed
        n_hidden = parameters.n_hidden

        seed = np.uint32(seed)
        self.random_state = np.random.RandomState(seed=seed)

        n_output = 10  # This is always true for mnist
        self.nn = NeuralNetworkClassifier(n_input, n_hidden, n_output)

        self.random_state = np.random.RandomState(seed=seed)

        # create_individual can be called because __init__ is complete except for traj initializtion
        indiv_dict = self.create_individual()
        for key, val in indiv_dict.items():
            traj.individual.f_add_parameter(key, val)
        traj.individual.f_add_parameter('seed', seed)

Example #24

Source File: adaboost.py From ML-From-Scratch with MIT License

5 votes

def main():
    data = datasets.load_digits()
    X = data.data
    y = data.target

    digit1 = 1
    digit2 = 8
    idx = np.append(np.where(y == digit1)[0], np.where(y == digit2)[0])
    y = data.target[idx]
    # Change labels to {-1, 1}
    y[y == digit1] = -1
    y[y == digit2] = 1
    X = data.data[idx]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    # Adaboost classification with 5 weak classifiers
    clf = Adaboost(n_clf=5)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print ("Accuracy:", accuracy)

    # Reduce dimensions to 2d using pca and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Adaboost", accuracy=accuracy)

Example #25

Source File: principal_component_analysis.py From ML-From-Scratch with MIT License

5 votes

def main():

    # Demo of how to reduce the dimensionality of the data to two dimension
    # and plot the results. 

    # Load the dataset
    data = datasets.load_digits()
    X = data.data
    y = data.target

    # Project the data onto the 2 primary principal components
    X_trans = PCA().transform(X, 2)

    x1 = X_trans[:, 0]
    x2 = X_trans[:, 1]

    cmap = plt.get_cmap('viridis')
    colors = [cmap(i) for i in np.linspace(0, 1, len(np.unique(y)))]

    class_distr = []
    # Plot the different class distributions
    for i, l in enumerate(np.unique(y)):
        _x1 = x1[y == l]
        _x2 = x2[y == l]
        _y = y[y == l]
        class_distr.append(plt.scatter(_x1, _x2, color=colors[i]))

    # Add a legend
    plt.legend(class_distr, y, loc=1)

    # Axis labels
    plt.suptitle("PCA Dimensionality Reduction")
    plt.title("Digit Dataset")
    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.show()

Example #26

Source File: perceptron.py From ML-From-Scratch with MIT License

5 votes

def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    # One-hot encoding of nominal y-values
    y = to_categorical(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    # Perceptron
    clf = Perceptron(n_iterations=5000,
        learning_rate=0.001, 
        loss=CrossEntropy,
        activation_function=Sigmoid)
    clf.fit(X_train, y_train)

    y_pred = np.argmax(clf.predict(X_test), axis=1)
    y_test = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Perceptron", accuracy=accuracy, legend_labels=np.unique(y))

Example #27

Source File: test_utils.py From pyDML with GNU General Public License v3.0

5 votes

def digits_data():
    data = load_digits()     # DIGITS
    X = data['data']
    y = data['target']

    return X, y

Example #28

Source File: datasets.py From pyDML with GNU General Public License v3.0

5 votes

def digits(numbers=None):
    data=load_digits()     # DIGITS
    X=data['data']
    y=data['target']
    
    if numbers is None:
        numbers=[0,1,2,3,4,5,6,7,8,9]
        
    selected = np.where(np.isin(y,numbers))[0]
    return X[selected,:], y[selected]

    return X,y

Example #29

Source File: test_sparse.py From twitter-stock-recommendation with MIT License

5 votes

def test_unsorted_indices():
    # test that the result with sorted and unsorted indices in csr is the same
    # we use a subset of digits as iris, blobs or make_classification didn't
    # show the problem
    digits = load_digits()
    X, y = digits.data[:50], digits.target[:50]
    X_test = sparse.csr_matrix(digits.data[50:100])

    X_sparse = sparse.csr_matrix(X)
    coef_dense = svm.SVC(kernel='linear', probability=True,
                         random_state=0).fit(X, y).coef_
    sparse_svc = svm.SVC(kernel='linear', probability=True,
                         random_state=0).fit(X_sparse, y)
    coef_sorted = sparse_svc.coef_
    # make sure dense and sparse SVM give the same result
    assert_array_almost_equal(coef_dense, coef_sorted.toarray())

    X_sparse_unsorted = X_sparse[np.arange(X.shape[0])]
    X_test_unsorted = X_test[np.arange(X_test.shape[0])]

    # make sure we scramble the indices
    assert_false(X_sparse_unsorted.has_sorted_indices)
    assert_false(X_test_unsorted.has_sorted_indices)

    unsorted_svc = svm.SVC(kernel='linear', probability=True,
                           random_state=0).fit(X_sparse_unsorted, y)
    coef_unsorted = unsorted_svc.coef_
    # make sure unsorted indices give same result
    assert_array_almost_equal(coef_unsorted.toarray(), coef_sorted.toarray())
    assert_array_almost_equal(sparse_svc.predict_proba(X_test_unsorted),
                              sparse_svc.predict_proba(X_test))

Example #30

Source File: test_pnn.py From neupy with MIT License

5 votes

def test_digit_prediction(self):
        dataset = datasets.load_digits()
        x_train, x_test, y_train, y_test = train_test_split(
            dataset.data, dataset.target, test_size=0.3
        )

        nw = algorithms.PNN(verbose=False, std=10)
        nw.train(x_train, y_train)
        result = nw.predict(x_test)

        accuracy = metrics.accuracy_score(y_test, result)
        self.assertAlmostEqual(accuracy, 0.9889, places=4)