Python sklearn.datasets.load_digits() Examples
The following are 30 code examples showing how to use sklearn.datasets.load_digits(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.
You may check out the related API usage on the sidebar.
You may also want to check out all available functions/classes of the module sklearn.datasets, or try the search function.
Example 1
Project: libact Author: ntucllab File: label_digits.py License: BSD 2-Clause "Simplified" License | 6 votes |
def split_train_test(n_classes):
    """Split the digits data into a partially labeled pool and a test set.

    Parameters
    ----------
    n_classes : int
        Forwarded to ``load_digits(n_class=...)``. Must not exceed the
        number of initially labeled samples (5).

    Returns
    -------
    trn_ds : Dataset
        Training features whose first 5 labels are kept; every remaining
        label is ``None``.
    tst_ds : Dataset
        Test features with all of their labels.
    digits
        The object returned by ``load_digits``.

    Raises
    ------
    ValueError
        If ``n_classes`` is larger than the number of initially labeled
        samples.
    """
    n_labeled = 5

    # The loop below resamples until the first ``n_labeled`` training labels
    # contain every class. With more classes than labeled samples that can
    # never happen and the loop would spin forever, so fail fast instead.
    if n_classes > n_labeled:
        raise ValueError(
            "n_classes (%r) cannot exceed the number of initially labeled "
            "samples (%d)" % (n_classes, n_labeled))

    from sklearn.datasets import load_digits

    digits = load_digits(n_class=n_classes)  # consider binary case
    X = digits.data
    y = digits.target
    print(np.shape(X))

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
    while len(np.unique(y_train[:n_labeled])) < n_classes:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.33)

    # Hide every label after the first ``n_labeled`` ones.
    hidden = [None] * (len(y_train) - n_labeled)
    trn_ds = Dataset(X_train, np.concatenate([y_train[:n_labeled], hidden]))
    tst_ds = Dataset(X_test, y_test)

    return trn_ds, tst_ds, digits
Example 2
Project: mars Author: mars-project File: test_pca.py License: Apache License 2.0 | 6 votes |
def test_pca_score_with_different_solvers(self):
    """Fit one PCA per solver on digits and compare two of their scores."""
    digits = datasets.load_digits()
    X_digits = mt.tensor(digits.data)

    # Build every estimator first, then fit them in a second pass.
    models = {}
    for solver in self.solver_list:
        models[solver] = PCA(n_components=30, svd_solver=solver,
                             random_state=0)

    for model in models.values():
        model.fit(X_digits)
        # Sanity check for the noise_variance_. For more details see
        # https://github.com/scikit-learn/scikit-learn/issues/7568
        # https://github.com/scikit-learn/scikit-learn/issues/8541
        # https://github.com/scikit-learn/scikit-learn/issues/8544
        variance_gap = model.explained_variance_ - model.noise_variance_
        assert mt.all(variance_gap >= 0).to_numpy()

    # Compare scores with different svd_solvers
    scores = {}
    for solver, model in models.items():
        scores[solver] = model.score(X_digits).to_numpy()

    assert_almost_equal(scores['full'], scores['randomized'], decimal=3)
Example 3
Project: production-tools Author: thuijskens File: train_model.py License: BSD 3-Clause "New" or "Revised" License | 6 votes |
def get_mnist_data():
    """Load the scikit-learn digits data set into memory.

    Despite the function name, the data comes from ``load_digits()``
    (the digits set bundled with scikit-learn), not from an MNIST download.

    Returns
    -------
    X : array-like, shape=[n_samples, n_features]
        Feature matrix (``digits.data``).

    y : array-like
        Labels after ``LabelBinarizer().fit_transform``, i.e. a binarized
        indicator array rather than the raw 1-D label vector.
    """
    digits = load_digits()
    X, y = digits.data, digits.target

    # Binarize the integer class labels.
    y = LabelBinarizer().fit_transform(y)

    return X, y
Example 4
Project: rpforest Author: lyst File: test_rpforest.py License: Apache License 2.0 | 6 votes |
def _get_mnist_data(seed=None):
    """Return shuffled, flattened digit images as ``(X_train, X_test)``.

    The images from ``load_digits()`` are flattened to one row per image and
    shuffled in place with a ``RandomState``; the first 100 rows become the
    test set and the rest the training set.
    """
    images = load_digits()["images"]

    # RandomState's seed defaults to None, so passing ``seed`` straight
    # through covers both the seeded and the unseeded case.
    rng = np.random.RandomState(seed=seed)

    n_images, height, width = images.shape
    flat = np.ascontiguousarray(images.reshape((n_images, height * width)))
    rng.shuffle(flat)

    return flat[100:], flat[:100]
Example 5
Project: pyDML Author: jlsuarezdiaz File: datasets.py License: GNU General Public License v3.0 | 6 votes |
def digits_reduced():
    """Reduce every digit image to three hand-crafted features.

    Each sample from ``load_digits()`` is reshaped to 8x8 and described by
    ``simetria_hor``, ``simetria_ver`` and the mean of its pixel values.

    Returns
    -------
    X : ndarray, shape (n_samples, 3)
        The three features per sample.
    y : ndarray
        The targets from ``load_digits()``.
    """
    data = load_digits()
    XX = data['data']
    y = data['target']

    n_samples = XX.shape[0]
    XX = XX.reshape([n_samples, 8, 8])

    X = np.empty([n_samples, 3])
    # ``range`` instead of ``xrange``: the latter does not exist on Python 3.
    for i in range(n_samples):
        X[i, 0] = simetria_hor(XX[i, :, :])
        X[i, 1] = simetria_ver(XX[i, :, :])
        X[i, 2] = np.mean(XX[i, :])

    return X, y

### ARFF dataframes ###
Example 6
Project: pyDML Author: jlsuarezdiaz File: datasets.py License: GNU General Public License v3.0 | 6 votes |
def digits_reduced():
    """Reduce every digit image to three hand-crafted features.

    Each sample from ``load_digits()`` is reshaped to 8x8 and described by
    ``simetria_hor``, ``simetria_ver`` and the mean of its pixel values.

    Returns
    -------
    X : ndarray, shape (n_samples, 3)
        The three features per sample.
    y : ndarray
        The targets from ``load_digits()``.
    """
    data = load_digits()
    XX = data['data']
    y = data['target']

    n_samples = XX.shape[0]
    XX = XX.reshape([n_samples, 8, 8])

    X = np.empty([n_samples, 3])
    # ``range`` instead of ``xrange``: the latter does not exist on Python 3.
    for i in range(n_samples):
        X[i, 0] = simetria_hor(XX[i, :, :])
        X[i, 1] = simetria_ver(XX[i, :, :])
        X[i, 2] = np.mean(XX[i, :])

    return X, y

### ARFF dataframes ###
Example 7
Project: sklearn-onnx Author: onnx File: test_sklearn_pca_converter.py License: MIT License | 6 votes |
def test_pca_default_int_randomised(self):
    """Convert a randomized-solver PCA fitted on digits with an int64 input."""
    digits = load_digits()
    X_train, X_test, *_ = train_test_split(
        digits.data, digits.target, test_size=0.2, random_state=42)

    estimator = PCA(random_state=42, svd_solver='randomized',
                    iterated_power=3)
    model = estimator.fit(X_train)

    input_spec = ("input", Int64TensorType([None, X_test.shape[1]]))
    model_onnx = convert_sklearn(
        model,
        initial_types=[input_spec],
    )
    self.assertTrue(model_onnx is not None)

    int_inputs = X_test.astype(np.int64)
    dump_data_and_model(
        int_inputs,
        model,
        model_onnx,
        basename="SklearnPCADefaultIntRandomised",
        allow_failure="StrictVersion("
        "onnxruntime.__version__)"
        "<= StrictVersion('0.2.1')",
    )
Example 8
Project: sklearn-onnx Author: onnx File: test_topology_prune.py License: MIT License | 6 votes |
def test_dummy_identity(self):
    """Convert a pipeline of two identity steps and count Identity nodes."""
    digits = datasets.load_digits(n_class=6)
    Xd = digits.data[:20]
    yd = digits.target[:20]
    n_samples, n_features = Xd.shape

    idtr = make_pipeline(IdentityTransformer(), identity())
    idtr.fit(Xd, yd)

    # Register the same dummy shape calculator / converter for both steps.
    for operator, alias in ((IdentityTransformer, "IdentityTransformer"),
                            (identity, "identity")):
        update_registered_converter(operator, alias,
                                    dummy_shape_calculator, dummy_converter)

    input_spec = ("input", FloatTensorType([None, n_features]))
    model_onnx = convert_sklearn(idtr, "idtr", [input_spec],
                                 target_opset=TARGET_OPSET)

    identity_nodes = []
    for node in model_onnx.graph.node:
        if node.op_type == "Identity":
            identity_nodes.append(node)
    assert len(identity_nodes) == 2
Example 9
Project: sklearn-onnx Author: onnx File: test_sklearn_k_means_converter.py License: MIT License | 6 votes |
def test_kmeans_clustering_int(self):
    """Convert a 4-cluster KMeans fitted on digits with an int64 input."""
    X = load_digits().data

    model = KMeans(n_clusters=4)
    model.fit(X)

    input_spec = ("input", Int64TensorType([None, X.shape[1]]))
    model_onnx = convert_sklearn(model, "kmeans", [input_spec],
                                 target_opset=TARGET_OPSET)
    self.assertIsNotNone(model_onnx)

    # Only rows 40..59 are dumped.
    int_rows = X.astype(numpy.int64)[40:60]
    dump_data_and_model(
        int_rows,
        model,
        model_onnx,
        basename="SklearnKMeansInt-Dec4",
        # Operator gemm is not implemented in onnxruntime
        allow_failure="StrictVersion(onnx.__version__)"
                      " < StrictVersion('1.2') or "
                      "StrictVersion(onnxruntime.__version__) "
                      "<= StrictVersion('0.2.1')",
    )
Example 10
Project: sklearn-onnx Author: onnx File: test_sklearn_k_means_converter.py License: MIT License | 6 votes |
def test_batchkmeans_clustering_int(self):
    """Convert a 4-cluster MiniBatchKMeans fitted on digits with an int64 input."""
    X = load_digits().data

    model = MiniBatchKMeans(n_clusters=4)
    model.fit(X)

    input_spec = ("input", Int64TensorType([None, X.shape[1]]))
    model_onnx = convert_sklearn(model, "kmeans", [input_spec],
                                 target_opset=TARGET_OPSET)
    self.assertIsNotNone(model_onnx)

    # Only rows 40..59 are dumped.
    int_rows = X.astype(numpy.int64)[40:60]
    dump_data_and_model(
        int_rows,
        model,
        model_onnx,
        basename="SklearnBatchKMeansInt-Dec4",
        allow_failure="StrictVersion(onnx.__version__)"
                      " < StrictVersion('1.2') or "
                      "StrictVersion(onnxruntime.__version__) "
                      "<= StrictVersion('0.2.1')",
    )
Example 11
Project: sklearn-onnx Author: onnx File: test_sklearn_calibrated_classifier_cv_converter.py License: MIT License | 6 votes |
def test_model_calibrated_classifier_cv_int(self):
    """Convert a sigmoid-calibrated MultinomialNB fitted on digits (int64 input)."""
    digits = load_digits()
    X = digits.data
    y = digits.target

    base_clf = MultinomialNB().fit(X, y)
    calibrated = CalibratedClassifierCV(base_clf, cv=2, method="sigmoid")
    model = calibrated.fit(X, y)

    input_spec = ("input", Int64TensorType([None, X.shape[1]]))
    model_onnx = convert_sklearn(
        model,
        "scikit-learn CalibratedClassifierCVMNB",
        [input_spec],
        target_opset=TARGET_OPSET
    )
    self.assertTrue(model_onnx is not None)

    int_inputs = X.astype(np.int64)
    dump_data_and_model(
        int_inputs,
        model,
        model_onnx,
        basename="SklearnCalibratedClassifierCVInt-Dec4",
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')",
    )
Example 12
Project: sklearn-onnx Author: onnx File: test_sklearn_feature_union.py License: MIT License | 6 votes |
def test_feature_union_transformer_weights_1(self):
    """Convert a weighted PCA + TruncatedSVD FeatureUnion on int64 digits."""
    digits = load_digits()
    X = digits.data.astype(np.int64)
    y = digits.target
    X_train, X_test, *_ = train_test_split(X, y, test_size=0.5,
                                           random_state=42)

    steps = [('pca', PCA()), ('svd', TruncatedSVD())]
    weights = {'pca': 10, 'svd': 3}
    model = FeatureUnion(steps, transformer_weights=weights).fit(X_train)

    input_spec = ('input', Int64TensorType([None, X_test.shape[1]]))
    model_onnx = convert_sklearn(model, 'feature union', [input_spec])
    self.assertTrue(model_onnx is not None)

    dump_data_and_model(
        X_test,
        model,
        model_onnx,
        basename="SklearnFeatureUnionTransformerWeights1-Dec4",
        allow_failure="StrictVersion("
        "onnxruntime.__version__)"
        "<= StrictVersion('0.2.1')",
    )
Example 13
Project: sklearn-onnx Author: onnx File: test_sklearn_feature_union.py License: MIT License | 6 votes |
def test_feature_union_transformer_weights_2(self):
    """Convert a PCA + TruncatedSVD FeatureUnion on float32 digits.

    The ``transformer_weights`` keys ('pca1', 'svd2') are reproduced exactly
    as in the original test; they do not match the step names.
    """
    digits = load_digits()
    X = digits.data.astype(np.float32)
    y = digits.target
    X_train, X_test, *_ = train_test_split(X, y, test_size=0.5,
                                           random_state=42)

    steps = [('pca', PCA()), ('svd', TruncatedSVD())]
    weights = {'pca1': 10, 'svd2': 3}
    model = FeatureUnion(steps, transformer_weights=weights).fit(X_train)

    input_spec = ('input', FloatTensorType([None, X_test.shape[1]]))
    model_onnx = convert_sklearn(model, 'feature union', [input_spec])
    self.assertTrue(model_onnx is not None)

    dump_data_and_model(
        X_test,
        model,
        model_onnx,
        basename="SklearnFeatureUnionTransformerWeights2-Dec4",
        allow_failure="StrictVersion("
        "onnxruntime.__version__)"
        "<= StrictVersion('0.2.1')",
    )
Example 14
Project: pandas-ml Author: pandas-ml File: test_metrics.py License: BSD 3-Clause "New" or "Revised" License | 6 votes |
def setup_method(self):
    """Fit a LinearSVC through the ModelFrame and one directly on digits.

    Stores the raw data, the frame, the direct estimator's predictions and
    decision values, and a label ordering on ``self``.
    """
    import sklearn.svm as svm

    digits = datasets.load_digits()
    self.data = digits.data
    self.target = digits.target
    self.df = pdml.ModelFrame(digits)

    # Estimator created through the frame's accessor, fitted on the frame.
    frame_svc = self.df.svm.LinearSVC(C=1.0, random_state=self.random_state)
    self.df.fit(frame_svc)

    # Equivalent estimator built directly with scikit-learn.
    plain_svc = svm.LinearSVC(C=1.0, random_state=self.random_state)
    plain_svc.fit(self.data, self.target)
    self.pred = plain_svc.predict(self.data)
    self.decision = plain_svc.decision_function(self.data)

    # argument for classification reports
    self.labels = np.array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])
Example 15
Project: pandas-ml Author: pandas-ml File: test_model_selection.py License: BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_train_test_split(self):
    """Check columns, sizes and target name survive the ModelFrame split."""
    df = pdml.ModelFrame(datasets.load_digits())
    self.assertIsInstance(df, pdml.ModelFrame)

    train_df, test_df = df.model_selection.train_test_split()
    parts = (train_df, test_df)

    for part in parts:
        tm.assert_index_equal(df.columns, part.columns)

    self.assertEqual(len(df), len(train_df) + len(test_df))
    for part in parts:
        self.assertEqual(df.shape[1], part.shape[1])

    for part in parts:
        tm.assert_index_equal(df.columns, part.columns)

    # Same checks with a renamed target column.
    df = pdml.ModelFrame(datasets.load_digits())
    df.target_name = 'xxx'
    train_df, test_df = df.model_selection.train_test_split()
    parts = (train_df, test_df)

    for part in parts:
        tm.assert_index_equal(df.columns, part.columns)
    for part in parts:
        self.assertEqual(part.target_name, 'xxx')
Example 16
Project: pandas-ml Author: pandas-ml File: test_model_selection.py License: BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_validation_curve(self):
    """Compare the ModelFrame validation curve with scikit-learn's own."""
    digits = datasets.load_digits()
    df = pdml.ModelFrame(digits)

    param_range = np.logspace(-2, -1, 2)

    frame_svc = df.svm.SVC(random_state=self.random_state)
    result = df.model_selection.validation_curve(frame_svc, 'gamma',
                                                 param_range)

    plain_svc = svm.SVC(random_state=self.random_state)
    expected = ms.validation_curve(plain_svc, digits.data, digits.target,
                                   'gamma', param_range)

    self.assertEqual(len(result), 2)
    for position in (0, 1):
        self.assert_numpy_array_almost_equal(result[position],
                                             expected[position])
Example 17
Project: ML-From-Scratch Author: eriklindernoren File: multilayer_perceptron.py License: MIT License | 6 votes |
def main():
    """Train the multilayer perceptron on digits, print accuracy and plot."""
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    # Capture the class labels for the plot legend *before* y is replaced by
    # its encoded form; np.unique on the encoded matrix would not yield the
    # class labels.
    class_labels = np.unique(y)

    # Convert the nominal y values to binary
    y = to_categorical(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4,
                                                        seed=1)

    # MLP
    clf = MultilayerPerceptron(n_hidden=16,
                               n_iterations=1000,
                               learning_rate=0.01)
    clf.fit(X_train, y_train)

    # Turn per-class outputs / encodings back into class indices.
    y_pred = np.argmax(clf.predict(X_test), axis=1)
    y_test = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Multilayer Perceptron",
                      accuracy=accuracy, legend_labels=class_labels)
Example 18
Project: ML-From-Scratch Author: eriklindernoren File: random_forest.py License: MIT License | 6 votes |
def main():
    """Train the random forest on digits, print accuracy and plot."""
    data = datasets.load_digits()
    features, labels = data.data, data.target

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.4, seed=2)

    forest = RandomForest(n_estimators=100)
    forest.fit(X_train, y_train)
    y_pred = forest.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print ("Accuracy:", accuracy)

    plotter = Plot()
    plotter.plot_in_2d(X_test, y_pred, title="Random Forest",
                       accuracy=accuracy, legend_labels=data.target_names)
Example 19
Project: ML-From-Scratch Author: eriklindernoren File: naive_bayes.py License: MIT License | 6 votes |
def main():
    """Train naive Bayes on normalized digits, print accuracy and plot."""
    data = datasets.load_digits()
    features = normalize(data.data)
    labels = data.target

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.4)

    model = NaiveBayes()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print ("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    plotter = Plot()
    plotter.plot_in_2d(X_test, y_pred, title="Naive Bayes",
                       accuracy=accuracy, legend_labels=data.target_names)
Example 20
Project: xcessiv Author: reiinakano File: functions.py License: Apache License 2.0 | 5 votes |
def get_sample_dataset(dataset_properties):
    """Returns sample dataset

    Args:
        dataset_properties (dict): Dictionary corresponding to the properties
            of the dataset used to verify the estimator and metric generators.
            Its ``'type'`` entry selects the dataset; for ``'multiclass'`` the
            remaining entries are forwarded to ``make_classification``.

    Returns:
        X (array-like): Features array

        y (array-like): Labels array

        splits (iterator): This is an iterator that returns train test splits
            for cross-validation purposes on ``X`` and ``y``.

    Raises:
        exceptions.UserError: If the dataset type is unknown, or if building
            the ``'multiclass'`` dataset fails.
    """
    kwargs = dataset_properties.copy()
    data_type = kwargs.pop('type')

    # NOTE: the K-fold splitters are built without ``random_state``. They are
    # not shuffling, so a seed never influenced the splits, and current
    # scikit-learn raises ValueError when a seed is given with shuffle=False.
    if data_type == 'multiclass':
        try:
            X, y = datasets.make_classification(random_state=8, **kwargs)
            splits = model_selection.StratifiedKFold(n_splits=2).split(X, y)
        except Exception as e:
            raise exceptions.UserError(repr(e))
    elif data_type == 'iris':
        X, y = datasets.load_iris(return_X_y=True)
        splits = model_selection.StratifiedKFold(n_splits=2).split(X, y)
    elif data_type == 'mnist':
        X, y = datasets.load_digits(return_X_y=True)
        splits = model_selection.StratifiedKFold(n_splits=2).split(X, y)
    elif data_type == 'breast_cancer':
        X, y = datasets.load_breast_cancer(return_X_y=True)
        splits = model_selection.StratifiedKFold(n_splits=2).split(X, y)
    elif data_type == 'boston':
        X, y = datasets.load_boston(return_X_y=True)
        splits = model_selection.KFold(n_splits=2).split(X)
    elif data_type == 'diabetes':
        X, y = datasets.load_diabetes(return_X_y=True)
        splits = model_selection.KFold(n_splits=2).split(X)
    else:
        raise exceptions.UserError(
            'Unknown dataset type {}'.format(dataset_properties['type']))

    return X, y, splits
Example 21
Project: xcessiv Author: reiinakano File: extractmaindataset.py License: Apache License 2.0 | 5 votes |
def extract_main_dataset():
    """Return the digits features and labels as an ``(X, y)`` pair."""
    features, labels = load_digits(return_X_y=True)
    return features, labels
Example 22
Project: xcessiv Author: reiinakano File: test_functions.py License: Apache License 2.0 | 5 votes |
def test_correct_dataset(self):
    """verify_dataset must report the digits feature and label shapes."""
    features, labels = load_digits(return_X_y=True)
    report = functions.verify_dataset(features, labels)

    assert report['features_shape'] == (1797, 64)
    assert report['labels_shape'] == (1797,)
Example 23
Project: MKLpy Author: IvanoLauriola File: unit_tests.py License: GNU General Public License v3.0 | 5 votes |
def setUp(self):
    """Prepare raw copies, normalized data, label tensors and kernel lists."""
    data = load_digits()
    self.Xtr, self.Xte, Ytr, Yte = train_test_split(
        data.data, data.target, shuffle=True, train_size=.15)

    # Keep copies of the splits as they were before normalization.
    self.Xtr_numpy = self.Xtr.copy()
    self.Xte_numpy = self.Xte.copy()

    self.Xtr = preprocessing.normalization(self.Xtr)
    self.Xte = preprocessing.normalization(self.Xte)
    self.Ytr = torch.Tensor(Ytr)
    self.Yte = torch.Tensor(Yte)

    # One homogeneous polynomial kernel per degree 1..10.
    self.KLtr = []
    for d in range(1, 11):
        self.KLtr.append(
            pairwise_mk.homogeneous_polynomial_kernel(self.Xtr, degree=d))

    self.KLte = []
    for d in range(1, 11):
        self.KLte.append(
            pairwise_mk.homogeneous_polynomial_kernel(self.Xte, self.Xtr,
                                                      degree=d))
Example 24
Project: recruit Author: Frank-qlu File: test_downstream.py License: Apache License 2.0 | 5 votes |
def test_scikit_learn(df):
    """Fit an SVC on all but the last digits sample and predict the last."""
    sklearn = import_module('sklearn')  # noqa
    from sklearn import svm, datasets

    digits = datasets.load_digits()
    train_X, train_y = digits.data[:-1], digits.target[:-1]
    held_out = digits.data[-1:]

    classifier = svm.SVC(gamma=0.001, C=100.)
    classifier.fit(train_X, train_y)
    classifier.predict(held_out)

# Cython import warning and traitlets
Example 25
Project: deepJDOT Author: bbdamodaran File: DatasetLoad.py License: MIT License | 5 votes |
def digits_dataload():
    """Return the digits features divided by 16, together with the targets."""
    from sklearn import datasets

    digits = datasets.load_digits()
    scaled = digits.data / 16.
    targets = digits.target
    return scaled, targets
Example 26
Project: vnpy_crypto Author: birforce File: test_downstream.py License: MIT License | 5 votes |
def test_scikit_learn(df):
    """Fit an SVC on all but the last digits sample and predict the last."""
    sklearn = import_module('sklearn')  # noqa
    from sklearn import svm, datasets

    digits = datasets.load_digits()
    train_X, train_y = digits.data[:-1], digits.target[:-1]
    held_out = digits.data[-1:]

    classifier = svm.SVC(gamma=0.001, C=100.)
    classifier.fit(train_X, train_y)
    classifier.predict(held_out)
Example 27
Project: Mastering-Elasticsearch-7.0 Author: PacktPublishing File: test_sparse.py License: MIT License | 5 votes |
def test_unsorted_indices():
    """Sorted and unsorted CSR indices must give the same linear SVC."""
    # test that the result with sorted and unsorted indices in csr is the same
    # we use a subset of digits as iris, blobs or make_classification didn't
    # show the problem
    digits = load_digits()
    X, y = digits.data[:50], digits.target[:50]
    X_test = sparse.csr_matrix(digits.data[50:100])
    X_sparse = sparse.csr_matrix(X)

    def linear_svc():
        # Every model in this test uses the same configuration.
        return svm.SVC(kernel='linear', probability=True, random_state=0)

    coef_dense = linear_svc().fit(X, y).coef_
    sparse_svc = linear_svc().fit(X_sparse, y)
    coef_sorted = sparse_svc.coef_
    # make sure dense and sparse SVM give the same result
    assert_array_almost_equal(coef_dense, coef_sorted.toarray())

    # reverse each row's indices
    def scramble_indices(matrix):
        reversed_data = []
        reversed_indices = []
        # Consecutive indptr entries delimit one row's slice.
        for start, stop in zip(matrix.indptr[:-1], matrix.indptr[1:]):
            reversed_data.extend(matrix.data[start:stop][::-1])
            reversed_indices.extend(matrix.indices[start:stop][::-1])
        return sparse.csr_matrix(
            (reversed_data, reversed_indices, matrix.indptr),
            shape=matrix.shape)

    X_sparse_unsorted = scramble_indices(X_sparse)
    X_test_unsorted = scramble_indices(X_test)

    assert not X_sparse_unsorted.has_sorted_indices
    assert not X_test_unsorted.has_sorted_indices

    unsorted_svc = linear_svc().fit(X_sparse_unsorted, y)
    coef_unsorted = unsorted_svc.coef_
    # make sure unsorted indices give same result
    assert_array_almost_equal(coef_unsorted.toarray(), coef_sorted.toarray())
    assert_array_almost_equal(sparse_svc.predict_proba(X_test_unsorted),
                              sparse_svc.predict_proba(X_test))
Example 28
Project: Mastering-Elasticsearch-7.0 Author: PacktPublishing File: test_naive_bayes.py License: MIT License | 5 votes |
def test_check_accuracy_on_digits():
    """Mean 10-fold accuracy of the NB models on digits must stay above fixed floors."""
    # Non regression test to make sure that any further refactoring / optim
    # of the NB models do not harm the performance on a slightly non-linearly
    # separable dataset
    digits = load_digits()
    X, y = digits.data, digits.target

    # Binary sub-problem: digit 3 versus digit 8.
    is_3_or_8 = np.logical_or(digits.target == 3, digits.target == 8)
    X_3v8, y_3v8 = X[is_3_or_8], y[is_3_or_8]

    def mean_cv_score(estimator, features, labels):
        return cross_val_score(estimator, features, labels, cv=10).mean()

    # Multinomial NB
    assert_greater(mean_cv_score(MultinomialNB(alpha=10), X, y), 0.86)
    assert_greater(mean_cv_score(MultinomialNB(alpha=10), X_3v8, y_3v8), 0.94)

    # Bernoulli NB
    assert_greater(mean_cv_score(BernoulliNB(alpha=10), X > 4, y), 0.83)
    assert_greater(mean_cv_score(BernoulliNB(alpha=10), X_3v8 > 4, y_3v8),
                   0.92)

    # Gaussian NB
    assert_greater(mean_cv_score(GaussianNB(), X, y), 0.77)
    assert_greater(mean_cv_score(GaussianNB(var_smoothing=0.1), X, y), 0.89)
    assert_greater(mean_cv_score(GaussianNB(), X_3v8, y_3v8), 0.86)
Example 29
Project: Mastering-Elasticsearch-7.0 Author: PacktPublishing File: test_base.py License: MIT License | 5 votes |
def test_load_digits():
    """The default digits bunch has a (1797, 64) data array and 10 distinct targets."""
    digits = load_digits()

    expected_shape = (1797, 64)
    assert_equal(digits.data.shape, expected_shape)

    n_distinct_targets = numpy.unique(digits.target).size
    assert_equal(n_distinct_targets, 10)

    # test return_X_y option
    check_return_X_y(digits, partial(load_digits))
Example 30
Project: Mastering-Elasticsearch-7.0 Author: PacktPublishing File: test_base.py License: MIT License | 5 votes |
def test_load_digits_n_class_lt_10():
    """Restricting digits to 9 classes gives a (1617, 64) data array and 9 targets."""
    # Pass n_class by keyword: it is the same argument the positional call
    # supplied, and it keeps working where load_digits only accepts
    # keyword arguments.
    digits = load_digits(n_class=9)

    assert_equal(digits.data.shape, (1617, 64))
    assert_equal(numpy.unique(digits.target).size, 9)