Python sklearn.datasets.load_digits() Examples
The following are 30 code examples of sklearn.datasets.load_digits().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.datasets, or try the search function.

Example #1
Source File: label_digits.py From libact with BSD 2-Clause "Simplified" License | 7 votes |
def split_train_test(n_classes):
    """Build a partially labeled training set and a test set from the digits data.

    The first ``n_classes`` digit classes are loaded and split 67/33 into
    train and test parts.  The split is re-drawn until the first
    ``n_labeled`` (5) training labels contain every class.  Only those first
    ``n_labeled`` training samples keep their labels; all remaining training
    labels are replaced by ``None``.

    Parameters
    ----------
    n_classes : int
        Number of digit classes to load (forwarded to ``load_digits`` as
        ``n_class``).

    Returns
    -------
    trn_ds : Dataset
        Training data with only the first ``n_labeled`` labels kept.
    tst_ds : Dataset
        Fully labeled test data.
    digits
        The object returned by ``load_digits``.

    Raises
    ------
    ValueError
        If ``n_classes`` is larger than the number of labeled samples, since
        the labeled prefix could then never contain every class.
    """
    from sklearn.datasets import load_digits

    n_labeled = 5
    if n_classes > n_labeled:
        # Without this guard the re-draw loop below would spin forever:
        # n_labeled samples can hold at most n_labeled distinct classes.
        raise ValueError(
            "n_classes (%r) must not exceed the number of labeled samples (%d)"
            % (n_classes, n_labeled)
        )

    digits = load_digits(n_class=n_classes)  # consider binary case
    X = digits.data
    y = digits.target
    print(np.shape(X))

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
    # Re-draw until the labeled prefix covers all requested classes.
    while len(np.unique(y_train[:n_labeled])) < n_classes:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.33)

    # Keep the first n_labeled labels, mark everything else as unlabeled.
    partial_labels = np.concatenate(
        [y_train[:n_labeled], [None] * (len(y_train) - n_labeled)])
    trn_ds = Dataset(X_train, partial_labels)
    tst_ds = Dataset(X_test, y_test)

    return trn_ds, tst_ds, digits
Example #2
Source File: test_pca.py From mars with Apache License 2.0 | 6 votes |
def test_pca_score_with_different_solvers(self):
    """Fit one PCA per solver on the digits data and compare their scores."""
    digits_tensor = mt.tensor(datasets.load_digits().data)

    # One 30-component PCA per solver listed on the test case.
    models = {}
    for solver in self.solver_list:
        models[solver] = PCA(n_components=30, svd_solver=solver,
                             random_state=0)

    for model in models.values():
        model.fit(digits_tensor)
        # Sanity check for the noise_variance_. For more details see
        # https://github.com/scikit-learn/scikit-learn/issues/7568
        # https://github.com/scikit-learn/scikit-learn/issues/8541
        # https://github.com/scikit-learn/scikit-learn/issues/8544
        variance_gap = model.explained_variance_ - model.noise_variance_
        assert mt.all(variance_gap >= 0).to_numpy()

    # Compare scores with different svd_solvers
    scores = {}
    for solver, model in models.items():
        scores[solver] = model.score(digits_tensor).to_numpy()

    assert_almost_equal(scores['full'], scores['randomized'], decimal=3)
Example #3
Source File: train_model.py From production-tools with BSD 3-Clause "New" or "Revised" License | 6 votes |
def get_mnist_data():
    """Load the digits data set with label-binarized targets.

    Despite the name, the data comes from ``load_digits()``.

    Returns
    -------
    X : array-like
        The ``data`` attribute of the loaded digits set.

    y : array-like
        The ``target`` attribute after ``LabelBinarizer().fit_transform``.
    """
    bunch = load_digits()
    features = bunch.data
    binarized_labels = LabelBinarizer().fit_transform(bunch.target)
    return features, binarized_labels
Example #4
Source File: test_rpforest.py From rpforest with Apache License 2.0 | 6 votes |
def _get_mnist_data(seed=None):
    """Return shuffled, flattened digit images as ``(X_train, X_test)``.

    The images from ``load_digits()`` are flattened to one row per image,
    shuffled in place with a ``RandomState`` (seeded when ``seed`` is given),
    and split so that the first 100 rows form the test set.
    """
    images = load_digits()["images"]

    if seed is None:
        rng = np.random.RandomState()
    else:
        rng = np.random.RandomState(seed=seed)

    n_images, n_rows, n_cols = images.shape
    flat = np.ascontiguousarray(images.reshape((n_images, n_rows * n_cols)))
    rng.shuffle(flat)

    # First 100 shuffled rows are held out for testing.
    return flat[100:], flat[:100]
Example #5
Source File: datasets.py From pyDML with GNU General Public License v3.0 | 6 votes |
def digits_reduced():
    """Reduce every digit image to three summary features.

    Each sample from ``load_digits()`` is reshaped to an 8x8 image and
    described by ``simetria_hor(image)``, ``simetria_ver(image)`` and the
    mean of its pixel values.

    Returns
    -------
    X : ndarray of shape (n_samples, 3)
        The three features per sample, in the order listed above.
    y : array-like
        The ``target`` entry of the loaded data.
    """
    data = load_digits()
    y = data['target']
    images = data['data']
    nn = images.shape[0]
    images = images.reshape([nn, 8, 8])

    X = np.empty([nn, 3])
    # enumerate() replaces the Python-2-only xrange() index loop.
    for i, image in enumerate(images):
        X[i, 0] = simetria_hor(image)
        X[i, 1] = simetria_ver(image)
        X[i, 2] = np.mean(image)

    return X, y

### ARFF dataframes ###
Example #6
Source File: datasets.py From pyDML with GNU General Public License v3.0 | 6 votes |
def digits_reduced():
    """Reduce every digit image to three summary features.

    Each sample from ``load_digits()`` is reshaped to an 8x8 image and
    described by ``simetria_hor(image)``, ``simetria_ver(image)`` and the
    mean of its pixel values.

    Returns
    -------
    X : ndarray of shape (n_samples, 3)
        The three features per sample, in the order listed above.
    y : array-like
        The ``target`` entry of the loaded data.
    """
    data = load_digits()
    y = data['target']
    images = data['data']
    nn = images.shape[0]
    images = images.reshape([nn, 8, 8])

    X = np.empty([nn, 3])
    # enumerate() replaces the Python-2-only xrange() index loop.
    for i, image in enumerate(images):
        X[i, 0] = simetria_hor(image)
        X[i, 1] = simetria_ver(image)
        X[i, 2] = np.mean(image)

    return X, y

### ARFF dataframes ###
Example #7
Source File: test_sklearn_pca_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_pca_default_int_randomised(self):
    """Convert a randomized-solver PCA fitted on digits, declared with int64 input."""
    digits = load_digits()
    parts = train_test_split(
        digits.data, digits.target, test_size=0.2, random_state=42)
    X_train, X_test = parts[0], parts[1]

    estimator = PCA(random_state=42, svd_solver='randomized',
                    iterated_power=3)
    model = estimator.fit(X_train)

    input_spec = [("input", Int64TensorType([None, X_test.shape[1]]))]
    model_onnx = convert_sklearn(
        model,
        initial_types=input_spec,
    )
    self.assertTrue(model_onnx is not None)

    allow_failure = ("StrictVersion("
                     "onnxruntime.__version__)"
                     "<= StrictVersion('0.2.1')")
    dump_data_and_model(
        X_test.astype(np.int64),
        model,
        model_onnx,
        basename="SklearnPCADefaultIntRandomised",
        allow_failure=allow_failure,
    )
Example #8
Source File: test_topology_prune.py From sklearn-onnx with MIT License | 6 votes |
def test_dummy_identity(self):
    """Convert a two-step identity pipeline and expect two Identity nodes."""
    digits = datasets.load_digits(n_class=6)
    Xd, yd = digits.data[:20], digits.target[:20]
    n_samples, n_features = Xd.shape

    idtr = make_pipeline(IdentityTransformer(), identity())
    idtr.fit(Xd, yd)

    # Register the dummy shape calculator and converter for both steps.
    update_registered_converter(IdentityTransformer, "IdentityTransformer",
                                dummy_shape_calculator, dummy_converter)
    update_registered_converter(identity, "identity",
                                dummy_shape_calculator, dummy_converter)

    input_spec = [("input", FloatTensorType([None, Xd.shape[1]]))]
    model_onnx = convert_sklearn(
        idtr, "idtr", input_spec, target_opset=TARGET_OPSET)

    idnode = []
    for node in model_onnx.graph.node:
        if node.op_type == "Identity":
            idnode.append(node)
    assert len(idnode) == 2
Example #9
Source File: test_sklearn_k_means_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_kmeans_clustering_int(self):
    """Convert a 4-cluster KMeans fitted on digits, declared with int64 input."""
    X = load_digits().data
    model = KMeans(n_clusters=4)
    model.fit(X)

    input_spec = [("input", Int64TensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(model, "kmeans", input_spec,
                                 target_opset=TARGET_OPSET)
    self.assertIsNotNone(model_onnx)

    # Operator gemm is not implemented in onnxruntime
    allow_failure = ("StrictVersion(onnx.__version__)"
                     " < StrictVersion('1.2') or "
                     "StrictVersion(onnxruntime.__version__) "
                     "<= StrictVersion('0.2.1')")
    sample = X.astype(numpy.int64)[40:60]
    dump_data_and_model(
        sample,
        model,
        model_onnx,
        basename="SklearnKMeansInt-Dec4",
        allow_failure=allow_failure,
    )
Example #10
Source File: test_sklearn_k_means_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_batchkmeans_clustering_int(self):
    """Convert a 4-cluster MiniBatchKMeans fitted on digits, declared with int64 input."""
    X = load_digits().data
    model = MiniBatchKMeans(n_clusters=4)
    model.fit(X)

    input_spec = [("input", Int64TensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(model, "kmeans", input_spec,
                                 target_opset=TARGET_OPSET)
    self.assertIsNotNone(model_onnx)

    allow_failure = ("StrictVersion(onnx.__version__)"
                     " < StrictVersion('1.2') or "
                     "StrictVersion(onnxruntime.__version__) "
                     "<= StrictVersion('0.2.1')")
    sample = X.astype(numpy.int64)[40:60]
    dump_data_and_model(
        sample,
        model,
        model_onnx,
        basename="SklearnBatchKMeansInt-Dec4",
        allow_failure=allow_failure,
    )
Example #11
Source File: test_sklearn_calibrated_classifier_cv_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_model_calibrated_classifier_cv_int(self):
    """Convert a sigmoid-calibrated MultinomialNB fitted on digits (int64 input)."""
    digits = load_digits()
    X = digits.data
    y = digits.target

    base_clf = MultinomialNB().fit(X, y)
    calibrated = CalibratedClassifierCV(base_clf, cv=2, method="sigmoid")
    model = calibrated.fit(X, y)

    input_spec = [("input", Int64TensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(
        model,
        "scikit-learn CalibratedClassifierCVMNB",
        input_spec,
        target_opset=TARGET_OPSET
    )
    self.assertTrue(model_onnx is not None)

    allow_failure = ("StrictVersion(onnxruntime.__version__)"
                     "<= StrictVersion('0.2.1')")
    dump_data_and_model(
        X.astype(np.int64),
        model,
        model_onnx,
        basename="SklearnCalibratedClassifierCVInt-Dec4",
        allow_failure=allow_failure,
    )
Example #12
Source File: test_sklearn_feature_union.py From sklearn-onnx with MIT License | 6 votes |
def test_feature_union_transformer_weights_1(self):
    """Convert a weighted PCA + TruncatedSVD FeatureUnion fitted on int64 digits."""
    digits = load_digits()
    X = digits.data.astype(np.int64)
    y = digits.target
    parts = train_test_split(X, y, test_size=0.5, random_state=42)
    X_train, X_test = parts[0], parts[1]

    union = FeatureUnion(
        [('pca', PCA()), ('svd', TruncatedSVD())],
        transformer_weights={'pca': 10, 'svd': 3},
    )
    model = union.fit(X_train)

    input_spec = [('input', Int64TensorType([None, X_test.shape[1]]))]
    model_onnx = convert_sklearn(model, 'feature union', input_spec)
    self.assertTrue(model_onnx is not None)

    allow_failure = ("StrictVersion("
                     "onnxruntime.__version__)"
                     "<= StrictVersion('0.2.1')")
    dump_data_and_model(
        X_test,
        model,
        model_onnx,
        basename="SklearnFeatureUnionTransformerWeights1-Dec4",
        allow_failure=allow_failure,
    )
Example #13
Source File: test_sklearn_feature_union.py From sklearn-onnx with MIT License | 6 votes |
def test_feature_union_transformer_weights_2(self):
    """Convert a PCA + TruncatedSVD FeatureUnion fitted on float32 digits.

    NOTE(review): the weight keys ('pca1', 'svd2') do not match the step
    names ('pca', 'svd'); presumably deliberate -- confirm.
    """
    digits = load_digits()
    X = digits.data.astype(np.float32)
    y = digits.target
    parts = train_test_split(X, y, test_size=0.5, random_state=42)
    X_train, X_test = parts[0], parts[1]

    union = FeatureUnion(
        [('pca', PCA()), ('svd', TruncatedSVD())],
        transformer_weights={'pca1': 10, 'svd2': 3},
    )
    model = union.fit(X_train)

    input_spec = [('input', FloatTensorType([None, X_test.shape[1]]))]
    model_onnx = convert_sklearn(model, 'feature union', input_spec)
    self.assertTrue(model_onnx is not None)

    allow_failure = ("StrictVersion("
                     "onnxruntime.__version__)"
                     "<= StrictVersion('0.2.1')")
    dump_data_and_model(
        X_test,
        model,
        model_onnx,
        basename="SklearnFeatureUnionTransformerWeights2-Dec4",
        allow_failure=allow_failure,
    )
Example #14
Source File: test_metrics.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 6 votes |
def setup_method(self):
    """Prepare digits data, a fitted ModelFrame and reference LinearSVC outputs."""
    import sklearn.svm as svm

    digits = datasets.load_digits()
    self.data, self.target = digits.data, digits.target

    # ModelFrame fitted through the pandas-ml accessor.
    self.df = pdml.ModelFrame(digits)
    frame_svc = self.df.svm.LinearSVC(C=1.0, random_state=self.random_state)
    self.df.fit(frame_svc)

    # Plain scikit-learn estimator providing the reference outputs.
    reference_svc = svm.LinearSVC(C=1.0, random_state=self.random_state)
    reference_svc.fit(self.data, self.target)
    self.pred = reference_svc.predict(self.data)
    self.decision = reference_svc.decision_function(self.data)

    # argument for classification reports
    self.labels = np.array(list(range(9, -1, -1)))
Example #15
Source File: test_model_selection.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_train_test_split(self):
    """Check columns, sizes and target name of ModelFrame train/test splits."""
    df = pdml.ModelFrame(datasets.load_digits())
    self.assertIsInstance(df, pdml.ModelFrame)

    train_df, test_df = df.model_selection.train_test_split()
    halves = (train_df, test_df)

    for half in halves:
        tm.assert_index_equal(df.columns, half.columns)
    self.assertEqual(len(df), len(train_df) + len(test_df))
    for half in halves:
        self.assertEqual(df.shape[1], half.shape[1])
    for half in halves:
        tm.assert_index_equal(df.columns, half.columns)

    # Same checks with a custom target name, which both halves must carry.
    df = pdml.ModelFrame(datasets.load_digits())
    df.target_name = 'xxx'
    train_df, test_df = df.model_selection.train_test_split()
    halves = (train_df, test_df)

    for half in halves:
        tm.assert_index_equal(df.columns, half.columns)
    for half in halves:
        self.assertEqual(half.target_name, 'xxx')
Example #16
Source File: test_model_selection.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_validation_curve(self):
    """Compare ModelFrame's validation_curve with the direct ``ms`` call."""
    digits = datasets.load_digits()
    frame = pdml.ModelFrame(digits)
    gammas = np.logspace(-2, -1, 2)

    frame_svc = frame.svm.SVC(random_state=self.random_state)
    result = frame.model_selection.validation_curve(frame_svc, 'gamma',
                                                    gammas)

    reference_svc = svm.SVC(random_state=self.random_state)
    expected = ms.validation_curve(reference_svc,
                                   digits.data, digits.target,
                                   'gamma', gammas)

    self.assertEqual(len(result), 2)
    for position in (0, 1):
        self.assert_numpy_array_almost_equal(result[position],
                                             expected[position])
Example #17
Source File: multilayer_perceptron.py From ML-From-Scratch with MIT License | 6 votes |
def main():
    """Train the multilayer perceptron on the digits data and plot predictions.

    The features are normalized and the targets one-hot encoded, the data is
    split 60/40 with a fixed seed, and a 16-unit MLP is fitted.  The accuracy
    on the held-out part is printed and the predictions are plotted in 2D.
    """
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    # Convert the nominal y values to binary
    y = to_categorical(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=1)

    # MLP
    clf = MultilayerPerceptron(n_hidden=16,
                               n_iterations=1000,
                               learning_rate=0.01)
    clf.fit(X_train, y_train)

    # Collapse one-hot rows back to class indices before scoring.
    y_pred = np.argmax(clf.predict(X_test), axis=1)
    y_test = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results.
    # The legend uses the dataset's class names: np.unique(y) on the one-hot
    # matrix would only yield the 0/1 indicator values.
    Plot().plot_in_2d(X_test, y_pred, title="Multilayer Perceptron",
                      accuracy=accuracy, legend_labels=data.target_names)
Example #18
Source File: random_forest.py From ML-From-Scratch with MIT License | 6 votes |
def main():
    """Train the random forest on the digits data, print accuracy and plot."""
    digits = datasets.load_digits()
    features, labels = digits.data, digits.target

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.4, seed=2)

    forest = RandomForest(n_estimators=100)
    forest.fit(X_train, y_train)
    predictions = forest.predict(X_test)

    accuracy = accuracy_score(y_test, predictions)
    print("Accuracy:", accuracy)

    Plot().plot_in_2d(X_test, predictions, title="Random Forest",
                      accuracy=accuracy, legend_labels=digits.target_names)
Example #19
Source File: naive_bayes.py From ML-From-Scratch with MIT License | 6 votes |
def main():
    """Train naive Bayes on the normalized digits data, print accuracy and plot."""
    digits = datasets.load_digits()
    features = normalize(digits.data)
    labels = digits.target

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.4)

    model = NaiveBayes()
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    accuracy = accuracy_score(y_test, predictions)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, predictions, title="Naive Bayes",
                      accuracy=accuracy, legend_labels=digits.target_names)
Example #20
Source File: functions.py From xcessiv with Apache License 2.0 | 5 votes |
def get_sample_dataset(dataset_properties):
    """Returns sample dataset

    Args:
        dataset_properties (dict): Dictionary corresponding to the properties
            of the dataset used to verify the estimator and metric
            generators. Its ``'type'`` entry selects the dataset; for
            ``'multiclass'`` the remaining entries are forwarded to
            ``make_classification``.

    Returns:
        X (array-like): Features array

        y (array-like): Labels array

        splits (iterator): This is an iterator that returns train test
            splits for cross-validation purposes on ``X`` and ``y``.

    Raises:
        exceptions.UserError: If the type is unknown, or if building the
            ``'multiclass'`` dataset fails.
    """
    options = dataset_properties.copy()
    data_type = options.pop('type')

    if data_type == 'multiclass':
        try:
            X, y = datasets.make_classification(random_state=8, **options)
            splits = model_selection.StratifiedKFold(
                n_splits=2, random_state=8).split(X, y)
        except Exception as e:
            raise exceptions.UserError(repr(e))
        return X, y, splits

    # (type name, loader attribute on ``datasets``, use stratified folds?)
    # Loaders are looked up lazily so only the requested one is touched.
    bundled = (
        ('iris', 'load_iris', True),
        ('mnist', 'load_digits', True),
        ('breast_cancer', 'load_breast_cancer', True),
        ('boston', 'load_boston', False),
        ('diabetes', 'load_diabetes', False),
    )

    for type_name, loader_name, stratified in bundled:
        if data_type == type_name:
            X, y = getattr(datasets, loader_name)(return_X_y=True)
            if stratified:
                splits = model_selection.StratifiedKFold(
                    n_splits=2, random_state=8).split(X, y)
            else:
                splits = model_selection.KFold(
                    n_splits=2, random_state=8).split(X)
            return X, y, splits

    raise exceptions.UserError(
        'Unknown dataset type {}'.format(dataset_properties['type']))
Example #21
Source File: extractmaindataset.py From xcessiv with Apache License 2.0 | 5 votes |
def extract_main_dataset():
    """Return the digits data as a ``(features, labels)`` pair."""
    features, labels = load_digits(return_X_y=True)
    return features, labels
Example #22
Source File: test_functions.py From xcessiv with Apache License 2.0 | 5 votes |
def test_correct_dataset(self):
    """verify_dataset must report the digits feature and label shapes."""
    features, labels = load_digits(return_X_y=True)
    report = functions.verify_dataset(features, labels)

    expected_shapes = (
        ('features_shape', (1797, 64)),
        ('labels_shape', (1797,)),
    )
    for key, shape in expected_shapes:
        assert report[key] == shape
Example #23
Source File: unit_tests.py From MKLpy with GNU General Public License v3.0 | 5 votes |
def setUp(self):
    """Prepare normalized digits splits, label tensors and polynomial kernel lists."""
    digits = load_digits()
    self.Xtr, self.Xte, labels_tr, labels_te = train_test_split(
        digits.data, digits.target, shuffle=True, train_size=.15)

    # Keep untouched copies before the features are normalized.
    self.Xtr_numpy = self.Xtr.copy()
    self.Xte_numpy = self.Xte.copy()

    self.Xtr = preprocessing.normalization(self.Xtr)
    self.Xte = preprocessing.normalization(self.Xte)
    self.Ytr = torch.Tensor(labels_tr)
    self.Yte = torch.Tensor(labels_te)

    # Homogeneous polynomial kernels of degree 1 through 10.
    degrees = range(1, 11)
    self.KLtr = []
    for degree in degrees:
        self.KLtr.append(
            pairwise_mk.homogeneous_polynomial_kernel(self.Xtr, degree=degree))
    self.KLte = []
    for degree in degrees:
        self.KLte.append(
            pairwise_mk.homogeneous_polynomial_kernel(
                self.Xte, self.Xtr, degree=degree))
Example #24
Source File: test_downstream.py From recruit with Apache License 2.0 | 5 votes |
def test_scikit_learn(df):
    """Fit an SVC on all but the last digit sample and predict the last one."""
    sklearn = import_module('sklearn')  # noqa
    from sklearn import svm, datasets

    digits = datasets.load_digits()
    features, labels = digits.data, digits.target

    classifier = svm.SVC(gamma=0.001, C=100.)
    classifier.fit(features[:-1], labels[:-1])
    classifier.predict(features[-1:])

# Cython import warning and traitlets
Example #25
Source File: DatasetLoad.py From deepJDOT with MIT License | 5 votes |
def digits_dataload():
    """Return the digits features divided by 16, together with the targets."""
    from sklearn import datasets

    digits = datasets.load_digits()
    scaled = digits.data / 16.
    targets = digits.target
    return scaled, targets
Example #26
Source File: test_downstream.py From vnpy_crypto with MIT License | 5 votes |
def test_scikit_learn(df):
    """Fit an SVC on all but the last digit sample and predict the last one."""
    sklearn = import_module('sklearn')  # noqa
    from sklearn import svm, datasets

    digits = datasets.load_digits()
    features, labels = digits.data, digits.target

    classifier = svm.SVC(gamma=0.001, C=100.)
    classifier.fit(features[:-1], labels[:-1])
    classifier.predict(features[-1:])
Example #27
Source File: test_sparse.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_unsorted_indices():
    """Linear SVC must give the same results for sorted and unsorted CSR indices."""
    # test that the result with sorted and unsorted indices in csr is the same
    # we use a subset of digits as iris, blobs or make_classification didn't
    # show the problem
    digits = load_digits()
    X = digits.data[:50]
    y = digits.target[:50]
    X_test = sparse.csr_matrix(digits.data[50:100])
    X_sparse = sparse.csr_matrix(X)

    def make_svc():
        # Fresh, identically configured estimator for every fit.
        return svm.SVC(kernel='linear', probability=True, random_state=0)

    coef_dense = make_svc().fit(X, y).coef_
    sparse_svc = make_svc().fit(X_sparse, y)
    coef_sorted = sparse_svc.coef_
    # make sure dense and sparse SVM give the same result
    assert_array_almost_equal(coef_dense, coef_sorted.toarray())

    # reverse each row's indices
    def scramble_indices(matrix):
        reversed_data = []
        reversed_indices = []
        row_bounds = zip(matrix.indptr[:-1], matrix.indptr[1:])
        for start, stop in row_bounds:
            row = slice(start, stop)
            reversed_data.extend(matrix.data[row][::-1])
            reversed_indices.extend(matrix.indices[row][::-1])
        return sparse.csr_matrix(
            (reversed_data, reversed_indices, matrix.indptr),
            shape=matrix.shape)

    X_sparse_unsorted = scramble_indices(X_sparse)
    X_test_unsorted = scramble_indices(X_test)

    assert not X_sparse_unsorted.has_sorted_indices
    assert not X_test_unsorted.has_sorted_indices

    unsorted_svc = make_svc().fit(X_sparse_unsorted, y)
    coef_unsorted = unsorted_svc.coef_
    # make sure unsorted indices give same result
    assert_array_almost_equal(coef_unsorted.toarray(), coef_sorted.toarray())
    assert_array_almost_equal(sparse_svc.predict_proba(X_test_unsorted),
                              sparse_svc.predict_proba(X_test))
Example #28
Source File: test_naive_bayes.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_check_accuracy_on_digits():
    """Guard the cross-validated accuracy of the NB models on the digits data."""
    # Non regression test to make sure that any further refactoring / optim
    # of the NB models do not harm the performance on a slightly non-linearly
    # separable dataset
    digits = load_digits()
    X, y = digits.data, digits.target
    is_3_or_8 = (digits.target == 3) | (digits.target == 8)
    X_3v8, y_3v8 = X[is_3_or_8], y[is_3_or_8]

    def check_mean_score(estimator, features, labels, threshold):
        # 10-fold cross-validated mean score must exceed the threshold.
        scores = cross_val_score(estimator, features, labels, cv=10)
        assert_greater(scores.mean(), threshold)

    # Multinomial NB
    check_mean_score(MultinomialNB(alpha=10), X, y, 0.86)
    check_mean_score(MultinomialNB(alpha=10), X_3v8, y_3v8, 0.94)

    # Bernoulli NB
    check_mean_score(BernoulliNB(alpha=10), X > 4, y, 0.83)
    check_mean_score(BernoulliNB(alpha=10), X_3v8 > 4, y_3v8, 0.92)

    # Gaussian NB
    check_mean_score(GaussianNB(), X, y, 0.77)
    check_mean_score(GaussianNB(var_smoothing=0.1), X, y, 0.89)
    check_mean_score(GaussianNB(), X_3v8, y_3v8, 0.86)
Example #29
Source File: test_base.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_load_digits():
    """The default digits load has 1797 samples, 64 features and 10 classes."""
    digits = load_digits()
    n_distinct_targets = numpy.unique(digits.target).size

    assert_equal(digits.data.shape, (1797, 64))
    assert_equal(n_distinct_targets, 10)

    # test return_X_y option
    check_return_X_y(digits, partial(load_digits))
Example #30
Source File: test_base.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_load_digits_n_class_lt_10():
    """Loading only nine classes yields 1617 samples with nine distinct targets."""
    # n_class is passed by keyword: current scikit-learn makes it keyword-only.
    digits = load_digits(n_class=9)
    assert_equal(digits.data.shape, (1617, 64))
    assert_equal(numpy.unique(digits.target).size, 9)