Python sklearn.utils.check_random_state() Examples
The following are 29 code examples of sklearn.utils.check_random_state(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.utils, or try the search function.
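For reference, check_random_state() normalizes its argument into a numpy RandomState: passing None returns numpy's global RandomState singleton, an int seeds a fresh RandomState, an existing RandomState instance is returned unchanged, and anything else raises a ValueError. A minimal sketch of that contract, based on scikit-learn's documented behavior:

import numpy as np
from sklearn.utils import check_random_state

rng = check_random_state(None)  # numpy's global RandomState singleton
rng = check_random_state(42)    # a fresh RandomState seeded with 42
rng = check_random_state(np.random.RandomState(0))  # returned unchanged

# Whatever was passed in, the result exposes the full RandomState API:
rng.randint(10, size=3)
rng.multivariate_normal(np.zeros(2), np.eye(2), size=5)

This is why the examples below can accept None, an int seed, or a shared RandomState interchangeably through a random_state (or rs) parameter.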

Example #1
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License | 9 votes

def test_regression():
    # Check regression for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data[:50],
                                                        boston.target[:50],
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [0.5, 1.0],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyRegressor(),
                           DecisionTreeRegressor(),
                           KNeighborsRegressor(),
                           SVR(gamma='scale')]:
        for params in grid:
            BaggingRegressor(base_estimator=base_estimator,
                             random_state=rng,
                             **params).fit(X_train, y_train).predict(X_test)

Example #2
Source File: diagnostics.py From yatsm with MIT License | 6 votes

def __init__(self, roi, n_folds=3, mask_values=[0], shuffle=False,
             random_state=None):
    self.roi = roi
    self.n_folds = n_folds
    if isinstance(mask_values, (float, int)):
        self.mask_values = np.array([mask_values])
    elif isinstance(mask_values, (list, tuple)):
        self.mask_values = np.array(mask_values)
    elif isinstance(mask_values, np.ndarray):
        self.mask_values = mask_values
    else:
        raise TypeError('mask_values must be float, int, list, tuple,'
                        ' or np.ndarray')
    if shuffle:
        self.shuffle = True
        self.rng = check_random_state(random_state)

    self._label_roi()

Example #3
Source File: generate.py From opt-mmd with BSD 3-Clause "New" or "Revised" License | 6 votes

def sample_blobs(n, ratio, rows=5, cols=5, sep=10, rs=None):
    rs = check_random_state(rs)

    # ratio is eigenvalue ratio
    correlation = (ratio - 1) / (ratio + 1)

    # generate within-blob variation
    mu = np.zeros(2)
    sigma = np.eye(2)
    X = rs.multivariate_normal(mu, sigma, size=n)

    corr_sigma = np.array([[1, correlation], [correlation, 1]])
    Y = rs.multivariate_normal(mu, corr_sigma, size=n)

    # assign to blobs
    X[:, 0] += rs.randint(rows, size=n) * sep
    X[:, 1] += rs.randint(cols, size=n) * sep
    Y[:, 0] += rs.randint(rows, size=n) * sep
    Y[:, 1] += rs.randint(cols, size=n) * sep

    return X, Y

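A hypothetical call to the generator above (the sample size and eigenvalue ratio are invented for illustration):

X, Y = sample_blobs(n=500, ratio=4.0)
assert X.shape == Y.shape == (500, 2)  # two samples on a 5x5 grid of blobs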
Example #4
Source File: word2vec_helpers.py From question-classification-cnn-rnn-attention with Apache License 2.0 | 6 votes

def __init__(self, test_model=False, verify_model=True):
    model = Word2Vec.load(modelfile)
    if test_model:
        acc = model.accuracy(questionfile)
        logger.info("Test model " + modelfile + " in " + questionfile)
    self.vector_size = model.vector_size
    self.vocab_size = len(model.wv.vocab) + 1
    self.word2index = self.GetWord2Index(model)
    self.index2word = self.GetIndex2Word(model)
    self.wordvector = self.GetWordVector(model)

    if verify_model:
        logger.info("Verifying imported word2vec model")
        random_state = check_random_state(12)
        check_index = random_state.randint(low=0, high=self.vocab_size - 2,
                                           size=1000)
        for index in check_index:
            word_wv = model.wv.index2word[index]
            word_our = self.index2word[index + 1]
            # print(index, word_wv, word_our)
            assert word_wv == word_our
            assert model.wv.vocab[word_our].index == self.word2index[word_our] - 1
            assert np.array_equal(model.wv[word_our],
                                  self.wordvector[self.word2index[word_our]])
        logger.info("Imported word2vec model is verified")

Example #5
Source File: test_truncated_svd.py From mars with Apache License 2.0 | 6 votes

def setUp(self):
    # Make an X that looks somewhat like a small tf-idf matrix.
    # XXX newer versions of SciPy >0.16 have scipy.sparse.rand for this.
    shape = 60, 55
    n_samples, n_features = shape
    rng = check_random_state(42)
    X = rng.randint(-100, 20, np.prod(shape)).reshape(shape)
    X = sp.csr_matrix(np.maximum(X, 0), dtype=np.float64)
    X.data[:] = 1 + np.log(X.data)
    self.X = X
    self.Xdense = X.A
    self.n_samples = n_samples
    self.n_features = n_features

    self.session = new_session().as_default()
    self._old_executor = self.session._sess._executor
    self.executor = self.session._sess._executor = \
        ExecutorForTest('numpy', storage=self.session._sess._context)

Example #6
Source File: slm.py From revrand with Apache License 2.0 | 6 votes

def __init__(self,
             basis=LinearBasis(),
             var=Parameter(gamma(1.), Positive()),
             tol=1e-8,
             maxiter=1000,
             nstarts=100,
             random_state=None
             ):
    """See class docstring."""
    self.basis = basis
    self.var = var
    self.tol = tol
    self.maxiter = maxiter
    self.nstarts = nstarts
    self.random_state = random_state
    self.random_ = check_random_state(random_state)

Example #7
Source File: glm.py From revrand with Apache License 2.0 | 6 votes

def __init__(self,
             likelihood=Gaussian(),
             basis=LinearBasis(),
             K=10,
             maxiter=3000,
             batch_size=10,
             updater=None,
             nsamples=50,
             nstarts=500,
             random_state=None
             ):
    """See class docstring."""
    self.likelihood = likelihood
    self.basis = basis
    self.K = K
    self.maxiter = maxiter
    self.batch_size = batch_size
    self.updater = updater
    self.nsamples = nsamples
    self.nstarts = nstarts
    self.random_state = random_state  # For clone compatibility
    self.random_ = check_random_state(self.random_state)

Example #8
Source File: basis_functions.py From revrand with Apache License 2.0 | 6 votes

def __init__(self,
             nbases,
             Xdim,
             mean=Parameter(norm_dist(), Bound()),
             lenscale=Parameter(gamma(1.), Positive()),
             regularizer=None,
             random_state=None
             ):
    """See this class's docstring."""
    self.random_state = random_state  # for repr
    self._random = check_random_state(random_state)
    self._init_dims(nbases, Xdim)
    self._params = [self._init_param(mean),
                    self._init_param(lenscale)]
    self._init_matrices()
    super(_LengthScaleBasis, self).__init__(regularizer)

Example #9
Source File: test_randomized_lasso.py From stability-selection with BSD 3-Clause "New" or "Revised" License | 6 votes

def generate_experiment_data(n=200, p=200, rho=0.6, random_state=3245):
    rng = check_random_state(random_state)

    sigma = np.eye(p)
    sigma[0, 2] = rho
    sigma[2, 0] = rho
    sigma[1, 2] = rho
    sigma[2, 1] = rho

    X = rng.multivariate_normal(mean=np.zeros(p), cov=sigma, size=(n,))
    beta = np.zeros(p)
    beta[:2] = 1.0
    epsilon = rng.normal(0.0, 0.25, size=(n,))

    y = np.matmul(X, beta) + epsilon

    return X, y

Example #10
Source File: plot_randomized_lasso_path.py From stability-selection with BSD 3-Clause "New" or "Revised" License | 6 votes

def generate_experiment_data(n=200, p=200, rho=0.6, random_state=3245):
    rng = check_random_state(random_state)

    sigma = np.eye(p)
    sigma[0, 2] = rho
    sigma[2, 0] = rho
    sigma[1, 2] = rho
    sigma[2, 1] = rho

    X = rng.multivariate_normal(mean=np.zeros(p), cov=sigma, size=(n,))
    beta = np.zeros(p)
    beta[:2] = 1.0
    epsilon = rng.normal(0.0, 0.25, size=(n,))

    y = np.matmul(X, beta) + epsilon

    return X, y

Example #11
Source File: lmdd.py From pyod with BSD 2-Clause "Simplified" License | 6 votes

def _check_params(n_iter, dis_measure, random_state):
    """Internal function to check for and validate class parameters.
    Also, to return random state instance and the appropriate dissimilarity
    measure if valid.
    """
    if isinstance(n_iter, int):
        check_parameter(n_iter, low=1, param_name='n_iter')
    else:
        raise TypeError("n_iter should be int, got %s" % n_iter)

    if isinstance(dis_measure, str):
        if dis_measure not in ('aad', 'var', 'iqr'):
            raise ValueError("Unknown dissimilarity measure type, "
                             "dis_measure should be in "
                             "(\'aad\', \'var\', \'iqr\'), "
                             "got %s" % dis_measure)
        # TO-DO: 'mad': Median Absolute Deviation to be added
        # once Scipy stats version 1.3.0 is released
    else:
        raise TypeError("dis_measure should be str, got %s" % dis_measure)

    return check_random_state(random_state), _aad if dis_measure == 'aad' \
        else (np.var if dis_measure == 'var'
              else (stats.iqr if dis_measure == 'iqr' else None))

Example #12
Source File: test_graphical_lasso.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes

def test_graphical_lasso_cv(random_state=1):
    # Sample data from a sparse multivariate normal
    dim = 5
    n_samples = 6
    random_state = check_random_state(random_state)
    prec = make_sparse_spd_matrix(dim, alpha=.96,
                                  random_state=random_state)
    cov = linalg.inv(prec)
    X = random_state.multivariate_normal(np.zeros(dim), cov, size=n_samples)
    # Capture stdout, to smoke test the verbose mode
    orig_stdout = sys.stdout
    try:
        sys.stdout = StringIO()
        # We need verbose very high so that Parallel prints on stdout
        GraphicalLassoCV(verbose=100, alphas=5, tol=1e-1).fit(X)
    finally:
        sys.stdout = orig_stdout

    # Smoke test with specified alphas
    GraphicalLassoCV(alphas=[0.8, 0.5], tol=1e-1, n_jobs=1).fit(X)

Example #13
Source File: test_graph_lasso.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes

def test_graph_lasso_cv(random_state=1):
    # Sample data from a sparse multivariate normal
    dim = 5
    n_samples = 6
    random_state = check_random_state(random_state)
    prec = make_sparse_spd_matrix(dim, alpha=.96,
                                  random_state=random_state)
    cov = linalg.inv(prec)
    X = random_state.multivariate_normal(np.zeros(dim), cov, size=n_samples)
    # Capture stdout, to smoke test the verbose mode
    orig_stdout = sys.stdout
    try:
        sys.stdout = StringIO()
        # We need verbose very high so that Parallel prints on stdout
        GraphLassoCV(verbose=100, alphas=5, tol=1e-1).fit(X)
    finally:
        sys.stdout = orig_stdout

    # Smoke test with specified alphas
    GraphLassoCV(alphas=[0.8, 0.5], tol=1e-1, n_jobs=1).fit(X)

Example #14
Source File: test_pls.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes

def test_pls_scaling():
    # sanity check for scale=True
    n_samples = 1000
    n_targets = 5
    n_features = 10

    rng = check_random_state(0)

    Q = rng.randn(n_targets, n_features)
    Y = rng.randn(n_samples, n_targets)
    X = np.dot(Y, Q) + 2 * rng.randn(n_samples, n_features) + 1
    X *= 1000

    X_scaled = StandardScaler().fit_transform(X)

    pls = pls_.PLSRegression(n_components=5, scale=True)

    pls.fit(X, Y)
    score = pls.score(X, Y)

    pls.fit(X_scaled, Y)
    score_scaled = pls.score(X_scaled, Y)

    assert_approx_equal(score, score_scaled)

Example #15
Source File: test_iforest.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes

def test_iforest_parallel_regression():
    """Check parallel regression."""
    rng = check_random_state(0)

    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    ensemble = IsolationForest(n_jobs=3, random_state=0).fit(X_train)

    ensemble.set_params(n_jobs=1)
    y1 = ensemble.predict(X_test)
    ensemble.set_params(n_jobs=2)
    y2 = ensemble.predict(X_test)
    assert_array_almost_equal(y1, y2)

    ensemble = IsolationForest(n_jobs=1, random_state=0).fit(X_train)

    y3 = ensemble.predict(X_test)
    assert_array_almost_equal(y1, y3)

Example #16
Source File: test_iforest.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes

def test_iforest_performance():
    """Test Isolation Forest performs well"""
    # Generate train/test data
    rng = check_random_state(2)
    X = 0.3 * rng.randn(120, 2)
    X_train = X[:100]

    # Generate some abnormal novel observations
    X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))
    X_test = np.r_[X[100:], X_outliers]
    y_test = np.array([0] * 20 + [1] * 20)

    # fit the model
    clf = IsolationForest(max_samples=100, random_state=rng).fit(X_train)

    # predict scores (the lower, the more normal)
    y_pred = -clf.decision_function(X_test)

    # check that there are at most 6 errors (false positives or false negatives)
    assert_greater(roc_auc_score(y_test, y_pred), 0.98)

Example #17
Source File: test_iforest.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes

def test_iforest_warm_start():
    """Test iterative addition of iTrees to an iForest"""
    rng = check_random_state(0)
    X = rng.randn(20, 2)

    # fit first 10 trees
    clf = IsolationForest(n_estimators=10, max_samples=20,
                          random_state=rng, warm_start=True)
    clf.fit(X)
    # remember the 1st tree
    tree_1 = clf.estimators_[0]
    # fit another 10 trees
    clf.set_params(n_estimators=20)
    clf.fit(X)
    # expecting 20 fitted trees and no overwritten trees
    assert len(clf.estimators_) == 20
    assert clf.estimators_[0] is tree_1

Example #18
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes

def test_classification():
    # Check classification for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [1, 2, 4],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyClassifier(),
                           Perceptron(tol=1e-3),
                           DecisionTreeClassifier(),
                           KNeighborsClassifier(),
                           SVC(gamma="scale")]:
        for params in grid:
            BaggingClassifier(base_estimator=base_estimator,
                              random_state=rng,
                              **params).fit(X_train, y_train).predict(X_test)

Example #19
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes

def test_bootstrap_samples():
    # Test that bootstrapping samples generate non-perfect base estimators.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    base_estimator = DecisionTreeRegressor().fit(X_train, y_train)

    # without bootstrap, all trees are perfect on the training set
    ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                max_samples=1.0,
                                bootstrap=False,
                                random_state=rng).fit(X_train, y_train)

    assert_equal(base_estimator.score(X_train, y_train),
                 ensemble.score(X_train, y_train))

    # with bootstrap, trees are no longer perfect on the training set
    ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                max_samples=1.0,
                                bootstrap=True,
                                random_state=rng).fit(X_train, y_train)

    assert_greater(base_estimator.score(X_train, y_train),
                   ensemble.score(X_train, y_train))

    # check that each sampling corresponds to a complete bootstrap resample.
    # the size of each bootstrap should be the same as the input data but
    # the data should be different (checked using the hash of the data).
    ensemble = BaggingRegressor(base_estimator=DummySizeEstimator(),
                                bootstrap=True).fit(X_train, y_train)
    training_hash = []
    for estimator in ensemble.estimators_:
        assert estimator.training_size_ == X_train.shape[0]
        training_hash.append(estimator.training_hash_)
    assert len(set(training_hash)) == len(training_hash)

Example #20
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes

def test_bootstrap_features():
    # Test that bootstrapping features may generate duplicate features.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                max_features=1.0,
                                bootstrap_features=False,
                                random_state=rng).fit(X_train, y_train)

    for features in ensemble.estimators_features_:
        assert_equal(boston.data.shape[1], np.unique(features).shape[0])

    ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                max_features=1.0,
                                bootstrap_features=True,
                                random_state=rng).fit(X_train, y_train)

    for features in ensemble.estimators_features_:
        assert_greater(boston.data.shape[1], np.unique(features).shape[0])

Example #21
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes

def test_single_estimator():
    # Check singleton ensembles.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    clf1 = BaggingRegressor(base_estimator=KNeighborsRegressor(),
                            n_estimators=1,
                            bootstrap=False,
                            bootstrap_features=False,
                            random_state=rng).fit(X_train, y_train)

    clf2 = KNeighborsRegressor().fit(X_train, y_train)

    assert_array_almost_equal(clf1.predict(X_test), clf2.predict(X_test))

Example #22
Source File: test_bagging.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes

def test_parallel_regression():
    # Check parallel regression.
    rng = check_random_state(0)

    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    ensemble = BaggingRegressor(DecisionTreeRegressor(),
                                n_jobs=3,
                                random_state=0).fit(X_train, y_train)

    ensemble.set_params(n_jobs=1)
    y1 = ensemble.predict(X_test)
    ensemble.set_params(n_jobs=2)
    y2 = ensemble.predict(X_test)
    assert_array_almost_equal(y1, y2)

    ensemble = BaggingRegressor(DecisionTreeRegressor(),
                                n_jobs=1,
                                random_state=0).fit(X_train, y_train)

    y3 = ensemble.predict(X_test)
    assert_array_almost_equal(y1, y3)

Example #23
Source File: preprocessing.py From dataiku-contrib with Apache License 2.0 | 5 votes

def __init__(self, saved_model, project_key=None, random_state=None):
    self.encoder = None
    self.categorical_names_map = None
    self.classes = None
    self.random_state = check_random_state(random_state)
    if project_key:
        self.project_key = project_key
    else:
        try:
            self.project_key = os.environ["DKU_CURRENT_PROJECT_KEY"]
        except:
            raise Exception('you must provide a project key or run the lib from DSS')
    try:
        self.predictor = saved_model.get_predictor()
        self.predictor_params = self.predictor.params
        self.predictor_features = self.predictor.get_features()
    except:
        raise
    # Sanity check
    if self.predictor.params.model_type != "PREDICTION":
        raise TypeError('Lime Preprocessor applies only to prediction models')
    else:
        if self.predictor.params.core_params[constants.PREDICTION_TYPE] == 'REGRESSION':
            # TODO: implement regression
            raise NotImplementedError('Lime Preprocessor does not implement Regression')
    self.classes = self.get_classes()
    # additional sanity check for multi-class
    if self.classes is None:
        raise ValueError('Predictor does not seem to be a classifier, no classes found')
    # FIXME: hardcoded - any way to retrieve this dynamically?
    self.predictor_proba_fmt = 'proba_{}'

Example #24
Source File: explanation.py From dataiku-contrib with Apache License 2.0 | 5 votes

def __init__(self, train_df, saved_model, kernel_width, ridge_alpha=float(1.0),
             preprocessing_params=None, random_state=None):
    self.random_state = check_random_state(random_state)
    self.preprocessor = LimePreprocessor(saved_model)
    self.kernel = LimeKernel(kernel_width)
    self.preprocessor.fit(train_df)
    # used for ridge regression
    self.ridge_alpha = ridge_alpha

Example #25
Source File: diagnostics.py From yatsm with MIT License | 5 votes

def __init__(self, y, row, col, n_folds=3, shuffle=False, random_state=None):
    if y.size != row.size or y.size != col.size:
        raise ValueError('Labels provided (y) must be the same size as '
                         'the row and columns provided')
    self.y = y
    self.row = row
    self.col = col
    self.n_folds = n_folds
    if shuffle:
        self.shuffle = True
        self.rng = check_random_state(random_state)

    self._recreate_labels()

Example #26
Source File: generate.py From opt-mmd with BSD 3-Clause "New" or "Revised" License | 5 votes

def sample_SG(n, dim, rs=None):
    rs = check_random_state(rs)
    mu = np.zeros(dim)
    sigma = np.eye(dim)
    X = rs.multivariate_normal(mu, sigma, size=n)
    Y = rs.multivariate_normal(mu, sigma, size=n)
    return X, Y

Example #27
Source File: generate.py From opt-mmd with BSD 3-Clause "New" or "Revised" License | 5 votes

def sample_GMD(n, dim, rs=None):
    rs = check_random_state(rs)
    mu = np.zeros(dim)
    sigma = np.eye(dim)
    X = rs.multivariate_normal(mu, sigma, size=n)
    mu[0] += 1
    Y = rs.multivariate_normal(mu, sigma, size=n)
    return X, Y

Example #28
Source File: generate.py From opt-mmd with BSD 3-Clause "New" or "Revised" License | 5 votes

def sample_GVD(n, dim, rs=None):
    rs = check_random_state(rs)
    mu = np.zeros(dim)
    sigma = np.eye(dim)
    X = rs.multivariate_normal(mu, sigma, size=n)
    sigma[0, 0] = 2
    Y = rs.multivariate_normal(mu, sigma, size=n)
    return X, Y

Example #29
Source File: binning.py From pygbm with MIT License | 5 votes

def _find_binning_thresholds(data, max_bins=256, subsample=int(2e5),
                             random_state=None):
    """Extract feature-wise equally-spaced quantiles from numerical data

    Returns
    -------
    binning_thresholds: tuple of arrays
        For each feature, stores the increasing numeric values that can
        be used to separate the bins.
        len(binning_thresholds) == n_features.
    """
    if not (2 <= max_bins <= 256):
        raise ValueError(f'max_bins={max_bins} should be no smaller than 2 '
                         f'and no larger than 256.')
    rng = check_random_state(random_state)
    if subsample is not None and data.shape[0] > subsample:
        subset = rng.choice(np.arange(data.shape[0]), subsample)
        data = data[subset]
    dtype = data.dtype
    if dtype.kind != 'f':
        dtype = np.float32

    percentiles = np.linspace(0, 100, num=max_bins + 1)[1:-1]
    binning_thresholds = []
    for f_idx in range(data.shape[1]):
        col_data = np.ascontiguousarray(data[:, f_idx], dtype=dtype)
        distinct_values = np.unique(col_data)
        if len(distinct_values) <= max_bins:
            midpoints = (distinct_values[:-1] + distinct_values[1:])
            midpoints *= .5
        else:
            # We sort again the data in this case. We could compute
            # approximate midpoint percentiles using the output of
            # np.unique(col_data, return_counts) instead but this is more
            # work and the performance benefit will be limited because we
            # work on a fixed-size subsample of the full data.
            midpoints = np.percentile(col_data, percentiles,
                                      interpolation='midpoint').astype(dtype)
        binning_thresholds.append(midpoints)
    return tuple(binning_thresholds)

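As a hedged illustration of how this helper might be exercised (the input array and parameters below are invented for the example):

import numpy as np

rng = np.random.RandomState(0)
data = rng.normal(size=(1000, 3)).astype(np.float32)

thresholds = _find_binning_thresholds(data, max_bins=16, random_state=0)
assert len(thresholds) == 3  # one array of bin edges per feature
assert all(len(t) <= 15 for t in thresholds)  # at most max_bins - 1 edges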