Python scipy.stats.randint() Examples

The following are 30 code examples of scipy.stats.randint(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module scipy.stats, or try the search function.
Example #1
Source File: _proximity_forest.py    From sktime with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def lcss_distance_measure_getter(X):
    """
    Build the parameter space for the lcss distance measure.

    :param X: dataset to derive parameter ranges from
    :return: dictionary with the distance measure and the pools /
        distributions its parameters are drawn from
    """
    stdp = dataset_properties.stdp(X)
    # todo should this use the max instance length for unequal length
    # dataset instances?
    longest_instance = dataset_properties.max_instance_length(X)
    window_limit = np.floor((longest_instance + 1) / 4)
    n_dimensions = 1  # todo use other dimensions
    epsilon_start = 0.2 * stdp

    param_space = {}
    param_space['distance_measure'] = [cython_wrapper(lcss_distance)]
    param_space['dim_to_use'] = stats.randint(low=0, high=n_dimensions)
    # uniform takes (start, width): epsilon spans 0.2 * stdp up to stdp
    param_space['epsilon'] = stats.uniform(epsilon_start,
                                           stdp - epsilon_start)
    # scipy stats randint is exclusive on the max value, hence + 1
    param_space['delta'] = stats.randint(low=0, high=window_limit + 1)
    return param_space
Example #2
Source File: _proximity_forest.py    From sktime with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def erp_distance_measure_getter(X):
    """
    Build the parameter space for the erp distance measure.

    :param X: dataset to derive parameter ranges from
    :return: dictionary with the distance measure and the pools /
        distributions its parameters are drawn from
    """
    stdp = dataset_properties.stdp(X)
    # todo should this use the max instance length for unequal length
    # dataset instances?
    longest_instance = dataset_properties.max_instance_length(X)
    window_limit = np.floor((longest_instance + 1) / 4)
    n_dimensions = 1  # todo use other dimensions
    g_start = 0.2 * stdp

    param_space = {}
    param_space['distance_measure'] = [cython_wrapper(erp_distance)]
    param_space['dim_to_use'] = stats.randint(low=0, high=n_dimensions)
    # uniform takes (start, width): g spans 0.2 * stdp up to 0.8 * stdp
    param_space['g'] = stats.uniform(g_start, 0.8 * stdp - g_start)
    # scipy stats randint is exclusive on the max value, hence + 1
    param_space['band_size'] = stats.randint(low=0, high=window_limit + 1)
    return param_space
Example #3
Source File: test_model_selection.py    From mlens with MIT License 6 votes vote down vote up
def test_w_prep_fit():
    """[Model Selection] Test run with preprocessing, single step."""
    evl = Evaluator(mape_scorer, cv=5, shuffle=False, random_state=100,
                    verbose=True)
    offset_space = {'ols': {'offset': randint(1, 10)}}
    prep_cases = {'pr': [Scale()], 'no': []}

    # Send the evaluator's printed output to the null device while fitting.
    with open(os.devnull, 'w') as sink, redirect_stdout(sink):
        evl.fit(X, y,
                estimators=[OLS()],
                param_dicts=offset_space,
                preprocessing=prep_cases,
                n_iter=3)

    mean_scores = evl.results['test_score-m']
    np.testing.assert_approx_equal(mean_scores['no.ols'],
                                   -24.903229451043195)
    # second case is only compared to one significant digit
    np.testing.assert_approx_equal(mean_scores['pr.ols'],
                                   -26.510708862278072, 1)

    best_params = evl.results['params']
    assert best_params['no.ols']['offset'] == 4
    assert best_params['pr.ols']['offset'] == 4
Example #4
Source File: test_distributions.py    From Computable with MIT License 6 votes vote down vote up
def test_rvs(self):
        vals = stats.randint.rvs(5,30,size=100)
        assert_(numpy.all(vals < 30) & numpy.all(vals >= 5))
        assert_(len(vals) == 100)
        vals = stats.randint.rvs(5,30,size=(2,50))
        assert_(numpy.shape(vals) == (2,50))
        assert_(vals.dtype.char in typecodes['AllInteger'])
        val = stats.randint.rvs(15,46)
        assert_((val >= 15) & (val < 46))
        assert_(isinstance(val, numpy.ScalarType), msg=repr(type(val)))
        val = stats.randint(15,46).rvs(3)
        assert_(val.dtype.char in typecodes['AllInteger']) 
Example #5
Source File: test_sklearn.py    From scikit-neuralnetwork with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_RandomMultipleJobs(self):
        """Fit a 4-draw randomized search over the hidden layer size with n_jobs=4."""
        layers = [L("Sigmoid", units=12), L(self.__output__)]
        network = self.__estimator__(layers=layers, n_iter=1)
        unit_space = {'hidden0__units': randint(4, 12)}
        search = RandomizedSearchCV(
                    network,
                    param_distributions=unit_space,
                    n_iter=4, n_jobs=4)
        search.fit(self.a_in, self.a_out)
Example #6
Source File: test_distributions.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_randint(self):
        # Use a discrete distribution w/ parameter-dependent support, which
        # is larger than the default chunksize
        lo, hi = 0, 113
        res = stats.randint.expect(lambda x: x, (lo, hi))
        assert_allclose(res,
            sum(_ for _ in range(lo, hi)) / (hi - lo), atol=1e-15) 
Example #7
Source File: test_hyperband.py    From scikit-hyperband with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def setup():
    """Build the objects shared by the tests.

    :return: tuple ``(model, param_dist, X, y, rng)`` where ``X`` and ``y``
        are the data and target returned by ``load_digits``
    """
    model = RandomForestClassifier()
    rng = check_random_state(42)

    # search space: lists are candidate pools, sp_randint are distributions
    param_dist = dict(
        max_depth=[3, None],
        max_features=sp_randint(1, 11),
        min_samples_split=sp_randint(2, 11),
        bootstrap=[True, False],
        criterion=['gini', 'entropy'],
    )

    digits = load_digits()
    return model, param_dist, digits.data, digits.target, rng
Example #8
Source File: test_search.py    From Surprise with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_randomizedsearchcv_parameter_combinations_with_distribution():
    """Ensure the parameter_combinations attribute populates correctly by
    checking its length."""
    n_draws = 10
    baseline_options = {'method': ['als', 'sgd'],
                        'reg': [1, 2]}
    similarity_options = {'name': ['msd', 'cosine'],
                          'min_support': [1, 5],
                          'user_based': [False]}
    param_distributions = {
        'bsl_options': baseline_options,
        'k': randint(2, 4),  # min inclusive, max exclusive
        'sim_options': similarity_options,
    }
    rs = RandomizedSearchCV(SVD, param_distributions, n_iter=n_draws)
    # one parameter combination is expected per requested iteration
    assert len(rs.param_combinations) == n_draws
Example #9
Source File: stochastic_history.py    From pynoddy with GNU General Public License v2.0 5 votes vote down vote up
def gen_hist(self, name: str, path: str = ""):
        """Generate a random Noddy history file and save it.

        The history is assembled in a fixed order: stratigraphy, tilt,
        an optional fold (added with probability 1/2) and ``n_faults``
        faults. ``has_unconf`` is called after the stratigraphy, after the
        fold step and after the faults.

        Args:
            name: History filename; the ".his" extension is appended here.
            path: Filepath to save the history file to. Default: "" (execution folder)
        """
        import os  # local import: only needed to build the output path

        # Draw the number of faults and layers for this history
        # (np.random.randint excludes the upper bound).
        self.n_faults = np.random.randint(self.faults_low,
                                          self.faults_high)
        self.n_layers = np.random.randint(self.layer_low,
                                          self.layer_high)
        if self.verbose:
            print("n layers:", self.n_layers)
            print("n faults:", self.n_faults)

        nm = pynoddy.history.NoddyHistory()

        # stratigraphy
        nm.add_event('stratigraphy', self._gen_strat())

        # no unconformity has been added yet at this point
        unconf = self.has_unconf(nm, False)

        # tilting
        nm.add_event('tilt', self._gen_tilt())

        # folding
        if bool(np.random.randint(0, 2)):
            nm.add_event("fold", self._gen_fold())

        unconf = self.has_unconf(nm, unconf)

        # faults
        for n in range(self.n_faults):
            fault_options = self._gen_fault(n)
            nm.add_event('fault', fault_options)

        unconf = self.has_unconf(nm, unconf)

        history = name + ".his"
        # os.path.join leaves the filename relative when path is "", so the
        # default really targets the execution folder; plain concatenation
        # with "/" would have produced "/<name>.his" in the filesystem root.
        nm.write_history(os.path.join(path, history))
Example #10
Source File: stochastic_history.py    From pynoddy with GNU General Public License v2.0 5 votes vote down vote up
def has_unconf(self, nm, unconf):
        """Possibly add an unconformity event to ``nm``.

        Nothing is drawn or added when ``unconf`` is already truthy.
        Otherwise an unconformity event is added when
        ``np.random.randint(0, 16)`` draws 0.

        Returns:
            True if ``unconf`` was truthy or an event was added just now,
            False otherwise.
        """
        if unconf:
            return True

        drew_zero = not bool(np.random.randint(0, 16))
        if not drew_zero:
            return False

        nm.add_event("unconformity", self._gen_unconf())
        return True
Example #11
Source File: test_movielens.py    From lightfm with Apache License 2.0 5 votes vote down vote up
def test_sklearn_cv():
    """Run a randomized hyperparameter search over a LightFM model and
    check the selected number of components."""

    model = LightFM(loss="warp", random_state=42)

    # Set distributions for hyperparameters
    # (local names chosen so that they do not shadow scipy's randint/gamma)
    components_dist = stats.randint(low=1, high=65)
    components_dist.random_state = 42
    learning_rate_dist = stats.gamma(a=1.2, loc=0, scale=0.13)
    learning_rate_dist.random_state = 42
    distr = {"no_components": components_dist,
             "learning_rate": learning_rate_dist}

    # Custom score function
    def scorer(est, x, y=None):
        return precision_at_k(est, x).mean()

    # Dummy custom CV to ensure shape preservation.
    class CV(KFold):
        def split(self, X, y=None, groups=None):
            # every "fold" trains and tests on all row indices
            idx = np.arange(X.shape[0])
            for _ in range(self.n_splits):
                yield idx, idx

    # No random_state here: the split above is deterministic, and
    # scikit-learn >= 0.24 raises a ValueError when random_state is set
    # while shuffle is left at False.
    cv = CV(n_splits=3)
    search = RandomizedSearchCV(
        estimator=model,
        param_distributions=distr,
        n_iter=2,
        scoring=scorer,
        random_state=42,
        cv=cv,
    )
    search.fit(train)
    assert search.best_params_["no_components"] == 58
Example #12
Source File: test_sklearn.py    From scikit-neuralnetwork with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_RandomLayerParams(self):
        """Fit a 2-draw randomized search over the hidden layer size."""
        layers = [L("Rectifier", units=12), L(self.__output__)]
        network = self.__estimator__(layers=layers, n_iter=1)
        unit_space = {'hidden0__units': randint(4, 12)}
        search = RandomizedSearchCV(
                    network,
                    param_distributions=unit_space,
                    n_iter=2)
        search.fit(self.a_in, self.a_out)
Example #13
Source File: test_validation.py    From scikit-hyperband with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def setup():
    """Build the estimator and the search space shared by the tests.

    :return: tuple ``(model, param_dist)``
    """
    model = RandomForestClassifier()

    # search space: lists are candidate pools, sp_randint are distributions
    param_dist = dict(
        max_depth=[3, None],
        max_features=sp_randint(1, 11),
        min_samples_split=sp_randint(2, 11),
        min_samples_leaf=sp_randint(1, 11),
        bootstrap=[True, False],
        criterion=["gini", "entropy"],
    )

    return model, param_dist
Example #14
Source File: test_sklearn.py    From scikit-neuralnetwork with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def setUp(self):
        """Create the random fixture: a 64x16 input matrix and 64 integer labels."""
        n_samples, n_features, n_classes = 64, 16, 4
        # inputs are drawn first, then the labels from {0, ..., n_classes - 1}
        self.a_in = numpy.random.uniform(0.0, 1.0, (n_samples, n_features))
        self.a_out = numpy.random.randint(0, n_classes, (n_samples,))
Example #15
Source File: test_sklearn.py    From scikit-neuralnetwork with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_Classifier(self):
        """Cross-validate a one-layer softmax classifier on random data (5 folds)."""
        n_samples, n_features, n_classes = 64, 16, 4
        a_in = numpy.random.uniform(0.0, 1.0, (n_samples, n_features))
        a_out = numpy.random.randint(0, n_classes, (n_samples,))

        classifier = MLPC(layers=[L("Softmax")], n_iter=1)
        cross_val_score(classifier, a_in, a_out, cv=5)
Example #16
Source File: _proximity_forest.py    From sktime with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def msm_distance_measure_getter(X):
    """
    Build the parameter space for the msm distance measure.

    :param X: dataset to derive parameter ranges from (not consulted here,
        the pool of 'c' values is fixed)
    :return: distance measure and parameter range dictionary
    """
    n_dimensions = 1  # todo use other dimensions
    # fixed pool of candidate values for 'c', one group per order of magnitude
    c_pool = [
        0.01, 0.01375, 0.0175, 0.02125, 0.025, 0.02875, 0.0325, 0.03625,
        0.04, 0.04375, 0.0475, 0.05125, 0.055, 0.05875, 0.0625, 0.06625,
        0.07, 0.07375, 0.0775, 0.08125, 0.085, 0.08875, 0.0925, 0.09625,
        0.1, 0.136, 0.172, 0.208, 0.244, 0.28, 0.316, 0.352, 0.388,
        0.424, 0.46, 0.496, 0.532, 0.568, 0.604, 0.64, 0.676, 0.712,
        0.748, 0.784, 0.82, 0.856, 0.892, 0.928, 0.964,
        1, 1.36, 1.72, 2.08, 2.44, 2.8, 3.16, 3.52, 3.88, 4.24, 4.6,
        4.96, 5.32, 5.68, 6.04, 6.4, 6.76, 7.12, 7.48, 7.84, 8.2, 8.56,
        8.92, 9.28, 9.64,
        10, 13.6, 17.2, 20.8, 24.4, 28, 31.6, 35.2, 38.8, 42.4, 46,
        49.6, 53.2, 56.8, 60.4, 64, 67.6, 71.2, 74.8, 78.4, 82, 85.6,
        89.2, 92.8, 96.4,
        100,
    ]

    param_space = {}
    param_space['distance_measure'] = [cython_wrapper(msm_distance)]
    param_space['dim_to_use'] = stats.randint(low=0, high=n_dimensions)
    param_space['c'] = c_pool
    return param_space
Example #17
Source File: _proximity_forest.py    From sktime with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def fit(self, X, y):
        """
        Build the classifier on the training set (X, y)

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_instances, n_columns]
            The training input samples.  If a Pandas data frame is passed,
            column 0 is extracted.
        y : array-like, shape = [n_instances]
            The class labels.

        Returns
        -------
        self : object
        """
        # validate the inputs (univariate data is enforced)
        X, y = check_X_y(X, y, enforce_univariate=True)
        self.X = dataset_properties.positive_dataframe_indices(X)
        # replace the stored random_state with the object returned by
        # check_random_state; it is used below to draw one value per tree
        self.random_state = check_random_state(self.random_state)
        # setup label encoding
        # (y is only transformed when no encoder was supplied; with a
        # supplied encoder y is stored as passed in -- presumably it is
        # expected to be encoded already, TODO confirm)
        if self.label_encoder is None:
            self.label_encoder = LabelEncoder()
            y = self.label_encoder.fit_transform(y)
        self.y = y
        self.classes_ = self.label_encoder.classes_
        # resolve the distance measure: use the one given, otherwise obtain
        # it from the getter (which is itself set up on demand)
        if self.distance_measure is None:
            if self.get_distance_measure is None:
                self.get_distance_measure = self.setup_distance_measure_getter(
                    self)
            self.distance_measure = self.get_distance_measure(self)
        # build n_estimators trees; each _fit_tree call receives X, y, the
        # tree index and a value drawn from self.random_state. The draws
        # happen lazily, one per tree, in index order.
        if self.n_jobs > 1 or self.n_jobs < 0:
            # more than one job (or a negative n_jobs): go through Parallel
            parallel = Parallel(self.n_jobs)
            self.trees = parallel(delayed(self._fit_tree)(
                X, y, index,
                self.random_state.randint(0, self.n_estimators))
                                  for index in range(self.n_estimators))
        else:
            # otherwise fit the trees one after another in this process
            self.trees = [self._fit_tree(
                X, y, index, self.random_state.randint(0, self.n_estimators))
                          for index in range(self.n_estimators)]
        self._is_fitted = True
        return self
Example #18
Source File: test_distributions.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_cdf(self):
        x = numpy.r_[0:36:100j]
        k = numpy.floor(x)
        out = numpy.select([k >= 30, k >= 5], [1.0, (k-5.0+1)/(30-5.0)], 0)
        vals = stats.randint.cdf(x, 5, 30)
        assert_array_almost_equal(vals, out, decimal=12) 
Example #19
Source File: test_distributions.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_pdf(self):
        k = numpy.r_[0:36]
        out = numpy.where((k >= 5) & (k < 30), 1.0/(30-5), 0)
        vals = stats.randint.pmf(k, 5, 30)
        assert_array_almost_equal(vals, out) 
Example #20
Source File: test_distributions.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_rvs(self):
        vals = stats.randint.rvs(5, 30, size=100)
        assert_(numpy.all(vals < 30) & numpy.all(vals >= 5))
        assert_(len(vals) == 100)
        vals = stats.randint.rvs(5, 30, size=(2, 50))
        assert_(numpy.shape(vals) == (2, 50))
        assert_(vals.dtype.char in typecodes['AllInteger'])
        val = stats.randint.rvs(15, 46)
        assert_((val >= 15) & (val < 46))
        assert_(isinstance(val, numpy.ScalarType), msg=repr(type(val)))
        val = stats.randint(15, 46).rvs(3)
        assert_(val.dtype.char in typecodes['AllInteger']) 
Example #21
Source File: mnist_novelty_detection.py    From mHTM with MIT License 5 votes vote down vote up
def parallel_params(log_dir, niter=10000, seed=123456789):
	"""
	Build the static and the generated parameters for a parallel run.
	
	@param log_dir: The directory to store the results in.
	
	@param niter: The number of iterations to perform.
	
	@param seed: The seed for the random number generators.
	
	@return: Returns a tuple containing the static parameters and a dict
	mapping every dynamic parameter name to one shared generator.
	"""
	
	# Parameters that are sampled / supplied per run
	dynamic_params = dict(
		ncolumns=randint(500, 3500),
		nactive=uniform(0.5, 0.35), # As a % of the number of columns
		nsynapses=randint(25, 784),
		seg_th=uniform(0, 0.2), # As a % of the number of synapses
		log_dir=log_dir,
	)
	
	# Parameters that stay fixed
	static_params = dict(
		ninputs=784,
		trim=1e-4,
		disable_boost=True,
		seed=seed,
		pct_active=None,
		random_permanence=True,
		pwindow=0.5,
		global_inhibition=True,
		syn_th=0.5,
		pinc=0.001,
		pdec=0.001,
		nepochs=10,
	)
	
	# Build the parameter generator; every dynamic key refers to the same
	# generator instance
	gen = ParamGenerator(dynamic_params, niter, 1, 784)
	params = dict.fromkeys(dynamic_params, gen)
	
	return static_params, params
Example #22
Source File: test_distributions.py    From Computable with MIT License 5 votes vote down vote up
def test_cdf(self):
        x = numpy.r_[0:36:100j]
        k = numpy.floor(x)
        out = numpy.select([k >= 30,k >= 5],[1.0,(k-5.0+1)/(30-5.0)],0)
        vals = stats.randint.cdf(x,5,30)
        assert_array_almost_equal(vals, out, decimal=12) 
Example #23
Source File: test_model_selection.py    From mlens with MIT License 5 votes vote down vote up
def test_params():
    """[Model Selection] Test raises on bad params."""
    evl = Evaluator(mape_scorer, verbose=2)

    # parameter dict keyed by 'bad.ols' while the only preprocessing case
    # is named 'prep'; fit is expected to reject this with a ValueError
    bad_param_dicts = {'bad.ols': {'offset': randint(1, 10)}}
    prep_cases = {'prep': [Scale()]}

    with np.testing.assert_raises(ValueError):
        evl.fit(X, y,
                estimators=[OLS()],
                param_dicts=bad_param_dicts,
                preprocessing=prep_cases)
Example #24
Source File: test_distributions.py    From Computable with MIT License 5 votes vote down vote up
def test_pdf(self):
        k = numpy.r_[0:36]
        out = numpy.where((k >= 5) & (k < 30), 1.0/(30-5), 0)
        vals = stats.randint.pmf(k,5,30)
        assert_array_almost_equal(vals,out) 
Example #25
Source File: test_model_selection.py    From mlens with MIT License 5 votes vote down vote up
def test_w_prep_set_params():
    """[Model Selection] Test run with preprocessing, sep param dists."""
    evl = Evaluator(mape_scorer, cv=5, shuffle=False, random_state=100,
                    verbose=2)

    # one parameter distribution per (preprocessing case, estimator) pair
    params = {
        'no.ols': {'offset': randint(3, 6)},
        'pr.ols': {'offset': randint(1, 3)},
    }
    estimator_cases = {'pr': [OLS()], 'no': [OLS()]}
    prep_cases = {'pr': [Scale()], 'no': []}

    # Send the evaluator's printed output to the null device while fitting.
    with open(os.devnull, 'w') as sink, redirect_stdout(sink):
        evl.fit(X, y,
                estimators=estimator_cases,
                param_dicts=params,
                preprocessing=prep_cases,
                n_iter=10)

    mean_scores = evl.results['test_score-m']
    np.testing.assert_approx_equal(mean_scores['no.ols'],
                                   -18.684229451043198)
    np.testing.assert_approx_equal(mean_scores['pr.ols'],
                                   -7.2594502123869491)

    best_params = evl.results['params']
    assert best_params['no.ols']['offset'] == 3
    assert best_params['pr.ols']['offset'] == 1
Example #26
Source File: test_model_selection.py    From mlens with MIT License 5 votes vote down vote up
def test_no_prep():
    """[Model Selection] Test run without preprocessing."""
    evl = Evaluator(mape_scorer, cv=5, shuffle=False,
                    random_state=100, verbose=12)
    offset_space = {'ols': {'offset': randint(1, 10)}}

    # Send the evaluator's printed output to the null device while fitting.
    with open(os.devnull, 'w') as sink, redirect_stdout(sink):
        evl.fit(X, y,
                estimators=[OLS()],
                param_dicts=offset_space,
                n_iter=3)

    mean_score = evl.results['test_score-m']['ols']
    np.testing.assert_approx_equal(mean_score, -24.903229451043195)

    best_offset = evl.results['params']['ols']['offset']
    assert best_offset == 4
Example #27
Source File: test_model_selection.py    From mlens with MIT License 5 votes vote down vote up
def test_passes():
    """[Model Selection] Test sets error score on failed scoring."""

    evl = Evaluator(bad_scorer, error_score=0, n_jobs=1, verbose=5)
    fit_options = dict(
        estimators=[OLS()],
        param_dicts={'ols': {'offset': randint(1, 10)}},
        n_iter=1,
    )

    with open(os.devnull, 'w') as sink, redirect_stdout(sink):
        # assert_warns hands back whatever evl.fit returns
        evl = np.testing.assert_warns(FitFailedWarning,
                                      evl.fit, X, y, **fit_options)

    # the failed score must have been replaced by error_score
    assert evl.results['test_score-m']['ols'] == 0
Example #28
Source File: test_model_selection.py    From mlens with MIT License 5 votes vote down vote up
def test_raises():
    """[Model Selection] Test raises on error."""

    evl = Evaluator(bad_scorer, verbose=1)
    offset_space = {'ols': {'offset': randint(1, 10)}}

    # no error_score is passed here, so the fit is expected to raise
    with open(os.devnull, 'w') as sink, redirect_stdout(sink):
        with np.testing.assert_raises(ValueError):
            evl.fit(X, y, estimators=[OLS()],
                    param_dicts=offset_space, n_iter=1)
Example #29
Source File: test_big.py    From skutil with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def test_large_grid():
        """Fit a randomized search over a scaler/PCA/random-forest pipeline
        and exercise scoring, prediction and the grid score report.

        The train and test accuracies are computed but not asserted on.
        """

        # the KFold signature depends on the scikit-learn version flag
        if SK18:
            custom_cv = KFold(n_splits=3, shuffle=True, random_state=42)
        else:
            custom_cv = KFold(n=y_train.shape[0], n_folds=3, shuffle=True, random_state=42)

        # define the pipe
        steps = [
            ('scaler', SelectiveScaler()),
            ('pca', SelectivePCA(weight=True)),
            ('rf', RandomForestClassifier(random_state=42)),
        ]
        pipe = Pipeline(steps)

        # define hyper parameters
        hp = {
            'scaler__scaler': [StandardScaler(), RobustScaler(), MinMaxScaler()],
            'pca__whiten': [True, False],
            'pca__weight': [True, False],
            'pca__n_components': uniform(0.75, 0.15),
            'rf__n_estimators': randint(5, 10),
            'rf__max_depth': randint(5, 15),
        }

        # define the grid
        grid = RandomizedSearchCV(pipe, hp, n_iter=2, scoring='accuracy',
                                  n_jobs=1, cv=custom_cv, random_state=42)

        # this will fail because we haven't fit yet
        assert_fails(grid.score, (ValueError, AttributeError), X_train, y_train)

        # fit the grid
        grid.fit(X_train, y_train)

        # score for coverage -- this might warn...
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            grid.score(X_train, y_train)

        # coverage:
        assert grid._estimator_type == 'classifier'

        # get predictions
        tr_pred = grid.predict(X_train)
        te_pred = grid.predict(X_test)

        # evaluate score (SHOULD be better than random...)
        accuracy_score(y_train, tr_pred)
        accuracy_score(y_test, te_pred)

        # grid score reports:
        # assert fails for bad percentile
        for bad_percentile in (0.0, 1.0):
            assert_fails(report_grid_score_detail, ValueError,
                         random_search=grid, percentile=bad_percentile)

        # assert fails for bad y_axis
        assert_fails(report_grid_score_detail, ValueError,
                     random_search=grid, y_axis='bad_axis')

        # assert passes otherwise
        report_grid_score_detail(grid, charts=True, percentile=0.95)  # just ensure percentile works
Example #30
Source File: _proximity_forest.py    From sktime with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def pick_rand_param_perm_from_dict(param_pool, random_state):
    """
    Pick one random parameter permutation from a pool of parameters.

    Every entry of the pool maps a parameter name to either a list of
    candidate values (one element is picked at a random index) or a
    distribution (anything with an ``.rvs()`` method, from which one value
    is sampled).

    Parameters
    ----------
    param_pool : dict
        parameter names mapped to a list of values OR a distribution.
        example:
        param_pool = {
          'C': [1, 10, 100, 1000],
          'kernel': ['linear', 'rbf'],
          'gamma': stats.uniform(0, 1),
         }
    random_state : object
        source of randomness; must provide ``randint(n)`` and is passed on
        to ``rvs(random_state=...)`` (e.g. a numpy ``RandomState``).

    Returns
    -------
    param_perm : dict
        one chosen value per parameter name, in the order of ``param_pool``

    Raises
    ------
    TypeError
        if a parameter is neither a list nor an object with ``.rvs()``
    """
    # construct empty permutation
    param_perm = {}
    # for each parameter
    for param_name, param_values in param_pool.items():
        if isinstance(param_values, list):
            # randomly pick a value; a picked value that is itself a dict
            # is returned as it is (nested pools are not expanded)
            param_value = param_values[random_state.randint(len(param_values))]
        elif hasattr(param_values, 'rvs'):
            # sample from the distribution
            param_value = param_values.rvs(random_state=random_state)
        else:
            # otherwise we don't know how to obtain a value from the
            # parameter; TypeError is still caught by `except Exception`
            raise TypeError(
                'unknown type of parameter pool for {!r}: {}'.format(
                    param_name, type(param_values).__name__))
        # add parameter name and value to permutation
        param_perm[param_name] = param_value
    return param_perm