Python scipy.stats.expon() Examples

The following code examples show how to use scipy.stats.expon(). They are drawn from open-source Python projects. You can vote up the examples you like or vote down the ones you don't.

Example 1
Project: deep_image_model   Author: tobegit3hub   File: exponential_test.py    Apache License 2.0 7 votes vote down vote up
def testExponentialSampleMultiDimensional(self):
    # Draw a large sample from a batched TF Exponential and verify:
    # (a) the sample tensor has shape (n, batch_size, 2),
    # (b) every draw is non-negative (exponential support), and
    # (c) each batch slice passes a KS test against scipy's expon
    #     (scipy parameterizes by scale = 1 / lam), statistic < 0.01.
    with self.test_session():
      batch_size = 2
      lam_v = [3.0, 22.0]
      lam = tf.constant([lam_v] * batch_size)

      exponential = tf.contrib.distributions.Exponential(lam=lam)

      # Fixed seed keeps the KS assertions deterministic.
      n = 100000
      samples = exponential.sample(n, seed=138)
      self.assertEqual(samples.get_shape(), (n, batch_size, 2))

      sample_values = samples.eval()

      self.assertFalse(np.any(sample_values < 0.0))
      for i in range(2):
        self.assertLess(
            stats.kstest(
                sample_values[:, 0, i], stats.expon(scale=1.0/lam_v[i]).cdf)[0],
            0.01)
        self.assertLess(
            stats.kstest(
                sample_values[:, 1, i], stats.expon(scale=1.0/lam_v[i]).cdf)[0],
            0.01)
Example 2
Project: pymach   Author: gusseppe   File: improve2.py    MIT License 7 votes vote down vote up
def adaboost_paramC(self, method='GridSearchCV'):
        """Return the AdaBoostClassifier hyper-parameter space for *method*.

        Grid-style searchers (GridSearchCV, GeneticSearchCV, EdasSearch) get
        discrete candidate lists; RandomizedSearchCV gets scipy.stats
        distributions it can sample from.

        Raises:
            ValueError: if *method* is not a supported search strategy.
                (The original fell through an empty ``else`` branch and then
                raised UnboundLocalError on ``return parameters``.)
        """
        if method in ('GridSearchCV', 'GeneticSearchCV', 'EdasSearch'):
            parameters = {
                'selector__pca__svd_solver': ['full', 'arpack', 'randomized'],
                'selector__pca__whiten': [True, False],
                'AdaBoostClassifier__n_estimators': [50, 75, 100],
                'AdaBoostClassifier__learning_rate': [0.5, 1.0, 1.5],
                'AdaBoostClassifier__algorithm': ['SAMME', 'SAMME.R'],
            }
        elif method == 'RandomizedSearchCV':
            parameters = {
                'selector__pca__svd_solver': ['full', 'arpack', 'randomized'],
                'selector__pca__whiten': [True, False],
                # n_estimators uniform on [25, 100); learning rate drawn from
                # an exponential distribution with loc=0, scale=1.
                'AdaBoostClassifier__n_estimators': randint(25, 100),
                'AdaBoostClassifier__learning_rate': expon(0, 1),
                'AdaBoostClassifier__algorithm': ['SAMME', 'SAMME.R'],
            }
        else:
            raise ValueError('Unknown search method: %r' % (method,))
        return parameters
Example 3
Project: pymach   Author: gusseppe   File: improve2.py    MIT License 7 votes vote down vote up
def gradientboosting_paramC(self, method='GridSearchCV'):
        """Return the GradientBoostingClassifier hyper-parameter space.

        Grid-style searchers get discrete candidate lists; RandomizedSearchCV
        gets scipy.stats distributions it can sample from.

        Raises:
            ValueError: if *method* is not a supported search strategy.
                (The original fell through an empty ``else`` branch and then
                raised UnboundLocalError on ``return parameters``.)
        """
        if method in ('GridSearchCV', 'GeneticSearchCV', 'EdasSearch'):
            parameters = {
                'selector__pca__svd_solver': ['full', 'arpack', 'randomized'],
                'selector__pca__whiten': [True, False],
                'GradientBoostingClassifier__n_estimators': [200, 250],
                'GradientBoostingClassifier__max_depth': [3, 6, 9],
                'GradientBoostingClassifier__learning_rate': [0.1, 0.2, 0.3],
            }
        elif method == 'RandomizedSearchCV':
            parameters = {
                'selector__pca__svd_solver': ['full', 'arpack', 'randomized'],
                'selector__pca__whiten': [True, False],
                # n_estimators/max_depth uniform on half-open integer ranges;
                # learning rate exponential with loc=0, scale=1.
                'GradientBoostingClassifier__n_estimators': randint(200, 250),
                'GradientBoostingClassifier__max_depth': randint(3, 9),
                'GradientBoostingClassifier__learning_rate': expon(0, 1),
            }
        else:
            raise ValueError('Unknown search method: %r' % (method,))
        return parameters
Example 4
Project: pymach   Author: gusseppe   File: improve2.py    MIT License 7 votes vote down vote up
def adaboost_paramR(self, method='GridSearchCV'):
        """Return the AdaBoostRegressor hyper-parameter space for *method*.

        Grid-style searchers get discrete candidate lists; RandomizedSearchCV
        gets scipy.stats distributions it can sample from.

        Raises:
            ValueError: if *method* is not a supported search strategy.
                (The original fell through an empty ``else`` branch and then
                raised UnboundLocalError on ``return parameters``.)
        """
        if method in ('GridSearchCV', 'GeneticSearchCV', 'EdasSearch'):
            parameters = {
                'selector__pca__svd_solver': ['full', 'arpack', 'randomized'],
                'selector__pca__whiten': [True, False],
                'AdaBoostRegressor__n_estimators': [50, 75, 100],
                'AdaBoostRegressor__learning_rate': [0.5, 1.0, 1.5, 2.0],
                'AdaBoostRegressor__loss': ['linear', 'square', 'exponential'],
            }
        elif method == 'RandomizedSearchCV':
            parameters = {
                # NOTE: with 'arpack', n_components must be strictly less than
                # n_features (comment preserved from the original source).
                'selector__pca__svd_solver': ['full', 'arpack', 'randomized'],
                'selector__pca__whiten': [True, False],
                'AdaBoostRegressor__n_estimators': randint(50, 100),
                # Exponential with loc=0, scale=5.
                'AdaBoostRegressor__learning_rate': expon(0, 5),
                'AdaBoostRegressor__loss': ['linear', 'square', 'exponential'],
            }
        else:
            raise ValueError('Unknown search method: %r' % (method,))
        return parameters
Example 5
Project: chainer   Author: chainer   File: test_exponential.py    MIT License 6 votes vote down vote up
def setUp_configure(self):
        """Pair chainer's Exponential with scipy's ``expon`` and draw a
        random positive rate parameter for the shared distribution tests."""
        from scipy import stats
        self.dist = distributions.Exponential
        self.scipy_dist = stats.expon

        self.test_targets = {
            'batch_shape', 'cdf', 'entropy', 'event_shape', 'icdf', 'log_prob',
            'mean', 'sample', 'support', 'variance'}

        # Rate lambda > 0, drawn log-uniformly from [e^-1, e^1].
        rate = numpy.exp(
            numpy.random.uniform(-1, 1, self.shape)).astype(numpy.float32)
        rate = numpy.asarray(rate)
        self.params = {'lam': rate}
        # scipy parameterizes by scale = 1 / lambda.
        self.scipy_params = {'scale': 1 / rate}

        self.support = 'positive'
Example 6
Project: deep_image_model   Author: tobegit3hub   File: exponential_test.py    Apache License 2.0 6 votes vote down vote up
def testExponentialLogPDF(self):
    # log_pdf and pdf of the TF Exponential must match scipy's expon
    # (scipy parameterizes by scale = 1 / lam).
    with tf.Session():
      batch_size = 6
      lam = tf.constant([2.0] * batch_size)
      lam_v = 2.0
      x = np.array([2.5, 2.5, 4.0, 0.1, 1.0, 2.0], dtype=np.float32)
      exponential = tf.contrib.distributions.Exponential(lam=lam)
      expected_log_pdf = stats.expon.logpdf(x, scale=1 / lam_v)

      log_pdf = exponential.log_pdf(x)
      self.assertEqual(log_pdf.get_shape(), (6,))
      self.assertAllClose(log_pdf.eval(), expected_log_pdf)

      # pdf must be exp(log_pdf).
      pdf = exponential.pdf(x)
      self.assertEqual(pdf.get_shape(), (6,))
      self.assertAllClose(pdf.eval(), np.exp(expected_log_pdf))
Example 7
Project: pymach   Author: gusseppe   File: improve.py    MIT License 6 votes vote down vote up
def pipeline(self):
        """Run the improvement pipeline (grid search) and return ``self``
        so calls can be chained."""
        self.improve_grid_search()
        return self

    # @property
    # def gradientboosting_param(self, method='grid'):
    #
    #     parameters = {
    #         'selector__extraTC__n_estimators': [10, 15, 20, 25],
    #         'selector__extraTC__criterion': ['gini', 'entropy'],
    #         'selector__extraTC__n_jobs': [-1],
    #         'selector__pca__svd_solver': ['auto', 'full', 'arpack', 'randomized'],
    #         'selector__pca__whiten': [True,False],
    #         'GradientBoostingClassifier__n_estimators': [100, 150, 200],
    #         'GradientBoostingClassifier__learning_rate': [0.1, 0.2, 0.4, 0.8, 1.0]
    #     }
    #
    #     if method == 'random':
    #         parameters['GradientBoostingClassifier__learning_rate'] = expon(0,1)
    #
    #     return parameters
Example 8
Project: pymach   Author: gusseppe   File: improve.py    MIT License 6 votes vote down vote up
def gradientboosting_param(self, method='grid'):
        """Hyper-parameter space for GradientBoostingClassifier.

        Returns discrete candidate lists; when *method* is 'random' the
        learning rate entry is replaced with an exponential distribution so
        RandomizedSearchCV can sample it.
        """
        parameters = {
            'selector__pca__svd_solver': ['full', 'arpack', 'randomized'],
            'selector__pca__whiten': [True, False],
            'GradientBoostingClassifier__n_estimators': [200, 250],
            'GradientBoostingClassifier__max_depth': [3, 6, 9],
            'GradientBoostingClassifier__learning_rate': [0.1, 0.2, 0.3],
        }

        if method == 'random':
            # expon(0, 1): loc=0, scale=1.
            parameters['GradientBoostingClassifier__learning_rate'] = expon(0, 1)

        return parameters
Example 9
Project: carl   Author: diana-hep   File: test_mixture.py    BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_fit():
    """Fitting the 3-component mixture should recover equal (1/3) weights
    and never decrease the model score."""
    comp_a = Normal(mu=T.constant(0.0), sigma=T.constant(2.0))
    comp_b = Normal(mu=T.constant(3.0), sigma=T.constant(2.0))
    comp_c = Exponential(inverse_scale=T.constant(0.5))
    g = theano.shared(0.5)
    mixture = Mixture(components=[comp_a, comp_b, comp_c], weights=[g, g * g])

    # 300/100/500 draws from the three true components.
    X = np.concatenate([
        st.norm(loc=0.0, scale=2.0).rvs(300, random_state=0),
        st.norm(loc=3.0, scale=2.0).rvs(100, random_state=1),
        st.expon(scale=1. / 0.5).rvs(500, random_state=2),
    ])
    X = X.reshape(-1, 1)
    score_before = mixture.score(X)

    mixture.fit(X)
    assert np.abs(g.eval() - 1. / 3.) < 0.05
    assert mixture.score(X) >= score_before
Example 10
Project: gaussian-exponential-mixture   Author: ethanwh   File: gaussian_exponential_mixture.py    Apache License 2.0 5 votes vote down vote up
def __init__(self,
                 data: np.ndarray,
                 exp_loc=0.0,
                 max_iterations=100,
                 convergence_tolerance=0.001,
                 **kwargs):
        """Set up EM state for a Gaussian/Exponential mixture fit.

        Bug fix: the annotations used ``np.numarray``, which was removed from
        NumPy long ago; since parameter annotations are evaluated at function
        definition time, the original raised AttributeError on import under
        modern NumPy. Corrected to ``np.ndarray``.

        Args:
            data: observations to fit.
            exp_loc: location (left edge) of the exponential component.
            max_iterations: cap on EM iterations.
            convergence_tolerance: stopping threshold.
            **kwargs: initial parameter overrides forwarded to
                GaussianExponentialParameters.
        """
        self.convergence_tolerance: float = convergence_tolerance
        self.data: np.ndarray = data
        self._exp_loc: float = exp_loc
        self.parameters = GaussianExponentialParameters(**kwargs)
        self.parameters_updated = GaussianExponentialParameters(**kwargs)
        self.max_iterations: int = max_iterations
        # Frozen scipy distributions built from the initial parameter guesses.
        self.expon = stats.expon(loc=self._exp_loc, scale=self.parameters.beta)
        self.norm = stats.norm(loc=self.parameters.mu, scale=self.parameters.sigma)
Example 11
Project: gaussian-exponential-mixture   Author: ethanwh   File: gaussian_exponential_mixture.py    Apache License 2.0 5 votes vote down vote up
def _expectation_is_gaussian(self, val: float) -> float:
        gaussian_density = self.norm.pdf(val)
        exponential_density = self.expon.pdf(val)
        if exponential_density == np.nan:
            return 1
        if gaussian_density == np.nan:
            return 0
        if self.parameters.proportion == 0:
            return 0
        probability_gaussian = gaussian_density * self.parameters.proportion
        probability_exponential = exponential_density * (1 - self.parameters.proportion)
        return probability_gaussian / (probability_gaussian + probability_exponential) 
Example 12
Project: gaussian-exponential-mixture   Author: ethanwh   File: gaussian_exponential_mixture.py    Apache License 2.0 5 votes vote down vote up
def _update_proportion(self) -> None:
        """Updates the proportion of the data that is likelier gaussian.
        """
        gaussian_total = self._apply_and_sum(lambda x: np.nan_to_num(self.norm.logpdf(x)) >
                                                       np.nan_to_num(self.expon.logpdf(x)))
        self.parameters_updated.proportion = gaussian_total / len(self.data) 
Example 13
Project: gaussian-exponential-mixture   Author: ethanwh   File: gaussian_exponential_mixture.py    Apache License 2.0 5 votes vote down vote up
def _update_pdfs(self) -> None:
        """Updates PDFs of normal and exponential with new parameters.

        Since the parameters are stored separately from the PDFs for now, updates
        need to be applied on each iteration.
        """
        self.norm = stats.norm(loc=self.parameters_updated.mu, scale=self.parameters_updated.sigma)
        self.expon = stats.expon(loc=self._exp_loc, scale=self.parameters_updated.beta) 
Example 14
Project: gaussian-exponential-mixture   Author: ethanwh   File: gaussian_exponential_mixture.py    Apache License 2.0 5 votes vote down vote up
def pdf(self, val):
        """Mixture density at *val*:
        proportion * Normal(val) + (1 - proportion) * Exponential(val)."""
        weight = self.parameters.proportion
        gaussian_part = weight * self.norm.pdf(val)
        exponential_part = (1 - weight) * self.expon.pdf(val)
        return exponential_part + gaussian_part
Example 15
Project: deep_image_model   Author: tobegit3hub   File: exponential_test.py    Apache License 2.0 5 votes vote down vote up
def testExponentialCDF(self):
    # CDF of the TF Exponential must match scipy's expon CDF
    # (scipy parameterizes by scale = 1 / lam).
    with tf.Session():
      batch_size = 6
      lam = tf.constant([2.0] * batch_size)
      lam_v = 2.0
      x = np.array([2.5, 2.5, 4.0, 0.1, 1.0, 2.0], dtype=np.float32)

      exponential = tf.contrib.distributions.Exponential(lam=lam)
      expected_cdf = stats.expon.cdf(x, scale=1 / lam_v)

      cdf = exponential.cdf(x)
      self.assertEqual(cdf.get_shape(), (6,))
      self.assertAllClose(cdf.eval(), expected_cdf)
Example 16
Project: deep_image_model   Author: tobegit3hub   File: exponential_test.py    Apache License 2.0 5 votes vote down vote up
def testExponentialMean(self):
    # Mean of Exponential(lam) is 1 / lam; compare against scipy elementwise.
    with tf.Session():
      lam_v = np.array([1.0, 4.0, 2.5])
      expected_mean = stats.expon.mean(scale=1 / lam_v)
      exponential = tf.contrib.distributions.Exponential(lam=lam_v)
      self.assertEqual(exponential.mean().get_shape(), (3,))
      self.assertAllClose(exponential.mean().eval(), expected_mean)
Example 17
Project: deep_image_model   Author: tobegit3hub   File: exponential_test.py    Apache License 2.0 5 votes vote down vote up
def testExponentialVariance(self):
    # Variance of Exponential(lam) is 1 / lam^2; compare against scipy.
    with tf.Session():
      lam_v = np.array([1.0, 4.0, 2.5])
      expected_variance = stats.expon.var(scale=1 / lam_v)
      exponential = tf.contrib.distributions.Exponential(lam=lam_v)
      self.assertEqual(exponential.variance().get_shape(), (3,))
      self.assertAllClose(exponential.variance().eval(), expected_variance)
Example 18
Project: deep_image_model   Author: tobegit3hub   File: exponential_test.py    Apache License 2.0 5 votes vote down vote up
def testExponentialEntropy(self):
    # Entropy of Exponential(lam) is 1 - log(lam); compare against scipy.
    with tf.Session():
      lam_v = np.array([1.0, 4.0, 2.5])
      expected_entropy = stats.expon.entropy(scale=1 / lam_v)
      exponential = tf.contrib.distributions.Exponential(lam=lam_v)
      self.assertEqual(exponential.entropy().get_shape(), (3,))
      self.assertAllClose(exponential.entropy().eval(), expected_entropy)
Example 19
Project: carme   Author: CarmeLabs   File: modelgeneration.py    MIT License 5 votes vote down vote up
def findBestDistribution(df):
        """Finds the best fit for each column and returns the associated parameters

        Arguments:
            df { DataFrame } -- The data matrix

        Returns:
            (best_dist_name, pvalue, params)
                - best_dist_name: List of best fitted graph for each column
                - pvalue: The associated Pvalue generated from the KSTest
                - params: The parameters associated with the best fitted
                        graph (e.g. min&max, alpha&beta)
        """
        dist_names = ['truncnorm', 'beta', 'expon', 'uniform']
        best_dist_name = [0] * len(df.columns)
        pvalues = [0] * len(df.columns)
        params = [0] * len(df.columns)
        for col_num in range(len(df.columns)):
            dist_tests = []
            param_tests = {}
            column = df[df.columns[col_num]]
            for dist_name in dist_names:
                dist = getattr(scipy.stats, dist_name)
                # Fit the data to the shape (maximum likelihood).
                param = dist.fit(column)
                param_tests[dist_name] = param
                # Apply kstest; keep only the p-value. (Fix: the original
                # rebound the loop's ``dist`` — the fitted distribution
                # object — to the KS statistic here, shadowing it.)
                ks_stat, pv = scipy.stats.kstest(column, dist_name, args=param)
                dist_tests.append((dist_name, pv))
            # Select best distribution (Highest pvalue)
            best_dist, best_pv = max(dist_tests, key=lambda item: item[1])
            best_param = param_tests[best_dist]
            best_dist_name[col_num] = best_dist
            pvalues[col_num] = best_pv
            params[col_num] = best_param
        return best_dist_name, pvalues, params
Example 20
Project: carl   Author: diana-hep   File: test_exponential.py    BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def check_exponential(inverse_scale):
    """Compare carl's Exponential against scipy's expon on pdf, cdf and nll."""
    rng = check_random_state(1)

    p_carl = Exponential(inverse_scale=inverse_scale)
    # scipy freezes on scale = 1 / rate.
    p_scipy = st.expon(scale=1. / inverse_scale)
    X = rng.rand(50, 1)
    flat = X.ravel()

    assert_array_almost_equal(p_carl.pdf(X), p_scipy.pdf(flat))
    assert_array_almost_equal(p_carl.cdf(X), p_scipy.cdf(flat))
    # The negative log-likelihood must agree with -log(pdf).
    assert_array_almost_equal(-np.log(p_carl.pdf(X)), p_carl.nll(X))
Example 21
Project: carl   Author: diana-hep   File: test_exponential.py    BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def check_fit(inverse_scale):
    """MLE fit on 5000 exponential samples must recover the rate within 0.1."""
    samples = st.expon(scale=1. / inverse_scale).rvs(
        5000, random_state=0).reshape(-1, 1)
    model = Exponential()
    model.fit(samples)
    assert np.abs(model.inverse_scale.get_value() - inverse_scale) <= 0.1
Example 22
Project: Effective-Quadratures   Author: Effective-Quadratures   File: exponential.py    GNU Lesser General Public License v2.1 5 votes vote down vote up
def __init__(self, rate=None):
        """Freeze a scipy exponential distribution for the given rate (lambda).

        Moments, bounds and the PDF evaluation grid are populated only for a
        valid positive rate; otherwise the instance is left unconfigured,
        matching the original behaviour.
        """
        self.rate = rate
        if self.rate is None or self.rate <= 0.0:
            return
        self.skewness = 2.0
        self.kurtosis = 6.0
        self.bounds = np.array([0.0, np.inf])
        self.x_range_for_pdf = np.linspace(0.0, 20 * self.rate, RECURRENCE_PDF_SAMPLES)
        self.parent = expon(scale=1.0 / rate)
        # Mean and variance come straight from the frozen distribution.
        self.mean = self.parent.mean()
        self.variance = self.parent.var()
Example 23
Project: linear_neuron   Author: uglyboxer   File: test_grid_search.py    MIT License 5 votes vote down vote up
def test_randomized_search_grid_scores():
    """Check the structure and internal consistency of grid_scores_
    produced by RandomizedSearchCV."""
    # Noisy data -> varied prediction errors across folds and settings.
    X, y = make_classification(n_samples=200, n_features=100, n_informative=3,
                               random_state=0)

    # XXX: as of today (scipy 0.12) it's not possible to set the random seed
    # of scipy.stats distributions: the assertions in this test should thus
    # not depend on the randomization
    params = dict(C=expon(scale=10), gamma=expon(scale=0.1))
    n_cv_iter, n_search_iter = 3, 30
    search = RandomizedSearchCV(SVC(), n_iter=n_search_iter, cv=n_cv_iter,
                                param_distributions=params, iid=False)
    search.fit(X, y)
    assert_equal(len(search.grid_scores_), n_search_iter)

    expected_param_names = list(sorted(params.keys()))
    for cv_score in search.grid_scores_:
        assert_equal(len(cv_score.cv_validation_scores), n_cv_iter)
        # iid=False: mean_validation_score is the mean of the fold means,
        # not the aggregate sample-wise mean.
        assert_almost_equal(np.mean(cv_score.cv_validation_scores),
                            cv_score.mean_validation_score)
        assert_equal(list(sorted(cv_score.parameters.keys())),
                     expected_param_names)

    # best_score_ / best_params_ must agree with the ranked grid scores.
    ranked = list(sorted(search.grid_scores_,
                         key=lambda s: s.mean_validation_score))
    best_score = ranked[-1].mean_validation_score
    assert_equal(search.best_score_, best_score)

    tied_best_params = [s.parameters for s in ranked
                        if s.mean_validation_score == best_score]
    assert_true(search.best_params_ in tied_best_params,
                "best_params_={0} is not part of the"
                " tied best models: {1}".format(
                    search.best_params_, tied_best_params))
Example 24
Project: Weiss   Author: WangWenjun559   File: test_grid_search.py    Apache License 2.0 5 votes vote down vote up
def test_randomized_search_grid_scores():
    """Check the structure and internal consistency of grid_scores_
    produced by RandomizedSearchCV."""
    # Noisy data -> varied prediction errors across folds and settings.
    X, y = make_classification(n_samples=200, n_features=100, n_informative=3,
                               random_state=0)

    # XXX: as of today (scipy 0.12) it's not possible to set the random seed
    # of scipy.stats distributions: the assertions in this test should thus
    # not depend on the randomization
    params = dict(C=expon(scale=10), gamma=expon(scale=0.1))
    n_cv_iter, n_search_iter = 3, 30
    search = RandomizedSearchCV(SVC(), n_iter=n_search_iter, cv=n_cv_iter,
                                param_distributions=params, iid=False)
    search.fit(X, y)
    assert_equal(len(search.grid_scores_), n_search_iter)

    expected_param_names = list(sorted(params.keys()))
    for cv_score in search.grid_scores_:
        assert_equal(len(cv_score.cv_validation_scores), n_cv_iter)
        # iid=False: mean_validation_score is the mean of the fold means,
        # not the aggregate sample-wise mean.
        assert_almost_equal(np.mean(cv_score.cv_validation_scores),
                            cv_score.mean_validation_score)
        assert_equal(list(sorted(cv_score.parameters.keys())),
                     expected_param_names)

    # best_score_ / best_params_ must agree with the ranked grid scores.
    ranked = list(sorted(search.grid_scores_,
                         key=lambda s: s.mean_validation_score))
    best_score = ranked[-1].mean_validation_score
    assert_equal(search.best_score_, best_score)

    tied_best_params = [s.parameters for s in ranked
                        if s.mean_validation_score == best_score]
    assert_true(search.best_params_ in tied_best_params,
                "best_params_={0} is not part of the"
                " tied best models: {1}".format(
                    search.best_params_, tied_best_params))
Example 25
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_search.py    Apache License 2.0 5 votes vote down vote up
def test_random_search_cv_results():
    """cv_results_ of RandomizedSearchCV must expose the expected keys and
    array types for both iid=False and iid=True."""
    X, y = make_classification(n_samples=50, n_features=4, random_state=42)

    n_splits = 3
    n_search_iter = 30
    # Every sampled candidate appears in the results table.
    n_cand = n_search_iter

    params = dict(C=expon(scale=10), gamma=expon(scale=0.1))
    param_keys = ('param_C', 'param_gamma')
    score_keys = ('mean_test_score', 'mean_train_score',
                  'rank_test_score',
                  'split0_test_score', 'split1_test_score',
                  'split2_test_score',
                  'split0_train_score', 'split1_train_score',
                  'split2_train_score',
                  'std_test_score', 'std_train_score',
                  'mean_fit_time', 'std_fit_time',
                  'mean_score_time', 'std_score_time')

    for iid in (False, True):
        search = RandomizedSearchCV(SVC(), n_iter=n_search_iter, cv=n_splits,
                                    iid=iid, param_distributions=params)
        search.fit(X, y)
        assert_equal(iid, search.iid)
        cv_results = search.cv_results_
        # Check results structure
        check_cv_results_array_types(search, param_keys, score_keys)
        check_cv_results_keys(cv_results, param_keys, score_keys, n_cand)
        # Random search never masks its sampled parameter values.
        assert_false(any(cv_results['param_C'].mask) or
                     any(cv_results['param_gamma'].mask))
        check_cv_results_grid_scores_consistency(search)
Example 26
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_grid_search.py    Apache License 2.0 5 votes vote down vote up
def test_randomized_search_grid_scores():
    """Check the structure and internal consistency of grid_scores_
    produced by RandomizedSearchCV."""
    # Noisy data -> varied prediction errors across folds and settings.
    X, y = make_classification(n_samples=200, n_features=100, n_informative=3,
                               random_state=0)

    # XXX: as of today (scipy 0.12) it's not possible to set the random seed
    # of scipy.stats distributions: the assertions in this test should thus
    # not depend on the randomization
    params = dict(C=expon(scale=10), gamma=expon(scale=0.1))
    n_cv_iter, n_search_iter = 3, 30
    search = RandomizedSearchCV(SVC(), n_iter=n_search_iter, cv=n_cv_iter,
                                param_distributions=params, iid=False)
    search.fit(X, y)
    assert_equal(len(search.grid_scores_), n_search_iter)

    expected_param_names = list(sorted(params.keys()))
    for cv_score in search.grid_scores_:
        assert_equal(len(cv_score.cv_validation_scores), n_cv_iter)
        # iid=False: mean_validation_score is the mean of the fold means,
        # not the aggregate sample-wise mean.
        assert_almost_equal(np.mean(cv_score.cv_validation_scores),
                            cv_score.mean_validation_score)
        assert_equal(list(sorted(cv_score.parameters.keys())),
                     expected_param_names)

    # best_score_ / best_params_ must agree with the ranked grid scores.
    ranked = list(sorted(search.grid_scores_,
                         key=lambda s: s.mean_validation_score))
    best_score = ranked[-1].mean_validation_score
    assert_equal(search.best_score_, best_score)

    tied_best_params = [s.parameters for s in ranked
                        if s.mean_validation_score == best_score]
    assert_true(search.best_params_ in tied_best_params,
                "best_params_={0} is not part of the"
                " tied best models: {1}".format(
                    search.best_params_, tied_best_params))
Example 27
Project: mousestyles   Author: berkeley-stat222   File: test_kolmogorov_test.py    BSD 2-Clause "Simplified" License 5 votes vote down vote up
def test_perform_kstest2():
    """KS fit of the exponential distribution on travel-distance data."""
    distances = get_travel_distances(1, 1, 1)
    result = perform_kstest(distances, distribution=stats.expon, verbose=True)
    assert type(result) is np.ndarray
    assert result.shape == (2,)
    # The distance data is truncated at 1, so the fitted exponential's
    # loc parameter should land close to 1.
    assert np.abs(result[0] - 1) <= .1
Example 28
Project: dislib   Author: bsc-wdc   File: test_randomizedsearch.py    Apache License 2.0 5 votes vote down vote up
def test_fit(self):
        """Tests RandomizedSearchCV fit()."""
        x_np, y_np = datasets.load_iris(return_X_y=True)
        # CSVM requires pre-shuffled rows.
        perm = np.random.permutation(len(x_np))
        x = ds.array(x_np[perm], (30, 4))
        y = ds.array((y_np[perm] == 0)[:, np.newaxis], (30, 1))
        param_distributions = {'c': stats.expon(scale=0.5),
                               'gamma': stats.expon(scale=1)}
        n_iter = 12
        k = 3
        searcher = RandomizedSearchCV(estimator=CascadeSVM(),
                                      param_distributions=param_distributions,
                                      n_iter=n_iter, cv=k, random_state=0)
        searcher.fit(x, y)

        expected_keys = {'param_c', 'param_gamma', 'params', 'mean_test_score',
                         'std_test_score', 'rank_test_score'}
        expected_keys.update('split%d_test_score' % i for i in range(k))

        self.assertSetEqual(set(searcher.cv_results_.keys()), expected_keys)
        self.assertEqual(len(searcher.cv_results_['param_c']), n_iter)
        # The fitted searcher exposes the usual sklearn-style attributes.
        for attr in ('best_estimator_', 'best_score_', 'best_params_',
                     'best_index_', 'scorer_'):
            self.assertTrue(hasattr(searcher, attr))
        self.assertEqual(searcher.n_splits_, k)
Example 29
Project: ngboost   Author: stanfordmlgroup   File: exponential.py    Apache License 2.0 5 votes vote down vote up
def __init__(self, params):
        """Build a frozen exponential; params[0] is the log of the scale,
        so exponentiating guarantees a positive scale parameter."""
        log_scale = params[0]
        self.scale = np.exp(log_scale)
        self.dist = dist(scale=self.scale)
Example 30
Project: ngboost   Author: stanfordmlgroup   File: exponential.py    Apache License 2.0 5 votes vote down vote up
def __getattr__(self, name):
        # Delegate any attribute not found on the wrapper (pdf, cdf, rvs, ...)
        # to the underlying frozen scipy distribution.
        return getattr(self.dist, name)
Example 31
Project: ngboost   Author: stanfordmlgroup   File: exponential.py    Apache License 2.0 5 votes vote down vote up
def nll(self, Y):
        """Negative log-likelihood for right-censored survival data.

        Y["Event"] is 1 for observed events (use the log-pdf) and 0 for
        censored ones (use the log-survival, with eps guarding log(0)).
        """
        event, time = Y["Event"], Y["Time"]
        censored_term = (1 - event) * np.log(1 - self.dist.cdf(time) + eps)
        observed_term = event * self.dist.logpdf(time)
        return -(censored_term + observed_term)
Example 32
Project: yakddcup2015   Author: its-fun   File: modeling.py    GNU General Public License v2.0 5 votes vote down vote up
def svc_appr():
    """
    Best params: {'C': 0.022139881953014046}

    Submission:
    E_val:
    E_in:
    E_out:
    """
    from sklearn.svm import LinearSVC
    from sklearn.preprocessing import StandardScaler
    from sklearn.pipeline import Pipeline
    from sklearn.cross_validation import StratifiedKFold
    from sklearn.grid_search import RandomizedSearchCV
    from scipy.stats import expon

    X, y = dataset.load_train()

    # Standardize features before fitting the linear SVM.
    raw_scaler = StandardScaler()
    raw_scaler.fit(X)
    X_scaled = raw_scaler.transform(X)

    # Randomized search over C drawn from an exponential distribution.
    rs = RandomizedSearchCV(
        LinearSVC(dual=False, class_weight='auto'),
        n_iter=50, scoring='roc_auc', n_jobs=-1,
        cv=StratifiedKFold(y, 5), verbose=2,
        param_distributions={'C': expon()})
    rs.fit(X_scaled, y)

    logger.debug('Got best SVC.')
    logger.debug('Best params: %s', rs.best_params_)
    logger.debug('Grid scores:')
    for grid_score in rs.grid_scores_:
        print('\t%s' % grid_score)
    logger.debug('Best score (E_val): %s', rs.best_score_)
    logger.debug('E_in: %f', Util.auc_score(rs, X_scaled, y))
Example 33
Project: pgMapMatch   Author: amillb   File: mapmatcher.py    MIT License 5 votes vote down vote up
def temporalLL(travelcostratio):
    """Log likelihood function for the transition between different edges.

    Input is the ratio of implied speed to speed limit. Ratios at or below 1
    score under an exponential model; ratios above 1 under a normal centered
    at 1 (shifted by temporalLL_ratio). Accepts a scalar, list, or ndarray.
    """
    if isinstance(travelcostratio, list):
        travelcostratio = np.array(travelcostratio)
    if not isinstance(travelcostratio, np.ndarray):
        # Scalar path.
        if travelcostratio <= 1:
            return stats.expon(scale=temporal_scale).logpdf(travelcostratio)*temporal_weight
        return (stats.norm(1, scale=sigma_t).logpdf(travelcostratio)+temporalLL_ratio)*temporal_weight
    # Vectorized path: start from the exponential score, then overwrite
    # the entries with ratio > 1 using the normal model.
    out = stats.expon(scale=temporal_scale).logpdf(travelcostratio)
    fast = travelcostratio > 1
    out[fast] = stats.norm(1, scale=sigma_t).logpdf(travelcostratio[fast]) + temporalLL_ratio
    return out * temporal_weight
Example 34
Project: mousestyles   Author: berkeley-stat222   File: hist_kolmogorov.py    BSD 2-Clause "Simplified" License 4 votes vote down vote up
def plot_histogram(strain=0, mouse=0, day=0, distribution=stats.pareto,
                   n_hist_bins=50, loc_legend="best"):
    """ Plot the histogram of distances travelled by a mouse in a day within 20
    milliseconds.
    We then fit a distribution as specified by the argument distribution,
    using Maximum Likelihood, and plot the histgoram of that fitted
    distribution.
    Finally, we fit a distribution by minimizing CDF distance, and plot the
    histogram of that fitted distribution.

    In addition to the histograms (PDF), the CDF are also provided.
    Parameters:
    -----------
    strain: int
        Denote the strain of mouse to plot
    mouse: int
        Denote the the mouse id (what twin) within the strain
    day: int
        Denote the day to plot
    distribution: scipy.stats.rv_continuous object
        {stats.pareto, stats.expon, stats.gamma}
        A distribution to fit the data to. Currently supporting three
        distributions.
    n_hist_bins: int
        Number of bins for plotting histogram
    loc_legend: string, int
        {0, 1, 2, ..., 10} or {"best", "upper right", "upper left", ...}
        Location for plotting legend as in plt.legend function
    """
    # Frozen scipy distributions are "<name>_frozen" classes, so splitting the
    # class name on "_" recovers the plain distribution name for the title.
    dist_name = distribution.__class__.__name__.split("_")[0]
    distances = get_travel_distances(strain, mouse, day)
    # Two fits: maximum likelihood vs. KS/CDF-distance minimization; both are
    # then sampled so their histograms can be drawn alongside the data.
    mle_params = distribution.fit(distances)
    opt_params = perform_kstest(distances, distribution, verbose=False)
    mle_data = distribution(*mle_params).rvs(len(distances))
    opt_data = distribution(*opt_params).rvs(len(distances))

    bins = np.linspace(min(distances), max(distances), n_hist_bins)
    # Plot The Empirical PDF and CDF
    counts, b, _ = plt.hist(distances, bins=bins, alpha=.4, color="green",
                            histtype="step")
    # Rescale every CDF to the tallest empirical bin so PDFs and CDFs share
    # one y-axis; the trailing [scale] pads the steps-post curve to the edge.
    scale = np.max(counts)
    plt.plot(b, np.concatenate([counts.cumsum() / np.sum(counts) * scale,
             [scale]]), color="green", drawstyle="steps-post", lw=2)
    # Plot the MLE PDF and CDF
    counts, b, _ = plt.hist(mle_data, bins=bins, alpha=.4, color="blue",
                            histtype="step")
    plt.plot(b, np.concatenate([counts.cumsum() / np.sum(counts) * scale,
             [scale]]), color="blue", drawstyle="steps-post", lw=2)
    # Plot the Opt PDF and CDF
    counts, b, _ = plt.hist(opt_data, bins=bins, alpha=.4, color="red",
                            histtype="step")
    plt.plot(b, np.concatenate([counts.cumsum() / np.sum(counts) * scale,
             [scale]]), color="red", drawstyle="steps-post", lw=2)
    # Legend order matches the draw order above (CDF lines, then PDF steps).
    plt.legend(["Emp CDF", "MLE CDF", "Opt CDF",
                "Emp PDF", "MLE PDF", "Opt PDF"], loc=loc_legend)
    plt.title("Fitting Distribution {} to Data".format(dist_name))
    plt.xlim(1., 1.6)
Example 35
Project: yakddcup2015   Author: its-fun   File: modeling.py    GNU General Public License v2.0 4 votes vote down vote up
def svc():
    """
    Submission: svc_0703_04.csv
    E_val:
    E_in:
    E_out:
    """
    from sklearn.preprocessing import StandardScaler
    from sklearn.pipeline import Pipeline
    from sklearn.svm import SVC
    from sklearn.cross_validation import StratifiedKFold
    from sklearn.grid_search import RandomizedSearchCV
    from sklearn.calibration import CalibratedClassifierCV
    from scipy.stats import expon

    X, y = dataset.load_train()

    # Standardize features before fitting the SVM.
    raw_scaler = StandardScaler()
    raw_scaler.fit(X)
    X_scaled = raw_scaler.transform(X)

    # Randomized search over C drawn from an exponential distribution.
    svc = SVC(kernel='linear', class_weight='auto')
    rs = RandomizedSearchCV(svc, n_iter=50, scoring='roc_auc', n_jobs=-1,
                            cv=StratifiedKFold(y, 5),
                            param_distributions={'C': expon()})
    rs.fit(X_scaled, y)

    logger.debug('Got best SVC.')
    logger.debug('Grid scores: %s', rs.grid_scores_)
    logger.debug('Best score (E_val): %s', rs.best_score_)
    logger.debug('Best params: %s', rs.best_params_)
    # Cache the fitted search object for later reuse.
    IO.cache(rs, Path.of_cache('svc.RandomizedSearchCV.SVC.pkl'))

    svc = rs.best_estimator_
    IO.cache(rs, Path.of_cache('svc.SVC.pkl'))

    # Calibrate the best estimator's scores into probabilities (isotonic).
    isotonic = CalibratedClassifierCV(svc, cv=StratifiedKFold(y, 5),
                                      method='isotonic')
    isotonic.fit(X_scaled, y)
    IO.cache(rs, Path.of_cache('svc.CalibratedClassifierCV.isotonic.pkl'))

    logger.debug('Got best isotonic CalibratedClassifier.')
    logger.debug('E_in (isotonic): %f', Util.auc_score(isotonic, X_scaled, y))

    IO.dump_submission(Pipeline([('scale_raw', raw_scaler),
                                 ('svc', isotonic)]), 'svc_0703_04')