Python scipy.stats.binom() Examples

The following are code examples for showing how to use scipy.stats.binom(). They are from open source Python projects. You can vote up the examples you like or vote down the ones you don't like.

Example 1
Project: dockerizeme   Author: dockerizeme   File: snippet.py    Apache License 2.0 6 votes vote down vote up
def mcnemar_midp(b, c):
    """
    Compute McNemar's test using the "mid-p" variant suggested by:

    M.W. Fagerland, S. Lydersen, P. Laake. 2013. The McNemar test for
    binary matched-pairs data: Mid-p and asymptotic are better than exact
    conditional. BMC Medical Research Methodology 13: 91.

    `b` is the number of observations correctly labeled by the first---but
    not the second---system; `c` is the number of observations correctly
    labeled by the second---but not the first---system.
    """
    total_discordant = b + c
    smaller_count = min(b, c)
    # Under H0 the discordant pairs split 50/50 between the two systems.
    null_dist = binom(total_discordant, 0.5)
    # Two-sided exact p-value, then subtract the point probability once
    # (the "mid-p" correction: half the point mass from each tail).
    exact_two_sided = 2.0 * null_dist.cdf(smaller_count)
    return exact_two_sided - null_dist.pmf(smaller_count)
Example 2
Project: ThredgeCorr   Author: benmaier   File: simulation.py    MIT License 6 votes vote down vote up
def simulation_code(kw):
    """Compute exact and asymptotic degree distributions for the
    parameters given in the dict `kw` (keys 'n', 'rho', 'k').

    Returns ``[p_exact, p_asymptotic]``; either may be an empty array
    when it is not defined for the given parameters.
    """
    n = int(kw['n'])
    rho = float(kw['rho'])
    mean_degree = float(kw['k'])

    kmax = n - 1

    # The requested mean degree must lie below the maximum degree n-1,
    # otherwise no threshold exists and nothing can be computed.
    if not mean_degree < kmax:
        return [np.array([]), np.array([])]

    t = solve_t(mean_degree, n)
    if rho > 0.0:
        p_exact = np.array([float(_p) for _p in pk(n, t, rho, kmax)])
        p_asymptotic = pk_asymptotic(np.arange(1, kmax, dtype=float),
                                     n, t, rho)
    else:
        # Zero correlation: degrees are plain binomial, no asymptotic form.
        connect_prob = mean_degree / (n - 1.0)
        p_exact = binom(n - 1, connect_prob).pmf(np.arange(kmax + 1))
        p_asymptotic = np.array([])

    return [p_exact, p_asymptotic]
Example 3
Project: cprior   Author: guillermo-navas-palencia   File: beta_binomial.py    GNU Lesser General Public License v3.0 6 votes vote down vote up
def rvs(self, size=1, random_state=None):
        """
        Beta-binomial random variates.

        Parameters
        ----------
        size : int (default=1)
            Number of random variates.

        random_state : int or None (default=None)
            The seed used by the random number generator.

        Returns
        -------
        rvs : numpy.ndarray
            Random variates of given size.
        """
        # Draw one success probability per variate from the Beta prior,
        # then one binomial count conditioned on each probability.
        success_probs = stats.beta(self.a, self.b).rvs(
            size=size, random_state=random_state)

        return stats.binom(self.n, success_probs).rvs(
            size=size, random_state=random_state)
Example 4
Project: dscontrib   Author: mozilla   File: power_analysis.py    Mozilla Public License 2.0 6 votes vote down vote up
def run_simmo(fake_results, control_rate, true_rel_change):
    """Simulate 1000 experiments at the given conversion rates and return
    the (0.025, 0.975) quantiles of the lower/upper credible bounds of the
    relative uplift, as a DataFrame.
    """
    # Per-branch conversion rates: [control, test].
    branch_rates = np.array([control_rate,
                             control_rate * (1 + true_rel_change)])

    lower_bounds = []
    upper_bounds = []
    for _ in range(1000):
        # Overwrite the conversions column with fresh binomial draws.
        fake_results['num_conversions'] = st.binom(
            fake_results['num_enrollments'], branch_rates).rvs()

        summary = mabsbin.compare_branches_from_agg(fake_results)

        uplift = summary['comparative']['test']['rel_uplift']
        lower_bounds.append(uplift['0.975'])
        upper_bounds.append(uplift['0.025'])

    return pd.DataFrame(
        {'lower': lower_bounds, 'upper': upper_bounds}
    ).quantile([0.025, 0.975])
Example 5
Project: gmaneLegacy   Author: ttm   File: networkPartitioning.py    The Unlicense 5 votes vote down vote up
def makeBinomialDistribution(self,prob,max_degree_possible,incident_degrees_):
        """If max_degree_possible == max_degree_empirical, makeBinomial ==1"""
        # Occurrence probability of each incident degree under a
        # Binomial(max_degree_possible, prob) law.
        distribution = []
        for degree in incident_degrees_:
            # Once the tail has underflowed to exactly 0.0 it stays zero;
            # skip the computation for all remaining degrees.
            if distribution and distribution[-1] == 0.0:
                distribution.append(0.0)
                continue
            n_ways = special.binom(max_degree_possible, degree)
            distribution.append(
                n_ways * (prob ** degree)
                * ((1 - prob) ** (max_degree_possible - degree)))
        return distribution
Example 6
Project: LaserTOF   Author: kyleuckert   File: test_distributions.py    MIT License 5 votes vote down vote up
def test_rvs(self):
        vals = stats.binom.rvs(10, 0.75, size=(2, 50))
        assert_(numpy.all(vals >= 0) & numpy.all(vals <= 10))
        assert_(numpy.shape(vals) == (2, 50))
        assert_(vals.dtype.char in typecodes['AllInteger'])
        val = stats.binom.rvs(10, 0.75)
        assert_(isinstance(val, int))
        val = stats.binom(10, 0.75).rvs(3)
        assert_(isinstance(val, numpy.ndarray))
        assert_(val.dtype.char in typecodes['AllInteger']) 
Example 7
Project: LaserTOF   Author: kyleuckert   File: test_distributions.py    MIT License 5 votes vote down vote up
def test_pmf(self):
        # regression test for Ticket #1842
        vals1 = stats.binom.pmf(100, 100, 1)
        vals2 = stats.binom.pmf(0, 100, 0)
        assert_allclose(vals1, 1.0, rtol=1e-15, atol=0)
        assert_allclose(vals2, 1.0, rtol=1e-15, atol=0) 
Example 8
Project: LaserTOF   Author: kyleuckert   File: test_distributions.py    MIT License 5 votes vote down vote up
def test_warns_p0(self):
        # no spurious warnigns are generated for p=0; gh-3817
        with warnings.catch_warnings():
            warnings.simplefilter("error", RuntimeWarning)
            assert_equal(stats.binom(n=2, p=0).mean(), 0)
            assert_equal(stats.binom(n=2, p=0).std(), 0) 
Example 9
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_distributions.py    GNU General Public License v3.0 5 votes vote down vote up
def test_rvs(self):
        vals = stats.binom.rvs(10, 0.75, size=(2, 50))
        assert_(numpy.all(vals >= 0) & numpy.all(vals <= 10))
        assert_(numpy.shape(vals) == (2, 50))
        assert_(vals.dtype.char in typecodes['AllInteger'])
        val = stats.binom.rvs(10, 0.75)
        assert_(isinstance(val, int))
        val = stats.binom(10, 0.75).rvs(3)
        assert_(isinstance(val, numpy.ndarray))
        assert_(val.dtype.char in typecodes['AllInteger']) 
Example 10
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_distributions.py    GNU General Public License v3.0 5 votes vote down vote up
def test_pmf(self):
        # regression test for Ticket #1842
        vals1 = stats.binom.pmf(100, 100,1)
        vals2 = stats.binom.pmf(0, 100,0)
        assert_allclose(vals1, 1.0, rtol=1e-15, atol=0)
        assert_allclose(vals2, 1.0, rtol=1e-15, atol=0) 
Example 11
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_distributions.py    GNU General Public License v3.0 5 votes vote down vote up
def test_entropy(self):
        # Basic entropy tests.
        b = stats.binom(2, 0.5)
        expected_p = np.array([0.25, 0.5, 0.25])
        expected_h = -sum(xlogy(expected_p, expected_p))
        h = b.entropy()
        assert_allclose(h, expected_h)

        b = stats.binom(2, 0.0)
        h = b.entropy()
        assert_equal(h, 0.0)

        b = stats.binom(2, 1.0)
        h = b.entropy()
        assert_equal(h, 0.0) 
Example 12
Project: att   Author: Centre-Alt-Rendiment-Esportiu   File: test_distributions.py    GNU General Public License v3.0 5 votes vote down vote up
def test_warns_p0(self):
        # no spurious warnigns are generated for p=0; gh-3817
        with warnings.catch_warnings():
            warnings.simplefilter("error", RuntimeWarning)
            assert_equal(stats.binom(n=2, p=0).mean(), 0)
            assert_equal(stats.binom(n=2, p=0).std(), 0) 
Example 13
Project: ble5-nrf52-mac   Author: tomasero   File: test_distributions.py    MIT License 5 votes vote down vote up
def test_rvs(self):
        vals = stats.binom.rvs(10, 0.75, size=(2, 50))
        assert_(numpy.all(vals >= 0) & numpy.all(vals <= 10))
        assert_(numpy.shape(vals) == (2, 50))
        assert_(vals.dtype.char in typecodes['AllInteger'])
        val = stats.binom.rvs(10, 0.75)
        assert_(isinstance(val, int))
        val = stats.binom(10, 0.75).rvs(3)
        assert_(isinstance(val, numpy.ndarray))
        assert_(val.dtype.char in typecodes['AllInteger']) 
Example 14
Project: ble5-nrf52-mac   Author: tomasero   File: test_distributions.py    MIT License 5 votes vote down vote up
def test_pmf(self):
        # regression test for Ticket #1842
        vals1 = stats.binom.pmf(100, 100, 1)
        vals2 = stats.binom.pmf(0, 100, 0)
        assert_allclose(vals1, 1.0, rtol=1e-15, atol=0)
        assert_allclose(vals2, 1.0, rtol=1e-15, atol=0) 
Example 15
Project: ble5-nrf52-mac   Author: tomasero   File: test_distributions.py    MIT License 5 votes vote down vote up
def test_entropy(self):
        # Basic entropy tests.
        b = stats.binom(2, 0.5)
        expected_p = np.array([0.25, 0.5, 0.25])
        expected_h = -sum(xlogy(expected_p, expected_p))
        h = b.entropy()
        assert_allclose(h, expected_h)

        b = stats.binom(2, 0.0)
        h = b.entropy()
        assert_equal(h, 0.0)

        b = stats.binom(2, 1.0)
        h = b.entropy()
        assert_equal(h, 0.0) 
Example 16
Project: ble5-nrf52-mac   Author: tomasero   File: test_distributions.py    MIT License 5 votes vote down vote up
def test_warns_p0(self):
        # no spurious warnigns are generated for p=0; gh-3817
        with warnings.catch_warnings():
            warnings.simplefilter("error", RuntimeWarning)
            assert_equal(stats.binom(n=2, p=0).mean(), 0)
            assert_equal(stats.binom(n=2, p=0).std(), 0) 
Example 17
Project: Computable   Author: ktraunmueller   File: test_distributions.py    MIT License 5 votes vote down vote up
def test_rvs(self):
        vals = stats.binom.rvs(10, 0.75, size=(2, 50))
        assert_(numpy.all(vals >= 0) & numpy.all(vals <= 10))
        assert_(numpy.shape(vals) == (2, 50))
        assert_(vals.dtype.char in typecodes['AllInteger'])
        val = stats.binom.rvs(10, 0.75)
        assert_(isinstance(val, int))
        val = stats.binom(10, 0.75).rvs(3)
        assert_(isinstance(val, numpy.ndarray))
        assert_(val.dtype.char in typecodes['AllInteger']) 
Example 18
Project: Computable   Author: ktraunmueller   File: test_distributions.py    MIT License 5 votes vote down vote up
def test_pmf(self):
        # regression test for Ticket #1842
        vals1 = stats.binom.pmf(100, 100,1)
        vals2 = stats.binom.pmf(0, 100,0)
        assert_allclose(vals1, 1.0, rtol=1e-15, atol=0)
        assert_allclose(vals2, 1.0, rtol=1e-15, atol=0) 
Example 19
Project: Computable   Author: ktraunmueller   File: test_distributions.py    MIT License 5 votes vote down vote up
def test_entropy(self):
        # Basic entropy tests.
        b = stats.binom(2, 0.5)
        expected_p = np.array([0.25, 0.5, 0.25])
        expected_h = -sum(xlogy(expected_p, expected_p))
        h = b.entropy()
        assert_allclose(h, expected_h)

        b = stats.binom(2, 0.0)
        h = b.entropy()
        assert_equal(h, 0.0)

        b = stats.binom(2, 1.0)
        h = b.entropy()
        assert_equal(h, 0.0) 
Example 20
Project: poker   Author: surgebiswas   File: test_distributions.py    MIT License 5 votes vote down vote up
def test_rvs(self):
        vals = stats.binom.rvs(10, 0.75, size=(2, 50))
        assert_(numpy.all(vals >= 0) & numpy.all(vals <= 10))
        assert_(numpy.shape(vals) == (2, 50))
        assert_(vals.dtype.char in typecodes['AllInteger'])
        val = stats.binom.rvs(10, 0.75)
        assert_(isinstance(val, int))
        val = stats.binom(10, 0.75).rvs(3)
        assert_(isinstance(val, numpy.ndarray))
        assert_(val.dtype.char in typecodes['AllInteger']) 
Example 21
Project: poker   Author: surgebiswas   File: test_distributions.py    MIT License 5 votes vote down vote up
def test_pmf(self):
        # regression test for Ticket #1842
        vals1 = stats.binom.pmf(100, 100, 1)
        vals2 = stats.binom.pmf(0, 100, 0)
        assert_allclose(vals1, 1.0, rtol=1e-15, atol=0)
        assert_allclose(vals2, 1.0, rtol=1e-15, atol=0) 
Example 22
Project: poker   Author: surgebiswas   File: test_distributions.py    MIT License 5 votes vote down vote up
def test_entropy(self):
        # Basic entropy tests.
        b = stats.binom(2, 0.5)
        expected_p = np.array([0.25, 0.5, 0.25])
        expected_h = -sum(xlogy(expected_p, expected_p))
        h = b.entropy()
        assert_allclose(h, expected_h)

        b = stats.binom(2, 0.0)
        h = b.entropy()
        assert_equal(h, 0.0)

        b = stats.binom(2, 1.0)
        h = b.entropy()
        assert_equal(h, 0.0) 
Example 23
Project: poker   Author: surgebiswas   File: test_distributions.py    MIT License 5 votes vote down vote up
def test_warns_p0(self):
        # no spurious warnigns are generated for p=0; gh-3817
        with warnings.catch_warnings():
            warnings.simplefilter("error", RuntimeWarning)
            assert_equal(stats.binom(n=2, p=0).mean(), 0)
            assert_equal(stats.binom(n=2, p=0).std(), 0) 
Example 24
Project: P3_image_processing   Author: latedude2   File: test_distributions.py    MIT License 5 votes vote down vote up
def test_rvs(self):
        vals = stats.binom.rvs(10, 0.75, size=(2, 50))
        assert_(numpy.all(vals >= 0) & numpy.all(vals <= 10))
        assert_(numpy.shape(vals) == (2, 50))
        assert_(vals.dtype.char in typecodes['AllInteger'])
        val = stats.binom.rvs(10, 0.75)
        assert_(isinstance(val, int))
        val = stats.binom(10, 0.75).rvs(3)
        assert_(isinstance(val, numpy.ndarray))
        assert_(val.dtype.char in typecodes['AllInteger']) 
Example 25
Project: P3_image_processing   Author: latedude2   File: test_distributions.py    MIT License 5 votes vote down vote up
def test_pmf(self):
        # regression test for Ticket #1842
        vals1 = stats.binom.pmf(100, 100, 1)
        vals2 = stats.binom.pmf(0, 100, 0)
        assert_allclose(vals1, 1.0, rtol=1e-15, atol=0)
        assert_allclose(vals2, 1.0, rtol=1e-15, atol=0) 
Example 26
Project: P3_image_processing   Author: latedude2   File: test_distributions.py    MIT License 5 votes vote down vote up
def test_entropy(self):
        # Basic entropy tests.
        b = stats.binom(2, 0.5)
        expected_p = np.array([0.25, 0.5, 0.25])
        expected_h = -sum(xlogy(expected_p, expected_p))
        h = b.entropy()
        assert_allclose(h, expected_h)

        b = stats.binom(2, 0.0)
        h = b.entropy()
        assert_equal(h, 0.0)

        b = stats.binom(2, 1.0)
        h = b.entropy()
        assert_equal(h, 0.0) 
Example 27
Project: P3_image_processing   Author: latedude2   File: test_distributions.py    MIT License 5 votes vote down vote up
def test_warns_p0(self):
        # no spurious warnigns are generated for p=0; gh-3817
        with warnings.catch_warnings():
            warnings.simplefilter("error", RuntimeWarning)
            assert_equal(stats.binom(n=2, p=0).mean(), 0)
            assert_equal(stats.binom(n=2, p=0).std(), 0) 
Example 28
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_distributions.py    MIT License 5 votes vote down vote up
def test_rvs(self):
        vals = stats.binom.rvs(10, 0.75, size=(2, 50))
        assert_(numpy.all(vals >= 0) & numpy.all(vals <= 10))
        assert_(numpy.shape(vals) == (2, 50))
        assert_(vals.dtype.char in typecodes['AllInteger'])
        val = stats.binom.rvs(10, 0.75)
        assert_(isinstance(val, int))
        val = stats.binom(10, 0.75).rvs(3)
        assert_(isinstance(val, numpy.ndarray))
        assert_(val.dtype.char in typecodes['AllInteger']) 
Example 29
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_distributions.py    MIT License 5 votes vote down vote up
def test_pmf(self):
        # regression test for Ticket #1842
        vals1 = stats.binom.pmf(100, 100, 1)
        vals2 = stats.binom.pmf(0, 100, 0)
        assert_allclose(vals1, 1.0, rtol=1e-15, atol=0)
        assert_allclose(vals2, 1.0, rtol=1e-15, atol=0) 
Example 30
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_distributions.py    MIT License 5 votes vote down vote up
def test_entropy(self):
        # Basic entropy tests.
        b = stats.binom(2, 0.5)
        expected_p = np.array([0.25, 0.5, 0.25])
        expected_h = -sum(xlogy(expected_p, expected_p))
        h = b.entropy()
        assert_allclose(h, expected_h)

        b = stats.binom(2, 0.0)
        h = b.entropy()
        assert_equal(h, 0.0)

        b = stats.binom(2, 1.0)
        h = b.entropy()
        assert_equal(h, 0.0) 
Example 31
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_distributions.py    MIT License 5 votes vote down vote up
def test_warns_p0(self):
        # no spurious warnigns are generated for p=0; gh-3817
        with warnings.catch_warnings():
            warnings.simplefilter("error", RuntimeWarning)
            assert_equal(stats.binom(n=2, p=0).mean(), 0)
            assert_equal(stats.binom(n=2, p=0).std(), 0) 
Example 32
Project: dscontrib   Author: mozilla   File: power_analysis.py    Mozilla Public License 2.0 5 votes vote down vote up
def set_up_fake_results_df(total_num_enrollments, test_branch_prop):
    """Build a two-row enrollment DataFrame (index 'control'/'test').

    Each enrollment is independently assigned to the test branch with
    probability `test_branch_prop`; the remainder form control.
    """
    test_pop = st.binom(total_num_enrollments, test_branch_prop).rvs()
    branch_sizes = [total_num_enrollments - test_pop, test_pop]

    return pd.DataFrame({'num_enrollments': branch_sizes},
                        index=['control', 'test'])
Example 33
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_distributions.py    Apache License 2.0 5 votes vote down vote up
def test_rvs(self):
        vals = stats.binom.rvs(10, 0.75, size=(2, 50))
        assert_(numpy.all(vals >= 0) & numpy.all(vals <= 10))
        assert_(numpy.shape(vals) == (2, 50))
        assert_(vals.dtype.char in typecodes['AllInteger'])
        val = stats.binom.rvs(10, 0.75)
        assert_(isinstance(val, int))
        val = stats.binom(10, 0.75).rvs(3)
        assert_(isinstance(val, numpy.ndarray))
        assert_(val.dtype.char in typecodes['AllInteger']) 
Example 34
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_distributions.py    Apache License 2.0 5 votes vote down vote up
def test_pmf(self):
        # regression test for Ticket #1842
        vals1 = stats.binom.pmf(100, 100, 1)
        vals2 = stats.binom.pmf(0, 100, 0)
        assert_allclose(vals1, 1.0, rtol=1e-15, atol=0)
        assert_allclose(vals2, 1.0, rtol=1e-15, atol=0) 
Example 35
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_distributions.py    Apache License 2.0 5 votes vote down vote up
def test_entropy(self):
        # Basic entropy tests.
        b = stats.binom(2, 0.5)
        expected_p = np.array([0.25, 0.5, 0.25])
        expected_h = -sum(xlogy(expected_p, expected_p))
        h = b.entropy()
        assert_allclose(h, expected_h)

        b = stats.binom(2, 0.0)
        h = b.entropy()
        assert_equal(h, 0.0)

        b = stats.binom(2, 1.0)
        h = b.entropy()
        assert_equal(h, 0.0) 
Example 36
Project: Renormalizer   Author: shuaigroup   File: phonon.py    Apache License 2.0 5 votes vote down vote up
def split(self, n=2, width: Quantity=Quantity(10, "cm-1")) -> List["Phonon"]:
        """Split this (simple) phonon into `n` phonons spread around its
        frequency, distributing the reorganization energy with binomial
        weights.

        `width` is the half-width of the frequency spread (converted to
        atomic units); returns a list of `n` new Phonons.
        """
        assert self.is_simple
        # Binomial(n-1, 0.5) weights over i = 0..n-1 are symmetric and
        # sum to 1, so the total reorganization energy is preserved.
        rv = binom(n-1, 0.5)
        width = width.as_au()
        step = 2 * width / (n - 1)
        # NOTE(review): np.linspace already places `n` points inclusively
        # between its endpoints; the extra `+ step` pushes the grid past
        # omega + width and widens the spacing beyond `step`.  Looks like
        # an arange/linspace mix-up -- confirm intent before changing.
        omegas = np.linspace(self.omega[0] - width, self.omega[0] + width + step, n)
        phonons = []
        for i, omega in enumerate(omegas):
            # Weight each split phonon's reorganization energy by pmf(i).
            lam = rv.pmf(i) * self.reorganization_energy
            ph = Phonon.simplest_phonon(Quantity(omega), lam, lam=True)
            phonons.append(ph)
        return phonons
Example 37
Project: senior-design   Author: james-tate   File: test_distributions.py    GNU General Public License v2.0 5 votes vote down vote up
def test_rvs(self):
        vals = stats.binom.rvs(10, 0.75, size=(2, 50))
        assert_(numpy.all(vals >= 0) & numpy.all(vals <= 10))
        assert_(numpy.shape(vals) == (2, 50))
        assert_(vals.dtype.char in typecodes['AllInteger'])
        val = stats.binom.rvs(10, 0.75)
        assert_(isinstance(val, int))
        val = stats.binom(10, 0.75).rvs(3)
        assert_(isinstance(val, numpy.ndarray))
        assert_(val.dtype.char in typecodes['AllInteger']) 
Example 38
Project: gmaneLegacy   Author: ttm   File: networkPartitioning.py    The Unlicense 4 votes vote down vote up
def __init__(self,networkMeasures=None, minimum_incidence=1,metric="strength"):
        """Partition a network's agents into sectors by comparing the
        empirical degree distribution against a binomial null model.

        Parameters
        ----------
        networkMeasures : NetworkMeasures or None
            Precomputed measures; a fresh instance is built when None.
        minimum_incidence : int
            Minimum incidence used by the sectorialization step.
        metric : str
            Name of the degree-like metric to partition on
            (e.g. "strength"); standardized internally.
        """
        if not networkMeasures:
            networkMeasures=g.NetworkMeasures()
        self.metric=metric
        metric_=self.standardizeName(metric)

        # Null-model parameters derived from the measured network.
        prob, max_degree_empirical, max_degree_possible = \
                self.basicMeasures( networkMeasures , metric_)

        incident_degrees, incident_degrees_, agent_degrees = \
                  self.makeDegreeLists( networkMeasures, metric_)

        # Observed degree distribution, normalized over N agents.
        empirical_distribution = self.makeEmpiricalDistribution(
            incident_degrees, incident_degrees_, networkMeasures.N )

        # Tabulated binomial probabilities plus a frozen scipy distribution
        # for the same null model.
        binomial_distribution=self.makeBinomialDistribution(
                   prob, max_degree_possible, incident_degrees_)

        binomial=stats.binom(max_degree_possible,prob)

        #sectorialized_degrees= self.sectorializeDegrees(
        # empirical_distribution, binomial_distribution, incident_degrees_)

        #sectorialized_degrees_= self.newSectorializeDegrees(
        # empirical_distribution, binomial_distribution, incident_degrees_)

        # Only the "newer" sectorialization variant is active; the two
        # commented-out alternatives above are kept for reference.
        sectorialized_degrees__= self.newerSectorializeDegrees(
              empirical_distribution, binomial, incident_degrees_,
              max_degree_empirical,minimum_incidence,networkMeasures.N )

        #sectorialized_agents= self.sectorializeAgents(
        #     sectorialized_degrees, networkMeasures.degrees)

        #sectorialized_agents_= self.sectorializeAgents(
        #     sectorialized_degrees_, networkMeasures.degrees)

        sectorialized_agents__= self.sectorializeAgents(
             sectorialized_degrees__, agent_degrees)

        NetworkPartitioning.network_count+=1 # to keep track of how many partitions have been done

        # Stash everything on self under string keys (note: some keys keep
        # their original trailing spaces).
        self.makeSelf("incident_degrees_     ",incident_degrees_     ,
                      "incident_degrees     ",incident_degrees     ,
                      #"sectorialized_agents  ",sectorialized_agents  ,
                      #"sectorialized_agents_  ",sectorialized_agents_  ,
                      "sectorialized_agents__  ",sectorialized_agents__  ,
                      #"sectorialized_degrees ",sectorialized_degrees ,
                      #"sectorialized_degrees_ ",sectorialized_degrees_ ,
                      "sectorialized_degrees__ ",sectorialized_degrees__ ,
                      "binomial_distribution ",binomial_distribution ,
                      "prob"                  ,prob,
                      "max"                   ,(max_degree_possible, max_degree_empirical),
                      "empirical_distribution",empirical_distribution,
                      "binomial",binomial,
                      "metric_",metric_,
                      "minimum_incidence",minimum_incidence,
                      "binomial_distribution" ,binomial_distribution)
Example 39
Project: LaserTOF   Author: kyleuckert   File: test_distributions.py    MIT License 4 votes vote down vote up
def test_entropy(self):
        # Basic entropy tests.
        b = stats.binom(2, 0.5)
        expected_p = np.array([0.25, 0.5, 0.25])
        expected_h = -sum(xlogy(expected_p, expected_p))
        h = b.entropy()
        assert_allclose(h, expected_h)

        b = stats.binom(2, 0.0)
        h = b.entropy()
        assert_equal(h, 0.0)

        b = stats.binom(2, 1.0)
        h = b.entropy()
        assert_equal(h, 0.0) 
Example 40
Project: ctfdist   Author: ustunb   File: toy_helper.py    BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def generate_toy_dataset_binom(coefs, px, limits, n_samples, group_label = 0):
    """Generate a toy dataset whose features are binomially distributed.

    Parameters
    ----------
    coefs : array_like
        Linear coefficients applied to the features to produce scores.
    px : float or list of float
        Per-feature success probability (a single value is broadcast).
    limits : int, float, or list
        Per-feature number of trials (a single value is broadcast).
    n_samples : int
        Number of rows to generate.
    group_label : int (default=0)
        Value stored in the 's' (group) column.

    Returns
    -------
    df : pandas.DataFrame
        Columns 'y' (label in {-1.0, 1.0}), 's', then 'x1'..'xd'.
    handles : dict
        Callables ('generate_x', 'generate_y', 'get_px', 'get_py',
        'get_y') for sampling/evaluating the generating distribution.
    """
    # np.float / np.int were removed in NumPy >= 1.20 (hard error since
    # 1.24); the plain builtins are the documented replacements.
    coefs = np.array(coefs, dtype = float).flatten()
    n_dim = len(coefs)
    assert n_dim >= 1

    if isinstance(px, float):
        px = np.array([px], dtype = float)
    elif isinstance(px, list):
        px = np.array(px, dtype = float)

    if len(px) == 1:
        px = np.repeat(px, n_dim)

    assert len(px) == n_dim

    if isinstance(limits, (int, float)):
        limits = np.array([limits], dtype = int)
    elif isinstance(limits, list):
        limits = np.array(limits, dtype = int)

    if len(limits) == 1:
        limits = np.repeat(limits, n_dim)

    assert len(limits) == n_dim

    # One frozen binomial distribution per feature.
    X = [binom(n = limits[i], p = px[i]) for i in range(n_dim)]
    # np.vstack requires a sequence of arrays (a generator is a hard
    # error in recent NumPy), so build a list before stacking.
    generate_x = lambda n: np.vstack([x.rvs(n) for x in X]).transpose()

    def get_px(x):
        # Joint pmf of the independent features, computed in log-space.
        if x.ndim == 1:
            p = [p.pmf(x[i]) for i, p in enumerate(X)]
            return np.exp(np.sum(np.log(p)))
        else:
            p = [p.pmf(x[:, i]) for i, p in enumerate(X)]
            return np.exp(np.sum(np.log(p), axis = 0))

    # NOTE(review): `logit` maps (0, 1) -> R, so applying it to a raw
    # linear score looks like it was meant to be `expit` (its inverse).
    # Left unchanged to preserve behavior -- confirm against callers.
    get_py = lambda x: logit(np.dot(x, coefs))
    simulate_uniform = lambda p: np.greater(p, np.random.uniform(0.0, 1.0, p.shape))
    generate_y = lambda x: 2.0 * simulate_uniform(get_py(x)) - 1.0
    get_y = lambda x: 2.0 * np.greater(get_py(x), 0.5) - 1.0

    # build data frame
    x_names = ['x%d' % (j + 1) for j in range(n_dim)]
    x = generate_x(n_samples)
    y = generate_y(x)
    df = pd.DataFrame(x, columns = x_names)
    df.insert(0, 's', group_label)
    df.insert(0, 'y', y)

    handles = {'generate_x': generate_x,
               'generate_y': generate_y,
               'get_px': get_px,
               'get_py': get_py,
               'get_y': get_y}

    return df, handles
Example 41
Project: linear_neuron   Author: uglyboxer   File: test_cross_validation.py    MIT License 4 votes vote down vote up
def test_stratified_shuffle_split_even():
    # Test the StratifiedShuffleSplit, indices are drawn with a
    # equal chance
    n_folds = 5
    n_iter = 1000

    def assert_counts_are_ok(idx_counts, p):
        # Here we test that the distribution of the counts
        # per index is close enough to a binomial
        # NOTE(review): `n_splits` is a late-binding closure over the
        # enclosing function -- it is only assigned inside the for-loop
        # below, so this helper must not be called before that loop runs.
        threshold = 0.05 / n_splits
        bf = stats.binom(n_splits, p)
        for count in idx_counts:
            # Rebinding `p` here deliberately shadows the expected
            # probability argument with the pmf value for this count.
            p = bf.pmf(count)
            assert_true(p > threshold,
                        "An index is not drawn with chance corresponding "
                        "to even draws")

    for n_samples in (6, 22):
        labels = np.array((n_samples // 2) * [0, 1])
        splits = cval.StratifiedShuffleSplit(labels, n_iter=n_iter,
                                             test_size=1. / n_folds,
                                             random_state=0)

        # Tally how often each sample index lands in train / test across
        # all iterations.
        train_counts = [0] * n_samples
        test_counts = [0] * n_samples
        n_splits = 0
        for train, test in splits:
            n_splits += 1
            for counter, ids in [(train_counts, train), (test_counts, test)]:
                for id in ids:
                    counter[id] += 1
        assert_equal(n_splits, n_iter)

        # `train`/`test` still hold the last split produced by the loop.
        assert_equal(len(train), splits.n_train)
        assert_equal(len(test), splits.n_test)
        assert_equal(len(set(train).intersection(test)), 0)

        label_counts = np.unique(labels)
        assert_equal(splits.test_size, 1.0 / n_folds)
        assert_equal(splits.n_train + splits.n_test, len(labels))
        assert_equal(len(label_counts), 2)
        ex_test_p = float(splits.n_test) / n_samples
        ex_train_p = float(splits.n_train) / n_samples

        # Each index should be drawn with roughly its expected frequency.
        assert_counts_are_ok(train_counts, ex_train_p)
        assert_counts_are_ok(test_counts, ex_test_p)
Example 42
Project: Weiss   Author: WangWenjun559   File: test_cross_validation.py    Apache License 2.0 4 votes vote down vote up
def test_stratified_shuffle_split_even():
    # Test the StratifiedShuffleSplit, indices are drawn with a
    # equal chance
    n_folds = 5
    n_iter = 1000

    def assert_counts_are_ok(idx_counts, p):
        # Here we test that the distribution of the counts
        # per index is close enough to a binomial
        # NOTE(review): `n_splits` is a late-binding closure over the
        # enclosing function -- it is only assigned inside the for-loop
        # below, so this helper must not be called before that loop runs.
        threshold = 0.05 / n_splits
        bf = stats.binom(n_splits, p)
        for count in idx_counts:
            # Rebinding `p` here deliberately shadows the expected
            # probability argument with the pmf value for this count.
            p = bf.pmf(count)
            assert_true(p > threshold,
                        "An index is not drawn with chance corresponding "
                        "to even draws")

    for n_samples in (6, 22):
        labels = np.array((n_samples // 2) * [0, 1])
        splits = cval.StratifiedShuffleSplit(labels, n_iter=n_iter,
                                             test_size=1. / n_folds,
                                             random_state=0)

        # Tally how often each sample index lands in train / test across
        # all iterations.
        train_counts = [0] * n_samples
        test_counts = [0] * n_samples
        n_splits = 0
        for train, test in splits:
            n_splits += 1
            for counter, ids in [(train_counts, train), (test_counts, test)]:
                for id in ids:
                    counter[id] += 1
        assert_equal(n_splits, n_iter)

        # `train`/`test` still hold the last split produced by the loop.
        assert_equal(len(train), splits.n_train)
        assert_equal(len(test), splits.n_test)
        assert_equal(len(set(train).intersection(test)), 0)

        label_counts = np.unique(labels)
        assert_equal(splits.test_size, 1.0 / n_folds)
        assert_equal(splits.n_train + splits.n_test, len(labels))
        assert_equal(len(label_counts), 2)
        ex_test_p = float(splits.n_test) / n_samples
        ex_train_p = float(splits.n_train) / n_samples

        # Each index should be drawn with roughly its expected frequency.
        assert_counts_are_ok(train_counts, ex_train_p)
        assert_counts_are_ok(test_counts, ex_test_p)
Example 43
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_split.py    Apache License 2.0 4 votes vote down vote up
def test_stratified_shuffle_split_even():
    # Test the StratifiedShuffleSplit, indices are drawn with a
    # equal chance
    n_folds = 5
    n_splits = 1000

    def assert_counts_are_ok(idx_counts, p):
        # Here we test that the distribution of the counts
        # per index is close enough to a binomial
        threshold = 0.05 / n_splits
        bf = stats.binom(n_splits, p)
        for count in idx_counts:
            prob = bf.pmf(count)
            assert_true(prob > threshold,
                        "An index is not drawn with chance corresponding "
                        "to even draws")

    for n_samples in (6, 22):
        groups = np.array((n_samples // 2) * [0, 1])
        splits = StratifiedShuffleSplit(n_splits=n_splits,
                                        test_size=1. / n_folds,
                                        random_state=0)

        # Tally how often each sample index lands in train / test across
        # all splits.
        train_counts = [0] * n_samples
        test_counts = [0] * n_samples
        n_splits_actual = 0
        for train, test in splits.split(X=np.ones(n_samples), y=groups):
            n_splits_actual += 1
            for counter, ids in [(train_counts, train), (test_counts, test)]:
                for id in ids:
                    counter[id] += 1
        assert_equal(n_splits_actual, n_splits)

        n_train, n_test = _validate_shuffle_split(
            n_samples, test_size=1. / n_folds, train_size=1. - (1. / n_folds))

        # `train`/`test` still hold the last split produced by the loop.
        assert_equal(len(train), n_train)
        assert_equal(len(test), n_test)
        assert_equal(len(set(train).intersection(test)), 0)

        group_counts = np.unique(groups)
        assert_equal(splits.test_size, 1.0 / n_folds)
        assert_equal(n_train + n_test, len(groups))
        assert_equal(len(group_counts), 2)
        ex_test_p = float(n_test) / n_samples
        ex_train_p = float(n_train) / n_samples

        # Each index should be drawn with roughly its expected frequency.
        assert_counts_are_ok(train_counts, ex_train_p)
        assert_counts_are_ok(test_counts, ex_test_p)
Example 44
Project: wine-ml-on-aws-lambda   Author: pierreant   File: test_cross_validation.py    Apache License 2.0 4 votes vote down vote up
def test_stratified_shuffle_split_even():
    # Test the StratifiedShuffleSplit, indices are drawn with a
    # equal chance
    n_folds = 5
    n_iter = 1000

    def assert_counts_are_ok(idx_counts, p):
        # Here we test that the distribution of the counts
        # per index is close enough to a binomial
        # NOTE(review): `n_splits` is a late-binding closure over the
        # enclosing function -- it is only assigned inside the for-loop
        # below, so this helper must not be called before that loop runs.
        threshold = 0.05 / n_splits
        bf = stats.binom(n_splits, p)
        for count in idx_counts:
            # Rebinding `p` here deliberately shadows the expected
            # probability argument with the pmf value for this count.
            p = bf.pmf(count)
            assert_true(p > threshold,
                        "An index is not drawn with chance corresponding "
                        "to even draws")

    for n_samples in (6, 22):
        labels = np.array((n_samples // 2) * [0, 1])
        splits = cval.StratifiedShuffleSplit(labels, n_iter=n_iter,
                                             test_size=1. / n_folds,
                                             random_state=0)

        # Tally how often each sample index lands in train / test across
        # all iterations.
        train_counts = [0] * n_samples
        test_counts = [0] * n_samples
        n_splits = 0
        for train, test in splits:
            n_splits += 1
            for counter, ids in [(train_counts, train), (test_counts, test)]:
                for id in ids:
                    counter[id] += 1
        assert_equal(n_splits, n_iter)

        # `train`/`test` still hold the last split produced by the loop.
        assert_equal(len(train), splits.n_train)
        assert_equal(len(test), splits.n_test)
        assert_equal(len(set(train).intersection(test)), 0)

        label_counts = np.unique(labels)
        assert_equal(splits.test_size, 1.0 / n_folds)
        assert_equal(splits.n_train + splits.n_test, len(labels))
        assert_equal(len(label_counts), 2)
        ex_test_p = float(splits.n_test) / n_samples
        ex_train_p = float(splits.n_train) / n_samples

        # Each index should be drawn with roughly its expected frequency.
        assert_counts_are_ok(train_counts, ex_train_p)
        assert_counts_are_ok(test_counts, ex_test_p)
Example 45
Project: mixedvines   Author: asnelt   File: marginal.py    GNU General Public License v3.0 4 votes vote down vote up
def fit(samples, is_continuous):
        '''
        Fits a distribution to the given samples.

        Parameters
        ----------
        samples : array_like
            Array of samples.
        is_continuous : bool
            If `True` then a continuous distribution is fitted.  Otherwise, a
            discrete distribution is fitted.

        Returns
        -------
        best_marginal : Marginal
            The distribution fitted to `samples`.
        '''
        sample_mean = np.mean(samples)
        sample_var = np.var(samples)

        # Candidate families: gamma only for strictly positive data,
        # nbinom only for over-dispersed counts (var > mean).
        if is_continuous:
            candidates = [norm, gamma] if np.all(samples > 0) else [norm]
        elif sample_var > sample_mean:
            candidates = [poisson, binom, nbinom]
        else:
            candidates = [poisson, binom]

        params = np.empty(len(candidates), dtype=object)
        marginals = np.empty(len(candidates), dtype=object)
        for i, dist in enumerate(candidates):
            # Method-of-moments estimates for the discrete families,
            # maximum likelihood (`dist.fit`) for the continuous ones.
            if dist == poisson:
                params[i] = [sample_mean]
            elif dist == binom:
                trials = np.max(samples)
                params[i] = [trials, np.sum(samples) / (trials * len(samples))]
            elif dist == nbinom:
                params[i] = [sample_mean * sample_mean / (sample_var - sample_mean),
                             sample_mean / sample_var]
            else:
                params[i] = dist.fit(samples)
            marginals[i] = Marginal(dist(*params[i]))

        # Pick the candidate with the lowest Akaike information criterion.
        aic = [2 * len(params[i]) - 2 * np.sum(marginals[i].logpdf(samples))
               for i in range(len(candidates))]
        return marginals[np.argmin(aic)]