Python pymc3.sample() Examples

The following are 27 code examples of pymc3.sample(), drawn from open-source projects. You can go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the pymc3 module, or try the search function.
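Before diving into the project examples, here is a minimal, self-contained sketch of the basic pm.sample() pattern. It is illustrative only: the coin-flip data, priors, and sample counts are assumptions, not taken from any of the projects below.

import numpy as np
import pymc3 as pm

# illustrative data: 100 coin flips from a coin with unknown bias
data = np.random.binomial(1, 0.6, size=100)

with pm.Model():
    # flat Beta(1, 1) prior on the bias
    p = pm.Beta("p", alpha=1.0, beta=1.0)
    # Bernoulli likelihood for the observed flips
    pm.Bernoulli("obs", p=p, observed=data)
    # 1000 posterior draws per chain after 1000 tuning steps
    trace = pm.sample(1000, tune=1000, chains=2)

print(trace["p"].mean())  # posterior mean of the coin bias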
Example #1
Source File: cbc_hb.py    From lifestyles with MIT License
def model(profiles, comparisons, selections, sample=2500, alpha_prior_std=10):
    all_attributes = pd.get_dummies(profiles).columns
    profiles_dummies = pd.get_dummies(profiles, drop_first=True)
    choices = pd.concat({profile: profiles_dummies.loc[comparisons[profile]].reset_index(drop=True) for profile in comparisons.columns}, axis=1)

    respondants = selections.columns
    n_attributes_in_model = profiles_dummies.shape[1]
    n_participants = selections.shape[1]

    with pm.Model():

        # https://www.sawtoothsoftware.com/download/ssiweb/CBCHB_Manual.pdf
        # need to include the covariance matrix as a parent of `partsworth`
        alpha = pm.Normal('alpha', 0, sd=alpha_prior_std, shape=n_attributes_in_model, testval=np.random.randn(n_attributes_in_model))
        partsworth = pm.MvNormal("partsworth", alpha, tau=np.eye(n_attributes_in_model), shape=(n_participants, n_attributes_in_model))

        cs = [_create_observation_variable(selection, choices, partsworth[i, :]) for i, (_, selection) in enumerate(selections.iteritems())]

        trace = pm.sample(sample)
    return transform_trace_to_individual_summary_statistics(trace, respondants, profiles_dummies.columns, all_attributes) 
Example #2
Source File: bayesian.py    From cryptotrader with MIT License
def fit(self, X, Y, n_samples=10000, tune_steps=1000, n_jobs=4):
        with pm.Model() as self.model:
            # Priors
            std = pm.Uniform("std", 0, self.sps, testval=X.std())
            beta = pm.StudentT("beta", mu=0, lam=self.sps, nu=self.nu)
            alpha = pm.StudentT("alpha", mu=0, lam=self.sps, nu=self.nu, testval=Y.mean())
            # Deterministic model
            mean = pm.Deterministic("mean", alpha + beta * X)
            # Posterior distribution
            obs = pm.Normal("obs", mu=mean, sd=std, observed=Y)
            ## Run MCMC
            # Find the sampler start value with maximum a posteriori (MAP) estimation
            start = pm.find_MAP()
            # sample posterior distribution for latent variables
            trace = pm.sample(n_samples, njobs=n_jobs, tune=tune_steps, start=start)
            # Recover posterior samples
            self.burned_trace = trace[int(n_samples / 2):] 
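Note that `njobs` was deprecated in favor of `cores` in later PyMC3 releases; on a recent PyMC3 the call above would look roughly like this (a sketch assuming the same variables are in scope):

trace = pm.sample(n_samples, cores=n_jobs, tune=tune_steps, start=start)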
Example #3
Source File: helpers.py    From arviz with Apache License 2.0
def pymc3_noncentered_schools(data, draws, chains):
    """Non-centered eight schools implementation for pymc3."""
    import pymc3 as pm

    with pm.Model() as model:
        mu = pm.Normal("mu", mu=0, sd=5)
        tau = pm.HalfCauchy("tau", beta=5)
        eta = pm.Normal("eta", mu=0, sd=1, shape=data["J"])
        theta = pm.Deterministic("theta", mu + tau * eta)
        pm.Normal("obs", mu=theta, sd=data["sigma"], observed=data["y"])
        trace = pm.sample(draws, chains=chains)
    return model, trace 
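A usage sketch for the helper above with the classic eight-schools data; the dictionary keys match what the function reads ('J', 'sigma', 'y'), and the draw/chain counts are arbitrary:

import numpy as np

eight_schools = {
    "J": 8,
    "y": np.array([28.0, 8.0, -3.0, 7.0, -1.0, 1.0, 18.0, 12.0]),
    "sigma": np.array([15.0, 10.0, 16.0, 11.0, 9.0, 11.0, 10.0, 18.0]),
}

model, trace = pymc3_noncentered_schools(eight_schools, draws=500, chains=2)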
Example #4
Source File: helpers.py    From arviz with Apache License 2.0
def _numpyro_noncentered_model(J, sigma, y=None):
    import numpyro
    import numpyro.distributions as dist

    mu = numpyro.sample("mu", dist.Normal(0, 5))
    tau = numpyro.sample("tau", dist.HalfCauchy(5))
    with numpyro.plate("J", J):
        eta = numpyro.sample("eta", dist.Normal(0, 1))
        theta = mu + tau * eta
        return numpyro.sample("obs", dist.Normal(theta, sigma), obs=y) 
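To actually draw samples from this NumPyro model, it would typically be wrapped in numpyro.infer.MCMC with a NUTS kernel; a sketch, assuming the same eight-schools arrays as above:

import jax
import numpy as np
from numpyro.infer import MCMC, NUTS

sigma = np.array([15.0, 10.0, 16.0, 11.0, 9.0, 11.0, 10.0, 18.0])
y = np.array([28.0, 8.0, -3.0, 7.0, -1.0, 1.0, 18.0, 12.0])

mcmc = MCMC(NUTS(_numpyro_noncentered_model), num_warmup=500, num_samples=500)
mcmc.run(jax.random.PRNGKey(0), J=8, sigma=sigma, y=y)
mcmc.print_summary()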
Example #5
Source File: helpers.py    From arviz with Apache License 2.0
def _pyro_noncentered_model(J, sigma, y=None):
    import pyro
    import pyro.distributions as dist

    mu = pyro.sample("mu", dist.Normal(0, 5))
    tau = pyro.sample("tau", dist.HalfCauchy(5))
    with pyro.plate("J", J):
        eta = pyro.sample("eta", dist.Normal(0, 1))
        theta = mu + tau * eta
        return pyro.sample("obs", dist.Normal(theta, sigma), obs=y) 
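The Pyro version is sampled the same way via pyro.infer.MCMC, with the data passed as torch tensors; again a sketch with assumed eight-schools values:

import torch
from pyro.infer import MCMC, NUTS

sigma = torch.tensor([15.0, 10.0, 16.0, 11.0, 9.0, 11.0, 10.0, 18.0])
y = torch.tensor([28.0, 8.0, -3.0, 7.0, -1.0, 1.0, 18.0, 12.0])

mcmc = MCMC(NUTS(_pyro_noncentered_model), num_samples=500, warmup_steps=500)
mcmc.run(8, sigma, y)
print(mcmc.get_samples()["mu"].mean())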
Example #6
Source File: imcmc.py    From imcmc with MIT License
def sample_color(image, samples=5000, tune=1000, nchains=4):
    """Run MCMC on a color image. EXPERIMENTAL!

    Parameters
    ----------
    image : numpy.ndarray
        Image array from `load_image`.  Should have `image.ndim == 3` (height, width, RGB).

    samples : int
        Number of samples to draw from the image

    tune : int
        All chains start at the same spot, so it is good to let them wander
        apart a bit before beginning

    Returns
    -------
    pymc3.MultiTrace of samples from the image. Each sample is an (x, y) float
        of indices that were sampled, with three variables named 'red',
        'green', 'blue'.
    """

    with pm.Model():
        pm.DensityDist('red', ImageLikelihood(image[:, :, 0]), shape=2)
        pm.DensityDist('green', ImageLikelihood(image[:, :, 1]), shape=2)
        pm.DensityDist('blue', ImageLikelihood(image[:, :, 2]), shape=2)

        trace = pm.sample(samples, chains=nchains, tune=tune, step=pm.Metropolis())
    return trace 
Example #7
Source File: pymc3.py    From pyPESTO with BSD 3-Clause "New" or "Revised" License
def sample(
            self, n_samples: int, beta: float = 1.):
        problem = self.problem
        log_post_fun = TheanoLogProbability(problem, beta)
        trace = self.trace

        x0 = None
        if self.x0 is not None:
            x0 = {x_name: val
                  for x_name, val in zip(self.problem.x_names, self.x0)}

        # create model context
        with pm.Model() as model:
            # uniform bounds
            k = [pm.Uniform(x_name, lower=lb, upper=ub)
                 for x_name, lb, ub in
                 zip(problem.get_reduced_vector(problem.x_names),
                     problem.lb, problem.ub)]

            # convert to tensor vector
            theta = tt.as_tensor_variable(k)

            # use a DensityDist for the log-posterior
            pm.DensityDist('log_post', logp=lambda v: log_post_fun(v),
                           observed={'v': theta})

            # step, by default automatically determined by pymc3
            step = None
            if self.step_function:
                step = self.step_function()

            # perform the actual sampling
            trace = pm.sample(
                draws=int(n_samples), trace=trace, start=x0, step=step,
                **self.options)

            # convert trace to inference data object
            data = az.from_pymc3(trace=trace, model=model)

        self.trace = trace
        self.data = data 
Example #8
Source File: mcmc_sampler.py    From dowhy with MIT License
def fit_causal_model(self, g, df, data_types, initialization_trace=None):
        if nx.is_directed_acyclic_graph(g):
            with pm.Model() as model:
                g = self.apply_data_types(g, data_types)
                g = self.apply_parents(g)
                g = self.apply_parameters(g, df, initialization_trace=initialization_trace)
                g = self.build_bayesian_network(g, df)
                trace = pm.sample(1000, tune=1000)
        else:
            raise Exception("Graph is not a DAG!")
        return g, trace 
Example #9
Source File: bayesian_regression.py    From autoimpute with MIT License
def __init__(self, **kwargs):
        """Create an instance of the BayesianLeastSquaresImputer class.

        The class requires multiple arguments necessary to create priors for
        a bayesian linear regression equation. The regression is:
        alpha + beta * X + epsilon. Because parameters are treated as
        random variables, we must specify their distributions, including
        the parameters of those distributions. In this init method we also
        include arguments used to sample the posterior distributions.

        Args:
            **kwargs: default keyword arguments used for bayesian analysis.
                Note - kwargs popped for default arguments defined below.
                Rest of kwargs passed as params to sampling (see pymc3).
            am (float, Optional): mean of alpha prior. Default 0.
            asd (float, Optional): std. deviation of alpha prior. Default 10.
            bm (float, Optional): mean of beta priors. Default 0.
            bsd (float, Optional): std. deviation of beta priors. Default 10.
            sig (float, Optional): parameter of sigma prior. Default 1.
            sample (int, Optional): number of posterior samples per chain.
                Default = 1000. More samples, longer to run, but better
                approximation of the posterior & chance of convergence.
            tune (int, Optional): parameter for tuning. Draws done in addition
                to sample. Default = 1000.
            init (str, Optional): MCMC algo to use for posterior sampling.
                Default = 'auto'. See pymc3 docs for more info on choices.
            fill_value (str, Optional): How to draw from the posterior to
                create imputations. Default is None. 'random' and 'mean'
                supported for explicit options.
        """
        self.am = kwargs.pop("am", 0)
        self.asd = kwargs.pop("asd", 10)
        self.bm = kwargs.pop("bm", 0)
        self.bsd = kwargs.pop("bsd", 10)
        self.sig = kwargs.pop("sig", 1)
        self.sample = kwargs.pop("sample", 1000)
        self.tune = kwargs.pop("tune", 1000)
        self.init = kwargs.pop("init", "auto")
        self.fill_value = kwargs.pop("fill_value", None)
        self.sample_kwargs = kwargs 
Example #10
Source File: hbr.py    From nispat with GNU General Public License v3.0
def sample_prior_predictive(self, X, batch_effects, samples, trace=None):
        """ Function to sample from prior predictive distribution """
        
        if len(X.shape)==1:
            X = np.expand_dims(X, axis=1)
        if len(batch_effects.shape)==1:
            batch_effects = np.expand_dims(batch_effects, axis=1)
            
        self.batch_effects_num = batch_effects.shape[1]
        self.batch_effects_size = []
        for i in range(self.batch_effects_num):
            self.batch_effects_size.append(len(np.unique(batch_effects[:,i])))
                
        y = np.zeros([X.shape[0],1])
        
        if self.model_type == 'linear': 
            with hbr(X, y, batch_effects, self.batch_effects_size, self.configs,
                     trace):
                ppc = pm.sample_prior_predictive(samples=samples) 
        elif self.model_type == 'polynomial':
            X = create_poly_basis(X, self.configs['order'])
            with hbr(X, y, batch_effects, self.batch_effects_size, self.configs,
                     trace):
                ppc = pm.sample_prior_predictive(samples=samples) 
        elif self.model_type == 'bspline': 
            self.bsp = bspline_fit(X, self.configs['order'], self.configs['nknots'])
            X = bspline_transform(X, self.bsp)
            with hbr(X, y, batch_effects, self.batch_effects_size, self.configs,
                     trace):
                ppc = pm.sample_prior_predictive(samples=samples) 
        elif self.model_type == 'nn': 
            with nn_hbr(X, y, batch_effects, self.batch_effects_size, self.configs,
                        trace):
                ppc = pm.sample_prior_predictive(samples=samples) 
        
        return ppc 
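For reference, pm.sample_prior_predictive (used in each branch above) returns a plain dict mapping variable names to arrays of prior draws and can be called inside any model context; a minimal standalone sketch with an illustrative toy model (not from nispat):

import pymc3 as pm

with pm.Model():
    mu = pm.Normal("mu", mu=0.0, sd=1.0)
    pm.Normal("y", mu=mu, sd=1.0, shape=10)
    prior = pm.sample_prior_predictive(samples=500)

print(prior["mu"].shape)  # (500,)
print(prior["y"].shape)   # (500, 10)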
Example #11
Source File: stochastic_volatility.py    From fin with MIT License
def main():

    #load data    
    returns = data.get_data_google('SPY', start='2008-5-1', end='2009-12-1')['Close'].pct_change()
    returns.plot()
    plt.ylabel('daily returns in %');
    
    with pm.Model() as sp500_model:
        
        nu = pm.Exponential('nu', 1./10, testval=5.0)
        sigma = pm.Exponential('sigma', 1./0.02, testval=0.1)
        
        s = pm.GaussianRandomWalk('s', sigma**-2, shape=len(returns))                
        r = pm.StudentT('r', nu, lam=pm.math.exp(-2*s), observed=returns)
        
    
    with sp500_model:
        trace = pm.sample(2000)

    pm.traceplot(trace, [nu, sigma]);
    plt.show()
    
    plt.figure()
    returns.plot()
    plt.plot(returns.index, np.exp(trace['s',::5].T), 'r', alpha=.03)
    plt.legend(['S&P500', 'stochastic volatility process'])
    plt.show() 
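Note that pandas' Google Finance reader has been defunct for years, so the get_data_google call above no longer works. A hedged stand-in is to feed the same model a returns Series built from a local CSV or from synthetic data, for example:

import numpy as np
import pandas as pd

# synthetic stand-in for SPY daily returns, only to exercise the model
rng = np.random.default_rng(0)
index = pd.bdate_range("2008-05-01", "2009-12-01")
returns = pd.Series(0.01 * rng.standard_normal(len(index)), index=index)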
Example #12
Source File: tStudentProcessMCMC.py    From pyGPGO with MIT License
def predict(self, Xstar, return_std=False, nsamples=10):
        """
        Returns mean and covariances for each posterior sampled Student-t Process.

        Parameters
        ----------
        Xstar: np.ndarray, shape=((nsamples, nfeatures))
            Testing instances to predict.
        return_std: bool
            Whether to return the standard deviation of the posterior process. Otherwise,
            it returns the whole covariance matrix of the posterior process.
        nsamples:
            Number of posterior MCMC samples to consider.

        Returns
        -------
        np.ndarray
            Mean of the posterior process for each MCMC sample and Xstar.
        np.ndarray
            Covariance posterior process for each MCMC sample and Xstar.
        """
        chunk = list(self.trace)
        chunk = chunk[::-1][:nsamples]
        post_mean = []
        post_var = []
        for posterior_sample in chunk:
            params = self._extractParam(posterior_sample, self.covfunc.parameters)
            covfunc = self.covfunc.__class__(**params)
            gp = tStudentProcess(covfunc, nu=self.nu + self.n)
            gp.fit(self.X, self.y)
            m, s = gp.predict(Xstar, return_std=return_std)
            post_mean.append(m)
            post_var.append(s)
        return np.array(post_mean), np.array(post_var) 
Example #13
Source File: tStudentProcessMCMC.py    From pyGPGO with MIT License
def fit(self, X, y):
        """
        Fits a Student-t regressor using MCMC.

        Parameters
        ----------
        X: np.ndarray, shape=(nsamples, nfeatures)
            Training instances to fit the GP.
        y: np.ndarray, shape=(nsamples,)
            Corresponding continuous target values to `X`.

        """
        self.X = X
        self.n = self.X.shape[0]
        self.y = y
        self.model = pm.Model()

        with self.model as model:
            l = pm.Uniform('l', 0, 10)

            log_s2_f = pm.Uniform('log_s2_f', lower=-7, upper=5)
            s2_f = pm.Deterministic('sigmaf', tt.exp(log_s2_f))

            log_s2_n = pm.Uniform('log_s2_n', lower=-7, upper=5)
            s2_n = pm.Deterministic('sigman', tt.exp(log_s2_n))

            f_cov = s2_f * covariance_equivalence[type(self.covfunc).__name__](1, l)
            Sigma = f_cov(self.X) + tt.eye(self.n) * s2_n ** 2
            y_obs = pm.MvStudentT('y_obs', nu=self.nu, mu=np.zeros(self.n), Sigma=Sigma, observed=self.y)
        with self.model as model:
            if self.step is not None:
                self.trace = pm.sample(self.niter, step=self.step())[self.burnin:]
            else:
                self.trace = pm.sample(self.niter, init=self.init)[self.burnin:] 
Example #14
Source File: GaussianProcessMCMC.py    From pyGPGO with MIT License
def predict(self, Xstar, return_std=False, nsamples=10):
        """
        Returns mean and covariances for each posterior sampled Gaussian Process.

        Parameters
        ----------
        Xstar: np.ndarray, shape=((nsamples, nfeatures))
            Testing instances to predict.
        return_std: bool
            Whether to return the standard deviation of the posterior process. Otherwise,
            it returns the whole covariance matrix of the posterior process.
        nsamples:
            Number of posterior MCMC samples to consider.

        Returns
        -------
        np.ndarray
            Mean of the posterior process for each MCMC sample and `Xstar`.
        np.ndarray
            Covariance posterior process for each MCMC sample and `Xstar`.
        """
        chunk = list(self.trace)
        chunk = chunk[::-1][:nsamples]
        post_mean = []
        post_var = []
        for posterior_sample in chunk:
            params = self._extractParam(posterior_sample, self.covfunc.parameters)
            covfunc = self.covfunc.__class__(**params)
            gp = GaussianProcess(covfunc)
            gp.fit(self.X, self.y)
            m, s = gp.predict(Xstar, return_std=return_std)
            post_mean.append(m)
            post_var.append(s)
        return np.array(post_mean), np.array(post_var) 
Example #15
Source File: GaussianProcessMCMC.py    From pyGPGO with MIT License
def fit(self, X, y):
        """
        Fits a Gaussian Process regressor using MCMC.

        Parameters
        ----------
        X: np.ndarray, shape=(nsamples, nfeatures)
            Training instances to fit the GP.
        y: np.ndarray, shape=(nsamples,)
            Corresponding continuous target values to `X`.

        """
        self.X = X
        self.n = self.X.shape[0]
        self.y = y
        self.model = pm.Model()

        with self.model as model:
            l = pm.Uniform('l', 0, 10)

            log_s2_f = pm.Uniform('log_s2_f', lower=-7, upper=5)
            s2_f = pm.Deterministic('sigmaf', tt.exp(log_s2_f))

            log_s2_n = pm.Uniform('log_s2_n', lower=-7, upper=5)
            s2_n = pm.Deterministic('sigman', tt.exp(log_s2_n))

            f_cov = s2_f * covariance_equivalence[type(self.covfunc).__name__](1, l)
            Sigma = f_cov(self.X) + tt.eye(self.n) * s2_n ** 2
            y_obs = pm.MvNormal('y_obs', mu=np.zeros(self.n), cov=Sigma, observed=self.y)
        with self.model as model:
            if self.step is not None:
                self.trace = pm.sample(self.niter, step=self.step())[self.burnin:]
            else:
                self.trace = pm.sample(self.niter, init=self.init)[self.burnin:] 
Example #16
Source File: bayesian.py    From cryptotrader with MIT License
def show_posteriors(self, kde=True):
        if hasattr(self, 'burned_trace'):
            pm.plots.traceplot(trace=self.burned_trace, varnames=["std", "beta", "alpha"])
            pm.plot_posterior(trace=self.burned_trace, varnames=["std", "beta", "alpha"], kde_plot=kde)
            pm.plots.autocorrplot(trace=self.burned_trace, varnames=["std", "beta", "alpha"])
        else:
            print("You must sample from the posteriors first.") 
Example #17
Source File: likelihoods.py    From cs-ranking with Apache License 2.0
def fit_pymc3_model(self, sampler, draws, tune, vi_params, **kwargs):
    callbacks = vi_params.get("callbacks", [])
    for i, c in enumerate(callbacks):
        if isinstance(c, CheckParametersConvergence):
            params = c.__dict__
            params.pop("_diff")
            params.pop("prev")
            params.pop("ord")
            params["diff"] = "absolute"
            callbacks[i] = CheckParametersConvergence(**params)
    if sampler == "variational":
        with self.model:
            try:
                self.trace = pm.sample(chains=2, cores=8, tune=5, draws=5)
                vi_params["start"] = self.trace[-1]
                self.trace_vi = pm.fit(**vi_params)
                self.trace = self.trace_vi.sample(draws=draws)
            except Exception as e:
                if hasattr(e, "message"):
                    message = e.message
                else:
                    message = e
                self.logger.error(message)
                self.trace_vi = None
        if self.trace_vi is None and self.trace is None:
            with self.model:
                self.logger.info(
                    "Error in vi ADVI sampler using Metropolis sampler with draws {}".format(
                        draws
                    )
                )
                self.trace = pm.sample(
                    chains=1, cores=4, tune=20, draws=20, step=pm.NUTS()
                )
    elif sampler == "metropolis":
        with self.model:
            start = pm.find_MAP()
            self.trace = pm.sample(
                chains=2,
                cores=8,
                tune=tune,
                draws=draws,
                **kwargs,
                step=pm.Metropolis(),
                start=start,
            )
    else:
        with self.model:
            self.trace = pm.sample(
                chains=2, cores=8, tune=tune, draws=draws, **kwargs, step=pm.NUTS()
            ) 
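As a point of reference for the 'variational' branch above, pm.fit returns an approximation object whose .sample() method yields a trace-like object. A minimal standalone sketch of that pattern (the toy model and sizes are illustrative):

import numpy as np
import pymc3 as pm

data = np.random.normal(1.0, 2.0, size=200)

with pm.Model():
    mu = pm.Normal("mu", mu=0.0, sd=10.0)
    sd = pm.HalfNormal("sd", sd=10.0)
    pm.Normal("obs", mu=mu, sd=sd, observed=data)

    approx = pm.fit(n=10000, method="advi")  # mean-field ADVI
    trace = approx.sample(draws=1000)        # draw from the fitted approximation

print(trace["mu"].mean(), trace["sd"].mean())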
Example #18
Source File: pymc.py    From bambi with MIT License
def run(self, start=None, method="mcmc", init="auto", n_init=50000, **kwargs):
        """Run the PyMC3 MCMC sampler.

        Parameters
        ----------
        start: dict, or array of dict
            Starting parameter values to pass to the sampler; see ``pm.sample()`` for details.
        method: str
            The method to use for fitting the model. By default, 'mcmc', in which case the
            PyMC3 sampler will be used. Alternatively, 'advi', in which case the model will be
            fitted using  automatic differentiation variational inference as implemented in PyMC3.
            Finally, 'laplace', in which case a Laplace approximation is used; 'laplace' is not
            recommended other than for pedagogical use.
        init: str
            Initialization method (see PyMC3 sampler documentation). Currently, this is
            ``'jitter+adapt_diag'``, but this can change in the future.
        n_init: int
            Number of initialization iterations if init = 'advi' or 'nuts'. The PyMC3 default is
            rather high for the kinds of models we expect to run with bambi, so we lower it
            considerably.

        Returns
        -------
        An ArviZ InferenceData instance.
        """
        model = self.model

        if method.lower() == "mcmc":
            samples = kwargs.pop("samples", 1000)
            with model:
                self.trace = pm.sample(samples, start=start, init=init, n_init=n_init, **kwargs)

            return from_pymc3(self.trace, model=model)

        elif method.lower() == "advi":
            with model:
                self.advi_params = pm.variational.ADVI(start, **kwargs)
            return (
                self.advi_params
            )  # this should return an InferenceData object (once arviz adds support for VI)

        elif method.lower() == "laplace":
            return _laplace(model) 
Example #19
Source File: imcmc.py    From imcmc with MIT License
def sample_grayscale(image, samples=5000, tune=100, nchains=4, threshold=0.2):
    """Run MCMC on a 1 color image. Works best on logos or text.

    Parameters
    ----------
    image : numpy.ndarray
        Image array from `load_image`.  Should have `image.ndim == 2`.

    samples : int
        Number of samples to draw from the image

    tune : int
        Number of tuning steps to take. Note that this adjusts the step size:
            if you want smaller steps, make tune closer to 0.

    nchains : int
        Number of chains to sample with. This will later turn into the number
        of colors in your plot. Note that you get `samples * nchains` of total
        points in your final scatter.

    threshold : float
        Float between 0 and 1. It looks nicer when an image is binarized, and
        this will do that. Use `None` to not binarize. In theory you should get
        fewer samples from lighter areas, but your mileage may vary.

    Returns
    -------
    pymc3.MultiTrace of samples from the image. Each sample is an (x, y) float
        of indices that were sampled, with the variable name 'image'.
    """
    # preprocess
    image_copy = image.copy()
    if threshold is not None:
        image_copy[image < threshold] = 0
        image_copy[image >= threshold] = 1

    # need an active pixel to start on
    active_pixels = np.array(list(zip(*np.where(image_copy == image_copy.max()))))
    idx = np.random.randint(0, len(active_pixels), nchains)
    start = active_pixels[idx]

    with pm.Model():
        pm.DensityDist('image', ImageLikelihood(image_copy), shape=2)
        trace = pm.sample(samples,
                          tune=tune,
                          chains=nchains, step=pm.Metropolis(),
                          start=[{'image': x} for x in start],
                         )
    return trace 
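A hedged usage sketch for sample_grayscale, assuming imcmc is installed and using a synthetic 2-D float image in [0, 1] in place of imcmc's load_image helper:

import numpy as np

# synthetic "logo": a bright square on a dark background
image = np.zeros((64, 64))
image[16:48, 16:48] = 1.0

trace = sample_grayscale(image, samples=2000, tune=100, nchains=3, threshold=0.5)
xy = np.concatenate([trace.get_values("image", chains=[c]) for c in range(3)])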
Example #20
Source File: ab_exp.py    From abyes with Apache License 2.0
def posterior_mcmc(self, data):
        """
        Find posterior distribution for the numerical method of solution
        """

        with pm.Model() as ab_model:
            # priors
            mua = pm.distributions.continuous.Beta('muA', alpha=self.alpha_prior, beta=self.beta_prior)
            mub = pm.distributions.continuous.Beta('muB', alpha=self.alpha_prior, beta=self.beta_prior)
            # likelihoods
            pm.Bernoulli('likelihoodA', mua, observed=data[0])
            pm.Bernoulli('likelihoodB', mub, observed=data[1])

            # find distribution of difference
            pm.Deterministic('lift', mub - mua)
            # find distribution of effect size
            sigma_a = pm.Deterministic('sigmaA', np.sqrt(mua * (1 - mua)))
            sigma_b = pm.Deterministic('sigmaB', np.sqrt(mub * (1 - mub)))
            pm.Deterministic('effect_size', (mub - mua) / (np.sqrt(0.5 * (sigma_a ** 2 + sigma_b ** 2))))

            start = pm.find_MAP()
            step = pm.Slice()
            trace = pm.sample(self.iterations, step=step, start=start)

        bins = np.linspace(0, 1, self.resolution)
        mua = np.histogram(trace['muA'][500:], bins=bins, normed=True)
        mub = np.histogram(trace['muB'][500:], bins=bins, normed=True)
        sigma_a = np.histogram(trace['sigmaA'][500:], bins=bins, normed=True)
        sigma_b = np.histogram(trace['sigmaB'][500:], bins=bins, normed=True)

        rvs = trace['lift'][500:]
        bins = np.linspace(np.min(rvs) - 0.2 * abs(np.min(rvs)), np.max(rvs) + 0.2 * abs(np.max(rvs)), self.resolution)
        lift = np.histogram(rvs, bins=bins, normed=True)

        rvs = trace['effect_size'][500:]
        bins = np.linspace(np.min(rvs) - 0.2 * abs(np.min(rvs)), np.max(rvs) + 0.2 * abs(np.max(rvs)), self.resolution)
        pes = np.histogram(rvs, bins=bins, normed=True)

        posterior = {'muA': mua, 'muB': mub, 'sigmaA': sigma_a, 'sigmaB': sigma_b,
                     'lift': lift, 'es': pes, 'prior': self.prior()}

        return posterior 
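A minimal, self-contained version of the same A/B conversion model, with illustrative synthetic data and flat Beta(1, 1) priors (a sketch, not the abyes API):

import numpy as np
import pymc3 as pm

# synthetic conversions: variant A converts at 5%, variant B at 6%
obs_a = np.random.binomial(1, 0.05, size=2000)
obs_b = np.random.binomial(1, 0.06, size=2000)

with pm.Model():
    mu_a = pm.Beta("muA", alpha=1.0, beta=1.0)
    mu_b = pm.Beta("muB", alpha=1.0, beta=1.0)
    pm.Bernoulli("likelihoodA", mu_a, observed=obs_a)
    pm.Bernoulli("likelihoodB", mu_b, observed=obs_b)
    lift = pm.Deterministic("lift", mu_b - mu_a)
    trace = pm.sample(2000, tune=1000)

print((trace["lift"] > 0).mean())  # posterior probability that B beats A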
Example #21
Source File: helpers.py    From arviz with Apache License 2.0
def pystan_noncentered_schools(data, draws, chains):
    """Non-centered eight schools implementation for pystan."""
    schools_code = """
        data {
            int<lower=0> J;
            real y[J];
            real<lower=0> sigma[J];
        }

        parameters {
            real mu;
            real<lower=0> tau;
            real eta[J];
        }

        transformed parameters {
            real theta[J];
            for (j in 1:J)
                theta[j] = mu + tau * eta[j];
        }

        model {
            mu ~ normal(0, 5);
            tau ~ cauchy(0, 5);
            eta ~ normal(0, 1);
            y ~ normal(theta, sigma);
        }

        generated quantities {
            vector[J] log_lik;
            vector[J] y_hat;
            for (j in 1:J) {
                log_lik[j] = normal_lpdf(y[j] | theta[j], sigma[j]);
                y_hat[j] = normal_rng(theta[j], sigma[j]);
            }
        }
    """
    if pystan_version() == 2:
        import pystan  # pylint: disable=import-error

        stan_model = pystan.StanModel(model_code=schools_code)
        fit = stan_model.sampling(
            data=data,
            iter=draws + 500,
            warmup=500,
            chains=chains,
            check_hmc_diagnostics=False,
            control=dict(adapt_engaged=False),
        )
    else:
        import stan  # pylint: disable=import-error

        stan_model = stan.build(schools_code, data=data)
        fit = stan_model.sample(
            num_chains=chains, num_samples=draws, num_warmup=500, save_warmup=False
        )
    return stan_model, fit 
Example #22
Source File: pmm.py    From autoimpute with MIT License
def __init__(self, **kwargs):
        """Create an instance of the PMMImputer class.

        The class requires multiple arguments necessary to create priors for
        a bayesian linear regression equation and least squares itself.
        Therefore, PMM arguments include all of those seen in bayesian least
        squares and least squares itself. New parameters include `neighbors`,
        or the number of neighbors that PMM uses to sample observed values.

        Args:
            **kwargs: default keyword arguments for lm & bayesian analysis.
                Note - kwargs popped for default arguments defined below.
                Next set of kwargs popped and sent to linear regression.
                Rest of kwargs passed as params to sampling (see pymc3).
            am (float, Optional): mean of alpha prior. Default 0.
            asd (float, Optional): std. deviation of alpha prior. Default 10.
            bm (float, Optional): mean of beta priors. Default 0.
            bsd (float, Optional): std. deviation of beta priors. Default 10.
            sig (float, Optional): parameter of sigma prior. Default 1.
            sample (int, Optional): number of posterior samples per chain.
                Default = 1000. More samples, longer to run, but better
                approximation of the posterior & chance of convergence.
            tune (int, Optional): parameter for tuning. Draws done in addition
                to sample. Default = 1000.
            init (str, Optional): MCMC algo to use for posterior sampling.
                Default = 'auto'. See pymc3 docs for more info on choices.
            fill_value (str, Optional): How to draw from the posterior to
                create imputations. Default is "random". 'random' and 'mean'
                supported for explicit options.
            neighbors (int, Optional): number of neighbors. Default is 5.
                Value should be greater than 0 and less than # observed,
                although anything greater than 10-20 generally too high
                unless dataset is massive.
            fit_intercept (bool, Optional): sklearn LinearRegression param.
            normalize (bool, Optional): sklearn LinearRegression param.
            copy_x (bool, Optional): sklearn LinearRegression param.
            n_jobs (int, Optional): sklearn LinearRegression param.
        """
        self.am = kwargs.pop("am", None)
        self.asd = kwargs.pop("asd", 10)
        self.bm = kwargs.pop("bm", None)
        self.bsd = kwargs.pop("bsd", 10)
        self.sig = kwargs.pop("sig", 1)
        self.sample = kwargs.pop("sample", 1000)
        self.tune = kwargs.pop("tune", 1000)
        self.init = kwargs.pop("init", "auto")
        self.fill_value = kwargs.pop("fill_value", "random")
        self.neighbors = kwargs.pop("neighbors", 5)
        self.fit_intercept = kwargs.pop("fit_intercept", True)
        self.normalize = kwargs.pop("normalize", False)
        self.copy_x = kwargs.pop("copy_x", True)
        self.n_jobs = kwargs.pop("n_jobs", None)
        self.lm = LinearRegression(
            self.fit_intercept,
            self.normalize,
            self.copy_x,
            self.n_jobs
        )
        self.sample_kwargs = kwargs 
Example #23
Source File: bayesian_regression.py    From autoimpute with MIT License
def impute(self, X):
        """Generate imputations using predictions from the fit bayesian model.

        The impute method returns the values for imputation. Missing values
        in a given dataset are replaced with the samples from the posterior
        predictive distribution of each missing data point.

        Args:
            X (pd.DataFrame): predictors to determine imputed values.

        Returns:
            np.array: imputed dataset.
        """
        # check if fitted then predict with least squares
        check_is_fitted(self, "statistics_")
        model = self.statistics_["param"]["model"]
        labels = self.statistics_["param"]["labels"]

        # add a Deterministic node for each missing value;
        # sampling then pulls from the posterior predictive distribution
        # of each missing data point, i.e. a distribution for EACH missing value
        with model:
            p_pred = pm.Deterministic(
                "p_pred", pm.invlogit(model["alpha"] + model["beta"].dot(X.T))
            )
            tr = pm.sample(
                self.sample,
                tune=self.tune,
                init=self.init,
                **self.sample_kwargs
            )
        self.trace_ = tr

        # decide how to impute. Use mean of posterior predictive or random draw
        # not supported yet, but eventually consider using the MAP
        if not self.fill_value or self.fill_value == "mean":
            imp = tr["p_pred"].mean(0)
        elif self.fill_value == "random":
            imp = np.apply_along_axis(np.random.choice, 0, tr["p_pred"])
        else:
            err = f"{self.fill_value} must be 'mean' or 'random'."
            raise ValueError(err)

        # convert probabilities to class membership
        # then map class membership to corresponding label
        fill_thresh = np.vectorize(lambda f: 1 if f > self.thresh else 0)
        preds = fill_thresh(imp)
        label_dict = {i:j for i, j in enumerate(labels.values)}
        imp = Series(preds).replace(label_dict, inplace=False)
        return imp.values 
Example #24
Source File: bayesian_regression.py    From autoimpute with MIT License
def __init__(self, **kwargs):
        """Create an instance of the BayesianBinaryLogisticImputer class.

        The class requires multiple arguments necessary to create priors for
        a bayesian logistic regression equation. The parameters are the same
        as linear regression, but the regression equation is transformed using
        pymc3's invlogit method. Because parameters are treated as random
        variables, we must specify their distributions, including
        the parameters of those distributions. In the init method we also
        include arguments used to sample the posterior distributions.

        Args:
            **kwargs: default keyword arguments used for bayesian analysis.
                Note - kwargs popped for default arguments defined below.
                Rest of kwargs passed as params to sampling (see pymc3).
            am (float, Optional): mean of alpha prior. Default 0.
            asd (float, Optional): std. deviation of alpha prior. Default 10.
            bm (float, Optional): mean of beta priors. Default 0.
            bsd (float, Optional): std. deviation of beta priors. Default 10.
            thresh (float, Optional): threshold for class membership.
                Default 0.5. Max = 1, min = 0. Tune the threshold depending on
                class imbalance. Same as with logistic regression equation.
            sample (int, Optional): number of posterior samples per chain.
                Default = 1000. More samples, longer to run, but better
                approximation of the posterior & chance of convergence.
            tune (int, Optional): parameter for tuning. Draws done in addition
                to sample. Default = 1000.
            init (str, Optional): MCMC algo to use for posterior sampling.
                Default = 'auto'. See pymc3 docs for more info on choices.
            fill_value (str, Optional): How to draw from the posterior to
                create imputations. Default is None. 'random' and 'mean'
                supported for explicit options.
        """
        self.am = kwargs.pop("am", 0)
        self.asd = kwargs.pop("asd", 10)
        self.bm = kwargs.pop("bm", 0)
        self.bsd = kwargs.pop("bsd", 10)
        self.thresh = kwargs.pop("thresh", 0.5)
        self.sample = kwargs.pop("sample", 1000)
        self.tune = kwargs.pop("tune", 1000)
        self.init = kwargs.pop("init", "auto")
        self.fill_value = kwargs.pop("fill_value", None)
        self.sample_kwargs = kwargs 
Example #25
Source File: bayesian_regression.py    From autoimpute with MIT License
def impute(self, X):
        """Generate imputations using predictions from the fit bayesian model.

        The impute method returns the values for imputation. Missing values
        in a given dataset are replaced with the samples from the posterior
        predictive distribution of each missing data point.

        Args:
            X (pd.DataFrame): predictors to determine imputed values.

        Returns:
            np.array: imputed dataset.
        """
        # check if fitted then predict with least squares
        check_is_fitted(self, "statistics_")
        model = self.statistics_["param"]

        # add a Deterministic node for each missing value;
        # sampling then pulls from the posterior predictive distribution
        # of each missing data point, i.e. a distribution for EACH missing value
        with model:
            mu_pred = pm.Deterministic(
                "mu_pred", model["alpha"]+model["beta"].dot(X.T)
            )
            tr = pm.sample(
                self.sample,
                tune=self.tune,
                init=self.init,
                **self.sample_kwargs
            )
        self.trace_ = tr

        # decide how to impute. Use mean of posterior predictive or random draw
        # not supported yet, but eventually consider using the MAP
        if not self.fill_value or self.fill_value == "mean":
            imp = tr["mu_pred"].mean(0)
        elif self.fill_value == "random":
            imp = np.apply_along_axis(np.random.choice, 0, tr["mu_pred"])
        else:
            err = f"{self.fill_value} must be 'mean' or 'random'."
            raise ValueError(err)
        return imp 
Example #26
Source File: hbr.py    From nispat with GNU General Public License v3.0
def estimate_on_new_site(self, X, y, batch_effects):
        """ Function to adapt the model """
        
        if len(X.shape)==1:
            X = np.expand_dims(X, axis=1)
        if len(y.shape)==1:
            y = np.expand_dims(y, axis=1)
        if len(batch_effects.shape)==1:
            batch_effects = np.expand_dims(batch_effects, axis=1)
        
        self.batch_effects_num = batch_effects.shape[1]
        self.batch_effects_size = []
        for i in range(self.batch_effects_num):
            self.batch_effects_size.append(len(np.unique(batch_effects[:,i])))
            
        if self.model_type == 'linear': 
            with hbr(X, y, batch_effects, self.batch_effects_size, 
                               self.configs, trace = self.trace):    
                self.trace = pm.sample(self.configs['n_samples'], 
                                       tune=self.configs['n_tuning'], 
                                       chains=self.configs['n_chains'],  
                                       target_accept=self.configs['target_accept'], 
                                       init=self.configs['init'], n_init=50000, 
                                       cores=self.configs['cores'])
        elif self.model_type == 'polynomial': 
            X = create_poly_basis(X, self.configs['order'])
            with hbr(X, y, batch_effects, self.batch_effects_size, 
                               self.configs, trace = self.trace):    
                self.trace = pm.sample(self.configs['n_samples'], 
                                       tune=self.configs['n_tuning'], 
                                       chains=self.configs['n_chains'],  
                                       target_accept=self.configs['target_accept'], 
                                       init=self.configs['init'], n_init=50000, 
                                       cores=self.configs['cores'])
        elif self.model_type == 'bspline':
            X = bspline_transform(X, self.bsp)
            with hbr(X, y, batch_effects, self.batch_effects_size, 
                               self.configs, trace = self.trace):    
                self.trace = pm.sample(self.configs['n_samples'], 
                                       tune=self.configs['n_tuning'], 
                                       chains=self.configs['n_chains'], 
                                       target_accept=self.configs['target_accept'], 
                                       init=self.configs['init'], n_init=50000, 
                                       cores=self.configs['cores'])
        elif self.model_type == 'nn': 
            with nn_hbr(X, y, batch_effects, self.batch_effects_size, 
                               self.configs, trace = self.trace):    
                self.trace = pm.sample(self.configs['n_samples'], 
                                       tune=self.configs['n_tuning'], 
                                       chains=self.configs['n_chains'], 
                                       target_accept=self.configs['target_accept'], 
                                       init=self.configs['init'], n_init=50000, 
                                       cores=self.configs['cores'])
                
        return self.trace 
Example #27
Source File: hbr.py    From nispat with GNU General Public License v3.0
def estimate(self, X, y, batch_effects):
        """ Function to estimate the model """
        
        if len(X.shape)==1:
            X = np.expand_dims(X, axis=1)
        if len(y.shape)==1:
            y = np.expand_dims(y, axis=1)
        if len(batch_effects.shape)==1:
            batch_effects = np.expand_dims(batch_effects, axis=1)
        
        self.batch_effects_num = batch_effects.shape[1]
        self.batch_effects_size = []
        for i in range(self.batch_effects_num):
            self.batch_effects_size.append(len(np.unique(batch_effects[:,i])))
        
        if self.model_type == 'linear': 
            with hbr(X, y, batch_effects, self.batch_effects_size, 
                               self.configs):    
                self.trace = pm.sample(self.configs['n_samples'], 
                                       tune=self.configs['n_tuning'], 
                                       chains=self.configs['n_chains'],  
                                       target_accept=self.configs['target_accept'], 
                                       init=self.configs['init'], n_init=50000, 
                                       cores=self.configs['cores'])
        elif self.model_type == 'polynomial': 
            X = create_poly_basis(X, self.configs['order'])
            with hbr(X, y, batch_effects, self.batch_effects_size, 
                               self.configs):    
                self.trace = pm.sample(self.configs['n_samples'], 
                                       tune=self.configs['n_tuning'], 
                                       chains=self.configs['n_chains'],  
                                       target_accept=self.configs['target_accept'], 
                                       init=self.configs['init'], n_init=50000, 
                                       cores=self.configs['cores'])
        elif self.model_type == 'bspline': 
            self.bsp = bspline_fit(X, self.configs['order'], self.configs['nknots'])
            X = bspline_transform(X, self.bsp)
            with hbr(X, y, batch_effects, self.batch_effects_size, 
                               self.configs):    
                self.trace = pm.sample(self.configs['n_samples'], 
                                       tune=self.configs['n_tuning'], 
                                       chains=self.configs['n_chains'],  
                                       target_accept=self.configs['target_accept'], 
                                       init=self.configs['init'], n_init=50000, 
                                       cores=self.configs['cores'])
        elif self.model_type == 'nn': 
            with nn_hbr(X, y, batch_effects, self.batch_effects_size, 
                               self.configs):    
                self.trace = pm.sample(self.configs['n_samples'], 
                                       tune=self.configs['n_tuning'], 
                                       chains=self.configs['n_chains'], 
                                       target_accept=self.configs['target_accept'], 
                                       init=self.configs['init'], n_init=50000, 
                                       cores=self.configs['cores'])
                
        return self.trace