Python pymc3.sample() Examples
The following are 27
code examples of pymc3.sample().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
pymc3
, or try the search function
.
Example #1
Source File: cbc_hb.py From lifestyles with MIT License | 6 votes |
def model(profiles, comparisons, selections, sample=2500, alpha_prior_std=10): all_attributes = pd.get_dummies(profiles).columns profiles_dummies = pd.get_dummies(profiles, drop_first=True) choices = pd.concat({profile: profiles_dummies.loc[comparisons[profile]].reset_index(drop=True) for profile in comparisons.columns}, axis=1) respondants = selections.columns n_attributes_in_model = profiles_dummies.shape[1] n_participants = selections.shape[1] with pm.Model(): # https://www.sawtoothsoftware.com/download/ssiweb/CBCHB_Manual.pdf # need to include the covariance matrix as a parent of `partsworth` alpha = pm.Normal('alpha', 0, sd=alpha_prior_std, shape=n_attributes_in_model, testval=np.random.randn(n_attributes_in_model)) partsworth = pm.MvNormal("partsworth", alpha, tau=np.eye(n_attributes_in_model), shape=(n_participants, n_attributes_in_model)) cs = [_create_observation_variable(selection, choices, partsworth[i, :]) for i, (_, selection) in enumerate(selections.iteritems())] trace = pm.sample(sample) return transform_trace_to_individual_summary_statistics(trace, respondants, profiles_dummies.columns, all_attributes)
Example #2
Source File: bayesian.py From cryptotrader with MIT License | 6 votes |
def fit(self, X, Y, n_samples=10000, tune_steps=1000, n_jobs=4): with pm.Model() as self.model: # Priors std = pm.Uniform("std", 0, self.sps, testval=X.std()) beta = pm.StudentT("beta", mu=0, lam=self.sps, nu=self.nu) alpha = pm.StudentT("alpha", mu=0, lam=self.sps, nu=self.nu, testval=Y.mean()) # Deterministic model mean = pm.Deterministic("mean", alpha + beta * X) # Posterior distribution obs = pm.Normal("obs", mu=mean, sd=std, observed=Y) ## Run MCMC # Find search start value with maximum a posterior estimation start = pm.find_MAP() # sample posterior distribution for latent variables trace = pm.sample(n_samples, njobs=n_jobs, tune=tune_steps, start=start) # Recover posterior samples self.burned_trace = trace[int(n_samples / 2):]
Example #3
Source File: helpers.py From arviz with Apache License 2.0 | 5 votes |
def pymc3_noncentered_schools(data, draws, chains): """Non-centered eight schools implementation for pymc3.""" import pymc3 as pm with pm.Model() as model: mu = pm.Normal("mu", mu=0, sd=5) tau = pm.HalfCauchy("tau", beta=5) eta = pm.Normal("eta", mu=0, sd=1, shape=data["J"]) theta = pm.Deterministic("theta", mu + tau * eta) pm.Normal("obs", mu=theta, sd=data["sigma"], observed=data["y"]) trace = pm.sample(draws, chains=chains) return model, trace
Example #4
Source File: helpers.py From arviz with Apache License 2.0 | 5 votes |
def _numpyro_noncentered_model(J, sigma, y=None): import numpyro import numpyro.distributions as dist mu = numpyro.sample("mu", dist.Normal(0, 5)) tau = numpyro.sample("tau", dist.HalfCauchy(5)) with numpyro.plate("J", J): eta = numpyro.sample("eta", dist.Normal(0, 1)) theta = mu + tau * eta return numpyro.sample("obs", dist.Normal(theta, sigma), obs=y)
Example #5
Source File: helpers.py From arviz with Apache License 2.0 | 5 votes |
def _pyro_noncentered_model(J, sigma, y=None): import pyro import pyro.distributions as dist mu = pyro.sample("mu", dist.Normal(0, 5)) tau = pyro.sample("tau", dist.HalfCauchy(5)) with pyro.plate("J", J): eta = pyro.sample("eta", dist.Normal(0, 1)) theta = mu + tau * eta return pyro.sample("obs", dist.Normal(theta, sigma), obs=y)
Example #6
Source File: imcmc.py From imcmc with MIT License | 5 votes |
def sample_color(image, samples=5000, tune=1000, nchains=4): """Run MCMC on a color image. EXPERIMENTAL! Parameters ---------- image : numpy.ndarray Image array from `load_image`. Should have `image.ndims == 2`. samples : int Number of samples to draw from the image tune : int All chains start at the same spot, so it is good to let them wander apart a bit before beginning Returns ------- pymc3.MultiTrace of samples from the image. Each sample is an (x, y) float of indices that were sampled, with three variables named 'red', 'green', 'blue'. """ with pm.Model(): pm.DensityDist('red', ImageLikelihood(image[:, :, 0]), shape=2) pm.DensityDist('green', ImageLikelihood(image[:, :, 1]), shape=2) pm.DensityDist('blue', ImageLikelihood(image[:, :, 2]), shape=2) trace = pm.sample(samples, chains=nchains, tune=tune, step=pm.Metropolis()) return trace
Example #7
Source File: pymc3.py From pyPESTO with BSD 3-Clause "New" or "Revised" License | 5 votes |
def sample( self, n_samples: int, beta: float = 1.): problem = self.problem log_post_fun = TheanoLogProbability(problem, beta) trace = self.trace x0 = None if self.x0 is not None: x0 = {x_name: val for x_name, val in zip(self.problem.x_names, self.x0)} # create model context with pm.Model() as model: # uniform bounds k = [pm.Uniform(x_name, lower=lb, upper=ub) for x_name, lb, ub in zip(problem.get_reduced_vector(problem.x_names), problem.lb, problem.ub)] # convert to tensor vector theta = tt.as_tensor_variable(k) # use a DensityDist for the log-posterior pm.DensityDist('log_post', logp=lambda v: log_post_fun(v), observed={'v': theta}) # step, by default automatically determined by pymc3 step = None if self.step_function: step = self.step_function() # perform the actual sampling trace = pm.sample( draws=int(n_samples), trace=trace, start=x0, step=step, **self.options) # convert trace to inference data object data = az.from_pymc3(trace=trace, model=model) self.trace = trace self.data = data
Example #8
Source File: mcmc_sampler.py From dowhy with MIT License | 5 votes |
def fit_causal_model(self, g, df, data_types, initialization_trace=None): if nx.is_directed_acyclic_graph(g): with pm.Model() as model: g = self.apply_data_types(g, data_types) g = self.apply_parents(g) g = self.apply_parameters(g, df, initialization_trace=initialization_trace) g = self.build_bayesian_network(g, df) trace = pm.sample(1000, tune=1000) else: raise Exception("Graph is not a DAG!") return g, trace
Example #9
Source File: bayesian_regression.py From autoimpute with MIT License | 5 votes |
def __init__(self, **kwargs): """Create an instance of the BayesianLeastSquaresImputer class. The class requires multiple arguments necessary to create priors for a bayesian linear regression equation. The regression is: alpha + beta * X + epsilson. Because paramaters are treated as random variables, we must specify their distributions, including the parameters of those distributions. In thie init method we also include arguments used to sample the posterior distributions. Args: **kwargs: default keyword arguments used for bayesian analysis. Note - kwargs popped for default arguments defined below. Rest of kwargs passed as params to sampling (see pymc3). am (float, Optional): mean of alpha prior. Default 0. asd (float, Optional): std. deviation of alpha prior. Default 10. bm (float, Optional): mean of beta priors. Default 0. bsd (float, Optional): std. deviation of beta priors. Default 10. sig (float, Optional): parameter of sigma prior. Default 1. sample (int, Optional): number of posterior samples per chain. Default = 1000. More samples, longer to run, but better approximation of the posterior & chance of convergence. tune (int, Optional): parameter for tuning. Draws done in addition to sample. Default = 1000. init (str, Optional): MCMC algo to use for posterior sampling. Default = 'auto'. See pymc3 docs for more info on choices. fill_value (str, Optional): How to draw from the posterior to create imputations. Default is None. 'random' and 'mean' supported for explicit options. """ self.am = kwargs.pop("am", 0) self.asd = kwargs.pop("asd", 10) self.bm = kwargs.pop("bm", 0) self.bsd = kwargs.pop("bsd", 10) self.sig = kwargs.pop("sig", 1) self.sample = kwargs.pop("sample", 1000) self.tune = kwargs.pop("tune", 1000) self.init = kwargs.pop("init", "auto") self.fill_value = kwargs.pop("fill_value", None) self.sample_kwargs = kwargs
Example #10
Source File: hbr.py From nispat with GNU General Public License v3.0 | 5 votes |
def sample_prior_predictive(self, X, batch_effects, samples, trace=None): """ Function to sample from prior predictive distribution """ if len(X.shape)==1: X = np.expand_dims(X, axis=1) if len(batch_effects.shape)==1: batch_effects = np.expand_dims(batch_effects, axis=1) self.batch_effects_num = batch_effects.shape[1] self.batch_effects_size = [] for i in range(self.batch_effects_num): self.batch_effects_size.append(len(np.unique(batch_effects[:,i]))) y = np.zeros([X.shape[0],1]) if self.model_type == 'linear': with hbr(X, y, batch_effects, self.batch_effects_size, self.configs, trace): ppc = pm.sample_prior_predictive(samples=samples) elif self.model_type == 'polynomial': X = create_poly_basis(X, self.configs['order']) with hbr(X, y, batch_effects, self.batch_effects_size, self.configs, trace): ppc = pm.sample_prior_predictive(samples=samples) elif self.model_type == 'bspline': self.bsp = bspline_fit(X, self.configs['order'], self.configs['nknots']) X = bspline_transform(X, self.bsp) with hbr(X, y, batch_effects, self.batch_effects_size, self.configs, trace): ppc = pm.sample_prior_predictive(samples=samples) elif self.model_type == 'nn': with nn_hbr(X, y, batch_effects, self.batch_effects_size, self.configs, trace): ppc = pm.sample_prior_predictive(samples=samples) return ppc
Example #11
Source File: stochastic_volatility.py From fin with MIT License | 5 votes |
def main(): #load data returns = data.get_data_google('SPY', start='2008-5-1', end='2009-12-1')['Close'].pct_change() returns.plot() plt.ylabel('daily returns in %'); with pm.Model() as sp500_model: nu = pm.Exponential('nu', 1./10, testval=5.0) sigma = pm.Exponential('sigma', 1./0.02, testval=0.1) s = pm.GaussianRandomWalk('s', sigma**-2, shape=len(returns)) r = pm.StudentT('r', nu, lam=pm.math.exp(-2*s), observed=returns) with sp500_model: trace = pm.sample(2000) pm.traceplot(trace, [nu, sigma]); plt.show() plt.figure() returns.plot() plt.plot(returns.index, np.exp(trace['s',::5].T), 'r', alpha=.03) plt.legend(['S&P500', 'stochastic volatility process']) plt.show()
Example #12
Source File: tStudentProcessMCMC.py From pyGPGO with MIT License | 5 votes |
def predict(self, Xstar, return_std=False, nsamples=10): """ Returns mean and covariances for each posterior sampled Student-t Process. Parameters ---------- Xstar: np.ndarray, shape=((nsamples, nfeatures)) Testing instances to predict. return_std: bool Whether to return the standard deviation of the posterior process. Otherwise, it returns the whole covariance matrix of the posterior process. nsamples: Number of posterior MCMC samples to consider. Returns ------- np.ndarray Mean of the posterior process for each MCMC sample and Xstar. np.ndarray Covariance posterior process for each MCMC sample and Xstar. """ chunk = list(self.trace) chunk = chunk[::-1][:nsamples] post_mean = [] post_var = [] for posterior_sample in chunk: params = self._extractParam(posterior_sample, self.covfunc.parameters) covfunc = self.covfunc.__class__(**params) gp = tStudentProcess(covfunc, nu=self.nu + self.n) gp.fit(self.X, self.y) m, s = gp.predict(Xstar, return_std=return_std) post_mean.append(m) post_var.append(s) return np.array(post_mean), np.array(post_var)
Example #13
Source File: tStudentProcessMCMC.py From pyGPGO with MIT License | 5 votes |
def fit(self, X, y): """ Fits a Student-t regressor using MCMC. Parameters ---------- X: np.ndarray, shape=(nsamples, nfeatures) Training instances to fit the GP. y: np.ndarray, shape=(nsamples,) Corresponding continuous target values to `X`. """ self.X = X self.n = self.X.shape[0] self.y = y self.model = pm.Model() with self.model as model: l = pm.Uniform('l', 0, 10) log_s2_f = pm.Uniform('log_s2_f', lower=-7, upper=5) s2_f = pm.Deterministic('sigmaf', tt.exp(log_s2_f)) log_s2_n = pm.Uniform('log_s2_n', lower=-7, upper=5) s2_n = pm.Deterministic('sigman', tt.exp(log_s2_n)) f_cov = s2_f * covariance_equivalence[type(self.covfunc).__name__](1, l) Sigma = f_cov(self.X) + tt.eye(self.n) * s2_n ** 2 y_obs = pm.MvStudentT('y_obs', nu=self.nu, mu=np.zeros(self.n), Sigma=Sigma, observed=self.y) with self.model as model: if self.step is not None: self.trace = pm.sample(self.niter, step=self.step())[self.burnin:] else: self.trace = pm.sample(self.niter, init=self.init)[self.burnin:]
Example #14
Source File: GaussianProcessMCMC.py From pyGPGO with MIT License | 5 votes |
def predict(self, Xstar, return_std=False, nsamples=10): """ Returns mean and covariances for each posterior sampled Gaussian Process. Parameters ---------- Xstar: np.ndarray, shape=((nsamples, nfeatures)) Testing instances to predict. return_std: bool Whether to return the standard deviation of the posterior process. Otherwise, it returns the whole covariance matrix of the posterior process. nsamples: Number of posterior MCMC samples to consider. Returns ------- np.ndarray Mean of the posterior process for each MCMC sample and `Xstar`. np.ndarray Covariance posterior process for each MCMC sample and `Xstar`. """ chunk = list(self.trace) chunk = chunk[::-1][:nsamples] post_mean = [] post_var = [] for posterior_sample in chunk: params = self._extractParam(posterior_sample, self.covfunc.parameters) covfunc = self.covfunc.__class__(**params) gp = GaussianProcess(covfunc) gp.fit(self.X, self.y) m, s = gp.predict(Xstar, return_std=return_std) post_mean.append(m) post_var.append(s) return np.array(post_mean), np.array(post_var)
Example #15
Source File: GaussianProcessMCMC.py From pyGPGO with MIT License | 5 votes |
def fit(self, X, y): """ Fits a Gaussian Process regressor using MCMC. Parameters ---------- X: np.ndarray, shape=(nsamples, nfeatures) Training instances to fit the GP. y: np.ndarray, shape=(nsamples,) Corresponding continuous target values to `X`. """ self.X = X self.n = self.X.shape[0] self.y = y self.model = pm.Model() with self.model as model: l = pm.Uniform('l', 0, 10) log_s2_f = pm.Uniform('log_s2_f', lower=-7, upper=5) s2_f = pm.Deterministic('sigmaf', tt.exp(log_s2_f)) log_s2_n = pm.Uniform('log_s2_n', lower=-7, upper=5) s2_n = pm.Deterministic('sigman', tt.exp(log_s2_n)) f_cov = s2_f * covariance_equivalence[type(self.covfunc).__name__](1, l) Sigma = f_cov(self.X) + tt.eye(self.n) * s2_n ** 2 y_obs = pm.MvNormal('y_obs', mu=np.zeros(self.n), cov=Sigma, observed=self.y) with self.model as model: if self.step is not None: self.trace = pm.sample(self.niter, step=self.step())[self.burnin:] else: self.trace = pm.sample(self.niter, init=self.init)[self.burnin:]
Example #16
Source File: bayesian.py From cryptotrader with MIT License | 5 votes |
def show_posteriors(self, kde=True): if hasattr(self, 'burned_trace'): pm.plots.traceplot(trace=self.burned_trace, varnames=["std", "beta", "alpha"]) pm.plot_posterior(trace=self.burned_trace, varnames=["std", "beta", "alpha"], kde_plot=kde) pm.plots.autocorrplot(trace=self.burned_trace, varnames=["std", "beta", "alpha"]) else: print("You must sample from the posteriors first.")
Example #17
Source File: likelihoods.py From cs-ranking with Apache License 2.0 | 4 votes |
def fit_pymc3_model(self, sampler, draws, tune, vi_params, **kwargs): callbacks = vi_params.get("callbacks", []) for i, c in enumerate(callbacks): if isinstance(c, CheckParametersConvergence): params = c.__dict__ params.pop("_diff") params.pop("prev") params.pop("ord") params["diff"] = "absolute" callbacks[i] = CheckParametersConvergence(**params) if sampler == "variational": with self.model: try: self.trace = pm.sample(chains=2, cores=8, tune=5, draws=5) vi_params["start"] = self.trace[-1] self.trace_vi = pm.fit(**vi_params) self.trace = self.trace_vi.sample(draws=draws) except Exception as e: if hasattr(e, "message"): message = e.message else: message = e self.logger.error(message) self.trace_vi = None if self.trace_vi is None and self.trace is None: with self.model: self.logger.info( "Error in vi ADVI sampler using Metropolis sampler with draws {}".format( draws ) ) self.trace = pm.sample( chains=1, cores=4, tune=20, draws=20, step=pm.NUTS() ) elif sampler == "metropolis": with self.model: start = pm.find_MAP() self.trace = pm.sample( chains=2, cores=8, tune=tune, draws=draws, **kwargs, step=pm.Metropolis(), start=start, ) else: with self.model: self.trace = pm.sample( chains=2, cores=8, tune=tune, draws=draws, **kwargs, step=pm.NUTS() )
Example #18
Source File: pymc.py From bambi with MIT License | 4 votes |
def run(self, start=None, method="mcmc", init="auto", n_init=50000, **kwargs): """Run the PyMC3 MCMC sampler. Parameters ---------- start: dict, or array of dict Starting parameter values to pass to sampler; see ``'pm.sample()'`` for details. method: str The method to use for fitting the model. By default, 'mcmc', in which case the PyMC3 sampler will be used. Alternatively, 'advi', in which case the model will be fitted using automatic differentiation variational inference as implemented in PyMC3. Finally, 'laplace', in wich case a laplace approximation is used, 'laplace' is not recommended other than for pedagogical use. init: str Initialization method (see PyMC3 sampler documentation). Currently, this is ``'jitter+adapt_diag'``, but this can change in the future. n_init: int Number of initialization iterations if init = 'advi' or 'nuts'. Default is kind of in PyMC3 for the kinds of models we expect to see run with bambi, so we lower it considerably. Returns ------- An ArviZ InferenceData instance. """ model = self.model if method.lower() == "mcmc": samples = kwargs.pop("samples", 1000) with model: self.trace = pm.sample(samples, start=start, init=init, n_init=n_init, **kwargs) return from_pymc3(self.trace, model=model) elif method.lower() == "advi": with model: self.advi_params = pm.variational.ADVI(start, **kwargs) return ( self.advi_params ) # this should return an InferenceData object (once arviz adds support for VI) elif method.lower() == "laplace": return _laplace(model)
Example #19
Source File: imcmc.py From imcmc with MIT License | 4 votes |
def sample_grayscale(image, samples=5000, tune=100, nchains=4, threshold=0.2): """Run MCMC on a 1 color image. Works best on logos or text. Parameters ---------- image : numpy.ndarray Image array from `load_image`. Should have `image.ndims == 2`. samples : int Number of samples to draw from the image tune : int Number of tuning steps to take. Note that this adjusts the step size: if you want smaller steps, make tune closer to 0. nchains : int Number of chains to sample with. This will later turn into the number of colors in your plot. Note that you get `samples * nchains` of total points in your final scatter. threshold : float Float between 0 and 1. It looks nicer when an image is binarized, and this will do that. Use `None` to not binarize. In theory you should get fewer samples from lighter areas, but your mileage may vary. Returns ------- pymc3.MultiTrace of samples from the image. Each sample is an (x, y) float of indices that were sampled, with the variable name 'image'. """ # preprocess image_copy = image.copy() if threshold != -1: image_copy[image < threshold] = 0 image_copy[image >= threshold] = 1 # need an active pixel to start on active_pixels = np.array(list(zip(*np.where(image_copy == image_copy.max())))) idx = np.random.randint(0, len(active_pixels), nchains) start = active_pixels[idx] with pm.Model(): pm.DensityDist('image', ImageLikelihood(image_copy), shape=2) trace = pm.sample(samples, tune=tune, chains=nchains, step=pm.Metropolis(), start=[{'image': x} for x in start], ) return trace
Example #20
Source File: ab_exp.py From abyes with Apache License 2.0 | 4 votes |
def posterior_mcmc(self, data): """ Find posterior distribution for the numerical method of solution """ with pm.Model() as ab_model: # priors mua = pm.distributions.continuous.Beta('muA', alpha=self.alpha_prior, beta=self.beta_prior) mub = pm.distributions.continuous.Beta('muB', alpha=self.alpha_prior, beta=self.beta_prior) # likelihoods pm.Bernoulli('likelihoodA', mua, observed=data[0]) pm.Bernoulli('likelihoodB', mub, observed=data[1]) # find distribution of difference pm.Deterministic('lift', mub - mua) # find distribution of effect size sigma_a = pm.Deterministic('sigmaA', np.sqrt(mua * (1 - mua))) sigma_b = pm.Deterministic('sigmaB', np.sqrt(mub * (1 - mub))) pm.Deterministic('effect_size', (mub - mua) / (np.sqrt(0.5 * (sigma_a ** 2 + sigma_b ** 2)))) start = pm.find_MAP() step = pm.Slice() trace = pm.sample(self.iterations, step=step, start=start) bins = np.linspace(0, 1, self.resolution) mua = np.histogram(trace['muA'][500:], bins=bins, normed=True) mub = np.histogram(trace['muB'][500:], bins=bins, normed=True) sigma_a = np.histogram(trace['sigmaA'][500:], bins=bins, normed=True) sigma_b = np.histogram(trace['sigmaB'][500:], bins=bins, normed=True) rvs = trace['lift'][500:] bins = np.linspace(np.min(rvs) - 0.2 * abs(np.min(rvs)), np.max(rvs) + 0.2 * abs(np.max(rvs)), self.resolution) lift = np.histogram(rvs, bins=bins, normed=True) rvs = trace['effect_size'][500:] bins = np.linspace(np.min(rvs) - 0.2 * abs(np.min(rvs)), np.max(rvs) + 0.2 * abs(np.max(rvs)), self.resolution) pes = np.histogram(rvs, bins=bins, normed=True) posterior = {'muA': mua, 'muB': mub, 'sigmaA': sigma_a, 'sigmaB': sigma_b, 'lift': lift, 'es': pes, 'prior': self.prior()} return posterior
Example #21
Source File: helpers.py From arviz with Apache License 2.0 | 4 votes |
def pystan_noncentered_schools(data, draws, chains): """Non-centered eight schools implementation for pystan.""" schools_code = """ data { int<lower=0> J; real y[J]; real<lower=0> sigma[J]; } parameters { real mu; real<lower=0> tau; real eta[J]; } transformed parameters { real theta[J]; for (j in 1:J) theta[j] = mu + tau * eta[j]; } model { mu ~ normal(0, 5); tau ~ cauchy(0, 5); eta ~ normal(0, 1); y ~ normal(theta, sigma); } generated quantities { vector[J] log_lik; vector[J] y_hat; for (j in 1:J) { log_lik[j] = normal_lpdf(y[j] | theta[j], sigma[j]); y_hat[j] = normal_rng(theta[j], sigma[j]); } } """ if pystan_version() == 2: import pystan # pylint: disable=import-error stan_model = pystan.StanModel(model_code=schools_code) fit = stan_model.sampling( data=data, iter=draws + 500, warmup=500, chains=chains, check_hmc_diagnostics=False, control=dict(adapt_engaged=False), ) else: import stan # pylint: disable=import-error stan_model = stan.build(schools_code, data=data) fit = stan_model.sample( num_chains=chains, num_samples=draws, num_warmup=500, save_warmup=False ) return stan_model, fit
Example #22
Source File: pmm.py From autoimpute with MIT License | 4 votes |
def __init__(self, **kwargs): """Create an instance of the PMMImputer class. The class requires multiple arguments necessary to create priors for a bayesian linear regression equation and least squares itself. Therefore, PMM arguments include all of those seen in bayesian least squares and least squares itself. New parameters include `neighbors`, or the number of neighbors that PMM uses to sample observed. Args: **kwargs: default keyword arguments for lm & bayesian analysis. Note - kwargs popped for default arguments defined below. Next set of kwargs popped and sent to linear regression. Rest of kwargs passed as params to sampling (see pymc3). am (float, Optional): mean of alpha prior. Default 0. asd (float, Optional): std. deviation of alpha prior. Default 10. bm (float, Optional): mean of beta priors. Default 0. bsd (float, Optional): std. deviation of beta priors. Default 10. sig (float, Optional): parameter of sigma prior. Default 1. sample (int, Optional): number of posterior samples per chain. Default = 1000. More samples, longer to run, but better approximation of the posterior & chance of convergence. tune (int, Optional): parameter for tuning. Draws done in addition to sample. Default = 1000. init (str, Optional): MCMC algo to use for posterior sampling. Default = 'auto'. See pymc3 docs for more info on choices. fill_value (str, Optional): How to draw from the posterior to create imputations. Default is "random". 'random' and 'mean' supported for explicit options. neighbors (int, Optional): number of neighbors. Default is 5. Value should be greater than 0 and less than # observed, although anything greater than 10-20 generally too high unless dataset is massive. fit_intercept (bool, Optional): sklearn LinearRegression param. normalize (bool, Optional): sklearn LinearRegression param. copy_x (bool, Optional): sklearn LinearRegression param. n_jobs (int, Optional): sklearn LinearRegression param. """ self.am = kwargs.pop("am", None) self.asd = kwargs.pop("asd", 10) self.bm = kwargs.pop("bm", None) self.bsd = kwargs.pop("bsd", 10) self.sig = kwargs.pop("sig", 1) self.sample = kwargs.pop("sample", 1000) self.tune = kwargs.pop("tune", 1000) self.init = kwargs.pop("init", "auto") self.fill_value = kwargs.pop("fill_value", "random") self.neighbors = kwargs.pop("neighbors", 5) self.fit_intercept = kwargs.pop("fit_intercept", True) self.normalize = kwargs.pop("normalize", False) self.copy_x = kwargs.pop("copy_x", True) self.n_jobs = kwargs.pop("n_jobs", None) self.lm = LinearRegression( self.fit_intercept, self.normalize, self.copy_x, self.n_jobs ) self.sample_kwargs = kwargs
Example #23
Source File: bayesian_regression.py From autoimpute with MIT License | 4 votes |
def impute(self, X): """Generate imputations using predictions from the fit bayesian model. The impute method returns the values for imputation. Missing values in a given dataset are replaced with the samples from the posterior predictive distribution of each missing data point. Args: X (pd.DataFrame): predictors to determine imputed values. Returns: np.array: imputated dataset. """ # check if fitted then predict with least squares check_is_fitted(self, "statistics_") model = self.statistics_["param"]["model"] labels = self.statistics_["param"]["labels"] # add a Deterministic node for each missing value # sampling then pulls from the posterior predictive distribution # each missing data point. I.e. distribution for EACH missing with model: p_pred = pm.Deterministic( "p_pred", pm.invlogit(model["alpha"] + model["beta"].dot(X.T)) ) tr = pm.sample( self.sample, tune=self.tune, init=self.init, **self.sample_kwargs ) self.trace_ = tr # decide how to impute. Use mean of posterior predictive or random draw # not supported yet, but eventually consider using the MAP if not self.fill_value or self.fill_value == "mean": imp = tr["p_pred"].mean(0) elif self.fill_value == "random": imp = np.apply_along_axis(np.random.choice, 0, tr["p_pred"]) else: err = f"{self.fill_value} must be 'mean' or 'random'." raise ValueError(err) # convert probabilities to class membership # then map class membership to corresponding label fill_thresh = np.vectorize(lambda f: 1 if f > self.thresh else 0) preds = fill_thresh(imp) label_dict = {i:j for i, j in enumerate(labels.values)} imp = Series(preds).replace(label_dict, inplace=False) return imp.values
Example #24
Source File: bayesian_regression.py From autoimpute with MIT License | 4 votes |
def __init__(self, **kwargs): """Create an instance of the BayesianBinaryLogisticImputer class. The class requires multiple arguments necessary to create priors for a bayesian logistic regression equation. The parameters are the same as linear regression, but the regression equation is transformed using pymc3's invlogit method. Because paramaters are treated as random variables, we must specify their distributions, including the parameters of those distributions. In the init method we also include arguments used to sample the posterior distributions. Args: **kwargs: default keyword arguments used for bayesian analysis. Note - kwargs popped for default arguments defined below. Rest of kwargs passed as params to sampling (see pymc3). am (float, Optional): mean of alpha prior. Default 0. asd (float, Optional): std. deviation of alpha prior. Default 10. bm (float, Optional): mean of beta priors. Default 0. bsd (float, Optional): std. deviation of beta priors. Default 10. thresh (float, Optional): threshold for class membership. Default 0.5. Max = 1, min = 0. Tune threshhold depending on class imbalance. Same as with logistic regression equation. sample (int, Optional): number of posterior samples per chain. Default = 1000. More samples, longer to run, but better approximation of the posterior & chance of convergence. tune (int, Optional): parameter for tuning. Draws done in addition to sample. Default = 1000. init (str, Optional): MCMC algo to use for posterior sampling. Default = 'auto'. See pymc3 docs for more info on choices. fill_value (str, Optional): How to draw from the posterior to create imputations. Default is None. 'random' and 'mean' supported for explicit options. """ self.am = kwargs.pop("am", 0) self.asd = kwargs.pop("asd", 10) self.bm = kwargs.pop("bm", 0) self.bsd = kwargs.pop("bsd", 10) self.thresh = kwargs.pop("thresh", 0.5) self.sample = kwargs.pop("sample", 1000) self.tune = kwargs.pop("tune", 1000) self.init = kwargs.pop("init", "auto") self.fill_value = kwargs.pop("fill_value", None) self.sample_kwargs = kwargs
Example #25
Source File: bayesian_regression.py From autoimpute with MIT License | 4 votes |
def impute(self, X): """Generate imputations using predictions from the fit bayesian model. The transform method returns the values for imputation. Missing values in a given dataset are replaced with the samples from the posterior predictive distribution of each missing data point. Args: X (pd.DataFrame): predictors to determine imputed values. Returns: np.array: imputed dataset. """ # check if fitted then predict with least squares check_is_fitted(self, "statistics_") model = self.statistics_["param"] # add a Deterministic node for each missing value # sampling then pulls from the posterior predictive distribution # each missing data point. I.e. distribution for EACH missing with model: mu_pred = pm.Deterministic( "mu_pred", model["alpha"]+model["beta"].dot(X.T) ) tr = pm.sample( self.sample, tune=self.tune, init=self.init, **self.sample_kwargs ) self.trace_ = tr # decide how to impute. Use mean of posterior predictive or random draw # not supported yet, but eventually consider using the MAP if not self.fill_value or self.fill_value == "mean": imp = tr["mu_pred"].mean(0) elif self.fill_value == "random": imp = np.apply_along_axis(np.random.choice, 0, tr["mu_pred"]) else: err = f"{self.fill_value} must be 'mean' or 'random'." raise ValueError(err) return imp
Example #26
Source File: hbr.py From nispat with GNU General Public License v3.0 | 4 votes |
def estimate_on_new_site(self, X, y, batch_effects): """ Function to adapt the model """ if len(X.shape)==1: X = np.expand_dims(X, axis=1) if len(y.shape)==1: y = np.expand_dims(y, axis=1) if len(batch_effects.shape)==1: batch_effects = np.expand_dims(batch_effects, axis=1) self.batch_effects_num = batch_effects.shape[1] self.batch_effects_size = [] for i in range(self.batch_effects_num): self.batch_effects_size.append(len(np.unique(batch_effects[:,i]))) if self.model_type == 'linear': with hbr(X, y, batch_effects, self.batch_effects_size, self.configs, trace = self.trace): self.trace = pm.sample(self.configs['n_samples'], tune=self.configs['n_tuning'], chains=self.configs['n_chains'], target_accept=self.configs['target_accept'], init=self.configs['init'], n_init=50000, cores=self.configs['cores']) elif self.model_type == 'polynomial': X = create_poly_basis(X, self.configs['order']) with hbr(X, y, batch_effects, self.batch_effects_size, self.configs, trace = self.trace): self.trace = pm.sample(self.configs['n_samples'], tune=self.configs['n_tuning'], chains=self.configs['n_chains'], target_accept=self.configs['target_accept'], init=self.configs['init'], n_init=50000, cores=self.configs['cores']) if self.model_type == 'bspline': X = bspline_transform(X, self.bsp) with hbr(X, y, batch_effects, self.batch_effects_size, self.configs, trace = self.trace): self.trace = pm.sample(self.configs['n_samples'], tune=self.configs['n_tuning'], chains=self.configs['n_chains'], target_accept=self.configs['target_accept'], init=self.configs['init'], n_init=50000, cores=self.configs['cores']) elif self.model_type == 'nn': with nn_hbr(X, y, batch_effects, self.batch_effects_size, self.configs, trace = self.trace): self.trace = pm.sample(self.configs['n_samples'], tune=self.configs['n_tuning'], chains=self.configs['n_chains'], target_accept=self.configs['target_accept'], init=self.configs['init'], n_init=50000, cores=self.configs['cores']) return self.trace
Example #27
Source File: hbr.py From nispat with GNU General Public License v3.0 | 4 votes |
def estimate(self, X, y, batch_effects): """ Function to estimate the model """ if len(X.shape)==1: X = np.expand_dims(X, axis=1) if len(y.shape)==1: y = np.expand_dims(y, axis=1) if len(batch_effects.shape)==1: batch_effects = np.expand_dims(batch_effects, axis=1) self.batch_effects_num = batch_effects.shape[1] self.batch_effects_size = [] for i in range(self.batch_effects_num): self.batch_effects_size.append(len(np.unique(batch_effects[:,i]))) if self.model_type == 'linear': with hbr(X, y, batch_effects, self.batch_effects_size, self.configs): self.trace = pm.sample(self.configs['n_samples'], tune=self.configs['n_tuning'], chains=self.configs['n_chains'], target_accept=self.configs['target_accept'], init=self.configs['init'], n_init=50000, cores=self.configs['cores']) elif self.model_type == 'polynomial': X = create_poly_basis(X, self.configs['order']) with hbr(X, y, batch_effects, self.batch_effects_size, self.configs): self.trace = pm.sample(self.configs['n_samples'], tune=self.configs['n_tuning'], chains=self.configs['n_chains'], target_accept=self.configs['target_accept'], init=self.configs['init'], n_init=50000, cores=self.configs['cores']) elif self.model_type == 'bspline': self.bsp = bspline_fit(X, self.configs['order'], self.configs['nknots']) X = bspline_transform(X, self.bsp) with hbr(X, y, batch_effects, self.batch_effects_size, self.configs): self.trace = pm.sample(self.configs['n_samples'], tune=self.configs['n_tuning'], chains=self.configs['n_chains'], target_accept=self.configs['target_accept'], init=self.configs['init'], n_init=50000, cores=self.configs['cores']) elif self.model_type == 'nn': with nn_hbr(X, y, batch_effects, self.batch_effects_size, self.configs): self.trace = pm.sample(self.configs['n_samples'], tune=self.configs['n_tuning'], chains=self.configs['n_chains'], target_accept=self.configs['target_accept'], init=self.configs['init'], n_init=50000, cores=self.configs['cores']) return self.trace