Python statsmodels.api.WLS Examples
The following are 23
code examples of statsmodels.api.WLS().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
statsmodels.api
, or try the search function
.
Example #1
Source File: linear_model.py From Splunking-Crime with GNU Affero General Public License v3.0 | 6 votes |
def whiten(self, X): """ Whitener for WLS model, multiplies each column by sqrt(self.weights) Parameters ---------- X : array-like Data to be whitened Returns ------- sqrt(weights)*X """ #print(self.weights.var())) X = np.asarray(X) if X.ndim == 1: return X * np.sqrt(self.weights) elif X.ndim == 2: return np.sqrt(self.weights)[:, None]*X
Example #2
Source File: linear_model.py From Splunking-Crime with GNU Affero General Public License v3.0 | 6 votes |
def __init__(self, endog, exog, weights=1., missing='none', hasconst=None, **kwargs): weights = np.array(weights) if weights.shape == (): if (missing == 'drop' and 'missing_idx' in kwargs and kwargs['missing_idx'] is not None): # patsy may have truncated endog weights = np.repeat(weights, len(kwargs['missing_idx'])) else: weights = np.repeat(weights, len(endog)) # handle case that endog might be of len == 1 if len(weights) == 1: weights = np.array([weights.squeeze()]) else: weights = weights.squeeze() super(WLS, self).__init__(endog, exog, missing=missing, weights=weights, hasconst=hasconst, **kwargs) nobs = self.exog.shape[0] weights = self.weights # Experimental normalization of weights weights = weights / np.sum(weights) * nobs if weights.size != nobs and weights.shape[0] != nobs: raise ValueError('Weights must be scalar or same length as design')
Example #3
Source File: linear_model.py From vnpy_crypto with MIT License | 6 votes |
def __init__(self, endog, exog, weights=1., missing='none', hasconst=None, **kwargs): weights = np.array(weights) if weights.shape == (): if (missing == 'drop' and 'missing_idx' in kwargs and kwargs['missing_idx'] is not None): # patsy may have truncated endog weights = np.repeat(weights, len(kwargs['missing_idx'])) else: weights = np.repeat(weights, len(endog)) # handle case that endog might be of len == 1 if len(weights) == 1: weights = np.array([weights.squeeze()]) else: weights = weights.squeeze() super(WLS, self).__init__(endog, exog, missing=missing, weights=weights, hasconst=hasconst, **kwargs) nobs = self.exog.shape[0] weights = self.weights # Experimental normalization of weights weights = weights / np.sum(weights) * nobs if weights.size != nobs and weights.shape[0] != nobs: raise ValueError('Weights must be scalar or same length as design')
Example #4
Source File: linear_model.py From vnpy_crypto with MIT License | 6 votes |
def whiten(self, X): """ Whitener for WLS model, multiplies each column by sqrt(self.weights) Parameters ---------- X : array-like Data to be whitened Returns ------- whitened : array-like sqrt(weights)*X """ X = np.asarray(X) if X.ndim == 1: return X * np.sqrt(self.weights) elif X.ndim == 2: return np.sqrt(self.weights)[:, None]*X
Example #5
Source File: test_ols.py From Computable with MIT License | 6 votes |
def testWLS(self): # WLS centered SS changed (fixed) in 0.5.0 sm_version = sm.version.version if sm_version < LooseVersion('0.5.0'): raise nose.SkipTest("WLS centered SS not fixed in statsmodels" " version {0}".format(sm_version)) X = DataFrame(np.random.randn(30, 4), columns=['A', 'B', 'C', 'D']) Y = Series(np.random.randn(30)) weights = X.std(1) self._check_wls(X, Y, weights) weights.ix[[5, 15]] = np.nan Y[[2, 21]] = np.nan self._check_wls(X, Y, weights)
Example #6
Source File: test_ols.py From Computable with MIT License | 6 votes |
def _check_wls(self, x, y, weights): result = ols(y=y, x=x, weights=1 / weights) combined = x.copy() combined['__y__'] = y combined['__weights__'] = weights combined = combined.dropna() endog = combined.pop('__y__').values aweights = combined.pop('__weights__').values exog = sm.add_constant(combined.values, prepend=False) sm_result = sm.WLS(endog, exog, weights=1 / aweights).fit() assert_almost_equal(sm_result.params, result._beta_raw) assert_almost_equal(sm_result.resid, result._resid_raw) self.checkMovingOLS('rolling', x, y, weights=weights) self.checkMovingOLS('expanding', x, y, weights=weights)
Example #7
Source File: fit.py From DIVE-backend with GNU General Public License v3.0 | 6 votes |
def reg_m(y, x, estimator, weights=None): ones = np.ones(len(x[0])) X = sm.add_constant(np.column_stack((x[0], ones))) for ele in x[1:]: X = sm.add_constant(np.column_stack((ele, X))) if estimator=='ols': return sm.OLS(y, X).fit() elif estimator=='wls': return sm.WLS(y, X, weights).fit() elif estimator=='gls': return sm.GLS(y, X).fit() return None ############################ #Run general linear regression ####func array contains the array of functions consdered in the regression ####params coefficients are reversed; the first param coefficient corresponds to the last function in func array ####notice the independent vectors are given in dictionary format, egs:{'bob':[1,2,3,4,5],'mary':[1,2,3,4,5]}
Example #8
Source File: test_generic_methods.py From vnpy_crypto with MIT License | 5 votes |
def setup(self): #fit for each test, because results will be changed by test x = self.exog np.random.seed(987689) y = x.sum(1) + np.random.randn(x.shape[0]) self.results = sm.WLS(y, self.exog, weights=np.ones(len(y))).fit()
Example #9
Source File: linear_model.py From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes |
def loglike(self, params): """ Returns the value of the gaussian log-likelihood function at params. Given the whitened design matrix, the log-likelihood is evaluated at the parameter vector `params` for the dependent variable `Y`. Parameters ---------- params : array-like The parameter estimates. Returns ------- llf : float The value of the log-likelihood function for a WLS Model. Notes -------- .. math:: -\\frac{n}{2}\\log\\left(Y-\\hat{Y}\\right)-\\frac{n}{2}\\left(1+\\log\\left(\\frac{2\\pi}{n}\\right)\\right)-\\frac{1}{2}log\\left(\\left|W\\right|\\right) where :math:`W` is a diagonal matrix """ nobs2 = self.nobs / 2.0 SSR = np.sum((self.wendog - np.dot(self.wexog,params))**2, axis=0) llf = -np.log(SSR) * nobs2 # concentrated likelihood llf -= (1+np.log(np.pi/nobs2))*nobs2 # with constant llf += 0.5 * np.sum(np.log(self.weights)) return llf
Example #10
Source File: linear_model.py From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes |
def loglike(self, params): """ Returns the value of the Gaussian log-likelihood function at params. Given the whitened design matrix, the log-likelihood is evaluated at the parameter vector `params` for the dependent variable `endog`. Parameters ---------- params : array-like The parameter estimates Returns ------- loglike : float The value of the log-likelihood function for a GLS Model. Notes ----- The log-likelihood function for the normal distribution is .. math:: -\\frac{n}{2}\\log\\left(\\left(Y-\\hat{Y}\\right)^{\\prime}\\left(Y-\\hat{Y}\\right)\\right)-\\frac{n}{2}\\left(1+\\log\\left(\\frac{2\\pi}{n}\\right)\\right)-\\frac{1}{2}\\log\\left(\\left|\\Sigma\\right|\\right) Y and Y-hat are whitened. """ #TODO: combine this with OLS/WLS loglike and add _det_sigma argument nobs2 = self.nobs / 2.0 SSR = np.sum((self.wendog - np.dot(self.wexog, params))**2, axis=0) llf = -np.log(SSR) * nobs2 # concentrated likelihood llf -= (1+np.log(np.pi/nobs2))*nobs2 # with likelihood constant if np.any(self.sigma): #FIXME: robust-enough check? unneeded if _det_sigma gets defined if self.sigma.ndim==2: det = np.linalg.slogdet(self.sigma) llf -= .5*det[1] else: llf -= 0.5*np.sum(np.log(self.sigma)) # with error covariance matrix return llf
Example #11
Source File: LocalLinearRegression.py From langchangetrack with BSD 3-Clause "New" or "Revised" License | 5 votes |
def predict(self, X): neighbors = self.nn.kneighbors(X) distances = neighbors[0][0] neighbor_indices = neighbors[1][0] local_X = self.X.take(neighbor_indices, axis=0) local_Y = self.Y.take(neighbor_indices, axis=0) wls = sm.WLS(local_Y, local_X, weights=self.weight_func(distances)).fit() return wls.predict(X)
Example #12
Source File: selection.py From Enrich2 with BSD 3-Clause "New" or "Revised" License | 5 votes |
def validate(self): """ Raises an informative ``ValueError`` if the time points in the analysis are not suitable. Calls validate method on all child SeqLibs. """ # check the time points if 0 not in self.timepoints: raise ValueError("Missing timepoint 0 [{}]".format(self.name)) if self.timepoints[0] != 0: raise ValueError("Invalid negative timepoint [{}]".format(self.name)) if len(self.timepoints) < 2: raise ValueError("Multiple timepoints required [{}]".format(self.name)) elif len(self.timepoints) < 3 and self.scoring_method in ("WLS", "OLS"): raise ValueError( "Insufficient number of timepoints for regression scoring [{}]".format( self.name ) ) # check the wild type sequences if self.has_wt_sequence(): for child in self.children[1:]: if self.children[0].wt != child.wt: self.logger.warning("Inconsistent wild type sequences") break # check that we're not doing wild type normalization on something with no wild type # if not self.has_wt_sequence() and self.logr_method == "wt": # raise ValueError("No wild type sequence for wild type normalization [{}]".format(self.name)) # validate children for child in self.children: child.validate()
Example #13
Source File: selection.py From Enrich2 with BSD 3-Clause "New" or "Revised" License | 5 votes |
def regression_apply(row, timepoints, weighted): """ :py:meth:`pandas.DataFrame.apply` apply function for calculating enrichment using linear regression. If *weighted* is ``True`` perform weighted least squares; else perform ordinary least squares. Weights for weighted least squares are included in *row*. Returns a :py:class:`pandas.Series` containing regression coefficients, residuals, and statistics. """ # retrieve log ratios from the row y = row[["L_{}".format(t) for t in timepoints]] # re-scale the x's to fall within [0, 1] xvalues = [x / float(max(timepoints)) for x in timepoints] # perform the fit X = sm.add_constant(xvalues) # fit intercept if weighted: W = row[["W_{}".format(t) for t in timepoints]] fit = sm.WLS(y, X, weights=W).fit() else: fit = sm.OLS(y, X).fit() # re-format as a data frame row values = np.concatenate( [fit.params, [fit.bse["x1"], fit.tvalues["x1"], fit.pvalues["x1"]], fit.resid] ) index = ["intercept", "slope", "SE_slope", "t", "pvalue_raw"] + [ "e_{}".format(t) for t in timepoints ] return pd.Series(data=values, index=index)
Example #14
Source File: smoothers.py From plotnine with GNU General Public License v2.0 | 5 votes |
def lm(data, xseq, **params): """ Fit OLS / WLS if data has weight """ if params['formula']: return lm_formula(data, xseq, **params) X = sm.add_constant(data['x']) Xseq = sm.add_constant(xseq) weights = data.get('weights', None) if weights is None: init_kwargs, fit_kwargs = separate_method_kwargs( params['method_args'], sm.OLS, sm.OLS.fit) model = sm.OLS(data['y'], X, **init_kwargs) else: if np.any(weights < 0): raise ValueError( "All weights must be greater than zero." ) init_kwargs, fit_kwargs = separate_method_kwargs( params['method_args'], sm.WLS, sm.WLS.fit) model = sm.WLS(data['y'], X, weights=data['weight'], **init_kwargs) results = model.fit(**fit_kwargs) data = pd.DataFrame({'x': xseq}) data['y'] = results.predict(Xseq) if params['se']: alpha = 1 - params['level'] prstd, iv_l, iv_u = wls_prediction_std( results, Xseq, alpha=alpha) data['se'] = prstd data['ymin'] = iv_l data['ymax'] = iv_u return data
Example #15
Source File: ols.py From Computable with MIT License | 5 votes |
def __init__(self, y, x, intercept=True, weights=None, nw_lags=None, nw_overlap=False): try: import statsmodels.api as sm except ImportError: import scikits.statsmodels.api as sm self._x_orig = x self._y_orig = y self._weights_orig = weights self._intercept = intercept self._nw_lags = nw_lags self._nw_overlap = nw_overlap (self._y, self._x, self._weights, self._x_filtered, self._index, self._time_has_obs) = self._prepare_data() if self._weights is not None: self._x_trans = self._x.mul(np.sqrt(self._weights), axis=0) self._y_trans = self._y * np.sqrt(self._weights) self.sm_ols = sm.WLS(self._y.get_values(), self._x.get_values(), weights=self._weights.values).fit() else: self._x_trans = self._x self._y_trans = self._y self.sm_ols = sm.OLS(self._y.get_values(), self._x.get_values()).fit()
Example #16
Source File: linear_model.py From vnpy_crypto with MIT License | 5 votes |
def loglike(self, params): """ Returns the value of the Gaussian log-likelihood function at params. Given the whitened design matrix, the log-likelihood is evaluated at the parameter vector `params` for the dependent variable `endog`. Parameters ---------- params : array-like The parameter estimates Returns ------- loglike : float The value of the log-likelihood function for a GLS Model. Notes ----- The log-likelihood function for the normal distribution is .. math:: -\\frac{n}{2}\\log\\left(\\left(Y-\\hat{Y}\\right)^{\\prime}\\left(Y-\\hat{Y}\\right)\\right)-\\frac{n}{2}\\left(1+\\log\\left(\\frac{2\\pi}{n}\\right)\\right)-\\frac{1}{2}\\log\\left(\\left|\\Sigma\\right|\\right) Y and Y-hat are whitened. """ # TODO: combine this with OLS/WLS loglike and add _det_sigma argument nobs2 = self.nobs / 2.0 SSR = np.sum((self.wendog - np.dot(self.wexog, params))**2, axis=0) llf = -np.log(SSR) * nobs2 # concentrated likelihood llf -= (1+np.log(np.pi/nobs2))*nobs2 # with likelihood constant if np.any(self.sigma): # FIXME: robust-enough check? unneeded if _det_sigma gets defined if self.sigma.ndim == 2: det = np.linalg.slogdet(self.sigma) llf -= .5*det[1] else: llf -= 0.5*np.sum(np.log(self.sigma)) # with error covariance matrix return llf
Example #17
Source File: test_shrink_pickle.py From vnpy_crypto with MIT License | 5 votes |
def setup(self): #fit for each test, because results will be changed by test x = self.exog np.random.seed(987689) y = x.sum(1) + np.random.randn(x.shape[0]) self.results = sm.WLS(y, self.exog, weights=np.ones(len(y))).fit()
Example #18
Source File: smoothers.py From plotnine with GNU General Public License v2.0 | 4 votes |
def lm_formula(data, xseq, **params): """ Fit OLS / WLS using a formula """ formula = params['formula'] eval_env = params['enviroment'] weights = data.get('weight', None) if weights is None: init_kwargs, fit_kwargs = separate_method_kwargs( params['method_args'], sm.OLS, sm.OLS.fit) model = smf.ols( formula, data, eval_env=eval_env, **init_kwargs ) else: if np.any(weights < 0): raise ValueError( "All weights must be greater than zero." ) init_kwargs, fit_kwargs = separate_method_kwargs( params['method_args'], sm.OLS, sm.OLS.fit) model = smf.wls( formula, data, weights=weights, eval_env=eval_env, **init_kwargs ) results = model.fit(**fit_kwargs) data = pd.DataFrame({'x': xseq}) data['y'] = results.predict(data) if params['se']: _, predictors = dmatrices(formula, data, eval_env=eval_env) alpha = 1 - params['level'] prstd, iv_l, iv_u = wls_prediction_std( results, predictors, alpha=alpha) data['se'] = prstd data['ymin'] = iv_l data['ymax'] = iv_u return data
Example #19
Source File: selection.py From Enrich2 with BSD 3-Clause "New" or "Revised" License | 4 votes |
def calculate(self): """ Wrapper method to calculate counts and enrichment scores for all data in the :py:class:`~selection.Selection`. """ if len(self.labels) == 0: raise ValueError( "No data present across all sequencing libraries [{}]".format(self.name) ) for label in self.labels: self.merge_counts_unfiltered(label) self.filter_counts(label) if self.is_barcodevariant() or self.is_barcodeid(): self.combine_barcode_maps() if self.scoring_method == "counts": pass elif self.scoring_method == "ratios": for label in self.labels: self.calc_ratios(label) elif self.scoring_method == "simple": for label in self.labels: self.calc_simple_ratios(label) elif self.scoring_method in ("WLS", "OLS"): if len(self.timepoints) <= 2: raise ValueError( "Regression-based scoring requires three or more time points." ) for label in self.labels: self.calc_log_ratios(label) if self.scoring_method == "WLS": self.calc_weights(label) self.calc_regression(label) else: raise ValueError( 'Invalid scoring method "{}" [{}]'.format( self.scoring_method, self.name ) ) if self.scoring_method in ("ratios", "WLS", "OLS") and self.component_outliers: if self.is_barcodevariant() or self.is_barcodeid(): self.calc_outliers("barcodes") if self.is_coding(): self.calc_outliers("variants")
Example #20
Source File: selection.py From Enrich2 with BSD 3-Clause "New" or "Revised" License | 4 votes |
def wt_plot(self, pdf): """ Create a plot of the linear fit of the wild type variant. *pdf* is an open PdfPages instance. Only created for selections that use WLS or OLS scoring and have a wild type specified. Uses :py:func:`~plots.fit_axes` for the plotting. """ self.logger.info("Creating wild type fit plot") # get the data and calculate log ratios if "variants" in self.labels: wt_label = "variants" elif "identifiers" in self.labels: wt_label = "identifiers" else: raise ValueError( "Invalid selection type for wild type fit plot [{}]".format(self.name) ) data = self.store.select( "/main/{}/counts".format(wt_label), where='index = "{}"'.format(WILD_TYPE_VARIANT), ).ix[0] sums = self.store["/main/{}/counts".format(wt_label)].sum( axis="index" ) # sum of complete cases (N') yvalues = np.log(data + 0.5) - np.log(sums + 0.5) xvalues = [tp / float(max(self.timepoints)) for tp in self.timepoints] # fit the line X = sm.add_constant(xvalues) # fit intercept if self.scoring_method == "WLS": W = 1 / (1 / (data + 0.5) + 1 / (sums + 0.5)) fit = sm.WLS(yvalues, X, weights=W).fit() elif self.scoring_method == "OLS": fit = sm.OLS(yvalues, X).fit() else: raise ValueError( 'Invalid regression scoring method "{}" [{}]'.format( self.scoring_method, self.name ) ) intercept, slope = fit.params slope_se = fit.bse["x1"] # make the plot fig, ax = plt.subplots() fig.set_tight_layout(True) fit_axes(ax, xvalues, yvalues, slope, intercept, xlabels=self.timepoints) fit_axes_text(ax, cornertext="Slope {:3.2f}\nSE {:.1f}".format(slope, slope_se)) ax.set_title("Wild Type Shape\n{}".format(self.name)) ax.set_ylabel("Log Ratio (Complete Cases)") pdf.savefig(fig) plt.close(fig)
Example #21
Source File: modeler.py From rsmtool with Apache License 2.0 | 4 votes |
def train_score_weighted_lr(self, df_train, feature_columns): """ Train `ScoreWeightedLR` - Linear regression model weighted by score. Parameters ---------- df_train : pd.DataFrame Data frame containing the features on which to train the model. feature_columns : list A list of feature columns to use in training the model. Returns ------- learner : skll.Learner The SKLL learner object fit : statsmodels.RegressionResults A statsmodels regression results object or None. df_coef : pd.DataFrame The model coefficients in a data_frame used_features : list A list of features used in the final model. """ # train weighted least squares regression # get the feature columns X = df_train[feature_columns] # add the intercept X = sm.add_constant(X) # define the weights as inverse proportion of total # number of data points for each score score_level_dict = df_train['sc1'].value_counts() expected_proportion = 1 / len(score_level_dict) score_weights_dict = {sc1: expected_proportion / count for sc1, count in score_level_dict.items()} weights = [score_weights_dict[sc1] for sc1 in df_train['sc1']] # fit the model fit = sm.WLS(df_train['sc1'], X, weights=weights).fit() df_coef = self.ols_coefficients_to_dataframe(fit.params) learner = self.create_fake_skll_learner(df_coef) # we used all the features used_features = feature_columns return learner, fit, df_coef, used_features
Example #22
Source File: propensity_weighted_ols.py From whynot with MIT License | 4 votes |
def estimate_treatment_effect(covariates, treatment, outcome): """Estimate treatment effects using propensity weighted least-squares. Parameters ---------- covariates: `np.ndarray` Array of shape [num_samples, num_features] of features treatment: `np.ndarray` Binary array of shape [num_samples] indicating treatment status for each sample. outcome: `np.ndarray` Array of shape [num_samples] containing the observed outcome for each sample. Returns ------- result: `whynot.framework.InferenceResult` InferenceResult object for this procedure """ start_time = perf_counter() # Compute propensity scores with logistic regression model. features = sm.add_constant(covariates, prepend=True, has_constant="add") logit = sm.Logit(treatment, features) model = logit.fit(disp=0) propensities = model.predict(features) # IP-weights treated = treatment == 1.0 untreated = treatment == 0.0 weights = treated / propensities + untreated / (1.0 - propensities) treatment = treatment.reshape(-1, 1) features = np.concatenate([treatment, covariates], axis=1) features = sm.add_constant(features, prepend=True, has_constant="add") model = sm.WLS(outcome, features, weights=weights) results = model.fit() stop_time = perf_counter() # Treatment is the second variable (after the constant offset) ate = results.params[1] stderr = results.bse[1] conf_int = tuple(results.conf_int()[1]) return InferenceResult( ate=ate, stderr=stderr, ci=conf_int, individual_effects=None, elapsed_time=stop_time - start_time, )
Example #23
Source File: LiCSBAS_tools_lib.py From LiCSBAS with GNU General Public License v3.0 | 4 votes |
def fit2d(A,w=None,deg="1"): """ Estimate best fit plain with indicated degree of polynomial. Inputs: A : Input ndarray (can include nan) w : Wieight (1/std**2) for each element of A (with the same dimention as A) deg : degree of polynomial of fitting plain - 1 -> a+bx+cy (ramp) - bl -> a+bx+cy+dxy (biliner) - 2 -> a+bx+cy+dxy+ex**2_fy**2 (2d polynomial) Returns: Afit : Best fit plain with the same demention as A m : set of parameters of best fit plain (a,b,c...) """ ### Make design matrix G length,width = A.shape #read dimension Xgrid,Ygrid = np.meshgrid(np.arange(width),np.arange(length)) #mesh grid if str(deg) == "1": G = np.stack((np.ones((length*width)), Xgrid.flatten(), Ygrid.flatten())).T elif str(deg) == "bl": G = np.stack((np.ones((length*width)), Xgrid.flatten(), Ygrid.flatten(), Xgrid.flatten()*Ygrid.flatten())).T elif str(deg) == "2": G = np.stack((np.ones((length*width)), Xgrid.flatten(), Ygrid.flatten(), Xgrid.flatten()*Ygrid.flatten(), Xgrid.flatten()**2, Ygrid.flatten()**2)).T else: print('\nERROR: Not proper deg ({}) is used\n'.format(deg), file=sys.stderr) return False ### Handle nan by 0 padding and 0 weight # Not drop in sm because cannot return Afit if np.any(np.isnan(A)): bool_nan = np.isnan(A) A = A.copy() # to avoid change original value in main A[bool_nan] = 0 if w is None: w = np.ones_like(A) w = w.copy() # to avoid change original value in main w[bool_nan] = 0 ### Invert if w is None: ## Ordinary LS results = sm.OLS(A.ravel(), G).fit() else: ## Weighted LS results = sm.WLS(A.ravel(), G, weights=w.ravel()).fit() m = results.params Afit = np.float32(results.predict().reshape((length,width))) return Afit,m #%%