Python statsmodels.api.Logit() Examples

The following are 14 code examples of statsmodels.api.Logit(), drawn from open-source projects; the source file, project, and license are listed above each example. You may also want to check out the other available functions and classes of the statsmodels.api module.
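All of the snippets below are excerpts from larger files and assume the conventional imports import statsmodels.api as sm and import numpy as np. As a self-contained starting point, here is a minimal sketch of fitting a Logit model; the synthetic data and coefficients are illustrative, not taken from any project below:

import numpy as np
import statsmodels.api as sm

# Simulate a small binary-response dataset.
rng = np.random.default_rng(0)
X = rng.normal(size=(200, 2))
y = (X @ np.array([1.5, -1.0]) + rng.normal(size=200) > 0).astype(int)

# Logit takes the response first, then the design matrix;
# add_constant inserts the intercept column.
X = sm.add_constant(X)
result = sm.Logit(y, X).fit(disp=0)
print(result.summary())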
Example #1
Source File: pscore.py    From pscore_match with BSD 2-Clause "Simplified" License
def compute(self, method='logistic'):
        """
        Compute propensity score and measures of goodness-of-fit
        
        Parameters
        ----------
        method : str
            Propensity score estimation method. Either 'logistic' or 'probit'
        """
        predictors = sm.add_constant(self.covariates, prepend=False)
        if method == 'logistic':
            model = sm.Logit(self.treatment, predictors).fit(disp=False, warn_convergence=True)
        elif method == 'probit':
            model = sm.Probit(self.treatment, predictors).fit(disp=False, warn_convergence=True)
        else:
            raise ValueError('Unrecognized method')
        return model.predict() 
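The same idea works outside the class: fit a Logit (or Probit) of treatment on covariates, then read the fitted probabilities off as propensity scores. A minimal standalone sketch, with hypothetical synthetic inputs:

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(1)
covariates = rng.normal(size=(500, 2))           # hypothetical covariate matrix
treatment = (rng.random(500) < 0.4).astype(int)  # hypothetical binary treatment

predictors = sm.add_constant(covariates, prepend=False)
model = sm.Logit(treatment, predictors).fit(disp=False, warn_convergence=True)
propensity = model.predict()  # fitted P(treated | covariates)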
Example #2
Source File: dominance.py    From dominance-analysis with MIT License
def Nagelkerke_Rsquare(self,columns):
		cols=columns.copy()
		cols.append('intercept')
		log_clf=sm.Logit(self.data[self.target],self.data[cols])
		N=self.data.shape[0]
		# Fall back to the derivative-free Powell optimizer if the default
		# Newton solver fails to converge.
		try:
			result=log_clf.fit(disp=0)
		except Exception:
			result=log_clf.fit(disp=0,method='powell')
		llf=result.llf
		llnull=result.llnull
		lm=np.exp(llf)
		lnull=np.exp(llnull)
		nagelkerke_rsquare=(1-(lnull/lm)**(2/N))/(1-lnull**(2/N))
		return nagelkerke_rsquare
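Note that np.exp(llf) underflows to zero once the log-likelihood is even moderately negative, which makes the ratio above numerically fragile. The same quantity can be computed entirely in log space; a sketch, algebraically equivalent to the function above:

import numpy as np

def nagelkerke_rsquare_stable(llf, llnull, N):
    # (1 - (L_null/L_model)**(2/N)) / (1 - L_null**(2/N)), rearranged so the
    # likelihoods are never exponentiated directly.
    cox_snell = 1 - np.exp((llnull - llf) * (2.0 / N))
    max_possible = 1 - np.exp(llnull * (2.0 / N))
    return cox_snell / max_possible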
Example #3
Source File: dominance.py    From dominance-analysis with MIT License
def Cox_and_Snell_Rsquare(self,columns):
		cols=columns.copy()
		cols.append('intercept')
		log_clf=sm.Logit(self.data[self.target],self.data[cols])
		N=self.data.shape[0]
		# Fall back to the Powell optimizer if the default solver fails.
		try:
			result=log_clf.fit(disp=0)
		except Exception:
			result=log_clf.fit(disp=0,method='powell')
		llf=result.llf
		llnull=result.llnull
		lm=np.exp(llf)
		lnull=np.exp(llnull)
		cox_and_snell_rsquare=(1-(lnull/lm)**(2/N))
		return cox_and_snell_rsquare
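Cox & Snell R² is exactly the numerator of the Nagelkerke measure above: its maximum attainable value is 1 - L_null^(2/N) rather than 1, and Nagelkerke rescales by that bound. In log space (same underflow caveat as before):

cox_snell = 1 - np.exp((llnull - llf) * (2.0 / N))
nagelkerke = cox_snell / (1 - np.exp(llnull * (2.0 / N)))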
Example #4
Source File: test_glm.py    From py-glm with BSD 3-Clause "New" or "Revised" License
def test_logistic_regressions():

    def _test_random_logistic_regression():
        n_uncorr_features, n_corr_features, n_drop_features = (
            generate_regression_hyperparamters())
        X, y, parameters = make_logistic_regression(
            n_samples=N_SAMPLES,
            n_uncorr_features=n_uncorr_features,
            n_corr_features=n_corr_features,
            n_drop_features=n_drop_features)
        lr = GLM(family=Bernoulli())
        lr.fit(X, y)
        #assert approx_equal(lr.coef_, parameters)
        mod = sm.Logit(y, X)
        res = mod.fit()
        assert approx_equal(lr.coef_, res.params)
        assert approx_equal(lr.coef_standard_error_, res.bse)

    for _ in range(N_REGRESSION_TESTS):
        _test_random_logistic_regression() 
Example #5
Source File: test_generic_methods.py    From vnpy_crypto with MIT License
def setup(self):
        #fit for each test, because results will be changed by test
        x = self.exog
        nobs = x.shape[0]
        np.random.seed(987689)
        y_bin = (np.random.rand(nobs) < 1.0 / (1 + np.exp(x.sum(1) - x.mean()))).astype(int)
        model = sm.Logit(y_bin, x)  #, exposure=np.ones(nobs), offset=np.zeros(nobs)) #bug with default
        # use start_params to converge faster
        start_params = np.array([-0.73403806, -1.00901514, -0.97754543, -0.95648212])
        self.results = model.fit(start_params=start_params, method='bfgs', disp=0) 
Example #6
Source File: test_shrink_pickle.py    From vnpy_crypto with MIT License
def setup(self):
        #fit for each test, because results will be changed by test
        x = self.exog
        nobs = x.shape[0]
        np.random.seed(987689)
        y_bin = (np.random.rand(nobs) < 1.0 / (1 + np.exp(x.sum(1) - x.mean()))).astype(int)
        model = sm.Logit(y_bin, x)  #, exposure=np.ones(nobs), offset=np.zeros(nobs)) #bug with default
        # use start_params to converge faster
        start_params = np.array([-0.73403806, -1.00901514, -0.97754543, -0.95648212])
        self.results = model.fit(start_params=start_params, method='bfgs', disp=0) 
Example #7
Source File: test_numdiff.py    From vnpy_crypto with MIT License
def setup_class(cls):
        data = sm.datasets.spector.load()
        data.exog = sm.add_constant(data.exog, prepend=False)
        #mod = sm.Probit(data.endog, data.exog)
        cls.mod = sm.Logit(data.endog, data.exog)
        #res = mod.fit(method="newton")
        cls.params = [np.array([1,0.25,1.4,-7])]
        ##loglike = mod.loglike
        ##score = mod.score
        ##hess = mod.hessian 
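The commented-out hooks above are real methods on the model object: given a parameter vector, they return the log-likelihood, its gradient, and its Hessian, which is what the numerical-differentiation tests compare against. A sketch of evaluating them directly, assuming a current statsmodels:

import numpy as np
import statsmodels.api as sm

data = sm.datasets.spector.load()
exog = sm.add_constant(data.exog, prepend=False)
mod = sm.Logit(data.endog, exog)

params = np.array([1.0, 0.25, 1.4, -7.0])
ll = mod.loglike(params)    # scalar log-likelihood at params
grad = mod.score(params)    # gradient of the log-likelihood, shape (4,)
hess = mod.hessian(params)  # Hessian matrix, shape (4, 4)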
Example #8
Source File: business_case_solver_without_classes.py    From themarketingtechnologist with Apache License 2.0
def run_logistic_regression(df):
    # Logistic regression
    X = df['pageviews_cumsum']
    X = sm.add_constant(X)
    y = df['is_conversion']
    logit = sm.Logit(y, X)
    logistic_regression_results = logit.fit()
    print(logistic_regression_results.summary())
    return logistic_regression_results 
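A common follow-up: the fitted parameters are on the log-odds scale, so exponentiating them (and their confidence bounds) gives odds ratios, and predict() returns fitted probabilities. A sketch using the results object returned above:

results = run_logistic_regression(df)
odds_ratios = np.exp(results.params)        # multiplicative effect on the odds
odds_ratio_ci = np.exp(results.conf_int())  # 95% CI on the odds-ratio scale
probabilities = results.predict()           # fitted P(is_conversion = 1)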
Example #9
Source File: business_case_solver.py    From themarketingtechnologist with Apache License 2.0
def run_logistic_regression(self):
        # Logistic regression
        X = self.df['pageviews_cumsum']
        X = sm.add_constant(X)
        y = self.df['is_conversion']
        logit = sm.Logit(y, X)
        self.logistic_regression_results = logit.fit()
        print(self.logistic_regression_results.summary())
Example #10
Source File: dominance.py    From dominance-analysis with MIT License
def McFadden_RSquare(self,columns):
		cols=columns.copy()
		cols.append('intercept')
		log_clf=sm.Logit(self.data[self.target],self.data[cols])
		# Fall back to the Powell optimizer if the default solver fails.
		try:
			result=log_clf.fit(disp=0)
		except Exception:
			result=log_clf.fit(disp=0,method='powell')
		mcfadden_rsquare=result.prsquared
		return mcfadden_rsquare
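result.prsquared is McFadden's pseudo R², defined as 1 - llf/llnull; the one-liner below verifies that against a fitted result from the function above:

mcfadden_manual = 1 - result.llf / result.llnull
assert np.isclose(mcfadden_manual, result.prsquared)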
Example #11
Source File: dominance.py    From dominance-analysis with MIT License
def Estrella(self,columns):
		cols=columns.copy()
		cols.append('intercept')
		log_clf=sm.Logit(self.data[self.target],self.data[cols])
		N=self.data.shape[0]
		# Fall back to the Powell optimizer if the default solver fails.
		try:
			result=log_clf.fit(disp=0)
		except Exception:
			result=log_clf.fit(disp=0,method='powell')
		llf=result.llf
		llnull=result.llnull
		estrella_rsquare=1-((llf/llnull)**(-(2/N)*llnull))
		return estrella_rsquare
Example #12
Source File: dominance.py    From dominance-analysis with MIT License
def Adjusted_McFadden_RSquare(self,columns):
		cols=columns.copy()
		cols.append('intercept')
		log_clf=sm.Logit(self.data[self.target],self.data[cols])
		# Fall back to the Powell optimizer if the default solver fails.
		try:
			result=log_clf.fit(disp=0)
		except Exception:
			result=log_clf.fit(disp=0,method='powell')
		llf=result.llf
		llnull=result.llnull
		# Penalize the fitted log-likelihood by the number of predictors.
		adjusted_mcfadden_rsquare=1-((llf-len(columns))/llnull)
		return adjusted_mcfadden_rsquare
Example #13
Source File: propensity_weighted_ols.py    From whynot with MIT License
def estimate_treatment_effect(covariates, treatment, outcome):
    """Estimate treatment effects using propensity weighted least-squares.

    Parameters
    ----------
        covariates: `np.ndarray`
            Array of shape [num_samples, num_features] of features
        treatment:  `np.ndarray`
            Binary array of shape [num_samples]  indicating treatment status for each
            sample.
        outcome:  `np.ndarray`
            Array of shape [num_samples] containing the observed outcome for each sample.

    Returns
    -------
        result: `whynot.framework.InferenceResult`
            InferenceResult object for this procedure

    """
    start_time = perf_counter()

    # Compute propensity scores with logistic regression model.
    features = sm.add_constant(covariates, prepend=True, has_constant="add")
    logit = sm.Logit(treatment, features)
    model = logit.fit(disp=0)
    propensities = model.predict(features)

    # IP-weights
    treated = treatment == 1.0
    untreated = treatment == 0.0
    weights = treated / propensities + untreated / (1.0 - propensities)

    treatment = treatment.reshape(-1, 1)
    features = np.concatenate([treatment, covariates], axis=1)
    features = sm.add_constant(features, prepend=True, has_constant="add")

    model = sm.WLS(outcome, features, weights=weights)
    results = model.fit()
    stop_time = perf_counter()

    # Treatment is the second variable (after the constant offset)
    ate = results.params[1]
    stderr = results.bse[1]
    conf_int = tuple(results.conf_int()[1])

    return InferenceResult(
        ate=ate,
        stderr=stderr,
        ci=conf_int,
        individual_effects=None,
        elapsed_time=stop_time - start_time,
    ) 
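One practical caveat with the inverse-propensity weights above: propensities near 0 or 1 make individual weights explode. A common mitigation is to clip the scores before weighting; a hedged sketch, where the 0.01 threshold is an illustrative choice, not from the source:

eps = 0.01  # illustrative clipping threshold
propensities = np.clip(propensities, eps, 1.0 - eps)
weights = treated / propensities + untreated / (1.0 - propensities)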
Example #14
Source File: propensity_score_matching.py    From whynot with MIT License
def estimate_treatment_effect(covariates, treatment, outcome):
    """Estimate treatment effects using propensity score matching.

    Parameters
    ----------
        covariates: `np.ndarray`
            Array of shape [num_samples, num_features] of features
        treatment:  `np.ndarray`
            Binary array of shape [num_samples]  indicating treatment status for each
            sample.
        outcome:  `np.ndarray`
            Array of shape [num_samples] containing the observed outcome for each sample.

    Returns
    -------
        result: `whynot.framework.InferenceResult`
            InferenceResult object for this procedure

    """
    start_time = perf_counter()

    # Compute propensity scores with logistic regression model.
    features = sm.add_constant(covariates, has_constant="add")
    logit = sm.Logit(treatment, features)
    model = logit.fit(disp=0)
    propensity_scores = model.predict(features)

    matched_treatment, matched_outcome, matched_weights = get_matched_dataset(
        treatment, propensity_scores, outcome
    )

    ate = compute_ate(matched_outcome, matched_treatment, matched_weights)

    # Bootstrap confidence intervals
    samples = []
    num_units = len(matched_treatment)
    for _ in range(1000):
        sample_idxs = np.random.choice(num_units, size=num_units, replace=True)
        samples.append(
            compute_ate(
                matched_outcome[sample_idxs],
                matched_treatment[sample_idxs],
                matched_weights[sample_idxs],
            )
        )
    conf_int = (np.quantile(samples, 0.025), np.quantile(samples, 0.975))
    stop_time = perf_counter()

    return InferenceResult(
        ate=ate,
        stderr=None,
        ci=conf_int,
        individual_effects=None,
        elapsed_time=stop_time - start_time,
    )