Python sklearn.linear_model.LogisticRegression() Examples

The following are 30 code examples of sklearn.linear_model.LogisticRegression(), each taken from an open-source project; the source file and project are noted above each example. You may also want to check out all available functions and classes of the module sklearn.linear_model.
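Before the project examples, here is a minimal, self-contained sketch of the basic fit/predict workflow; the synthetic dataset and parameter values are illustrative only, not drawn from any example below.

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Illustrative synthetic data; any (n_samples, n_features) array and label vector work.
X, y = make_classification(n_samples=200, n_features=5, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = LogisticRegression(max_iter=1000)  # raise max_iter if the solver warns about convergence
clf.fit(X_train, y_train)
print("test accuracy:", clf.score(X_test, y_test))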
Example #1
Source File: create_ngrams.py    From rasa_lookup_demo with Apache License 2.0
def run_logreg(X_train, y_train, X_test, y_test, selection_threshold=0.2):
    print("\nrunning logistic regression...")
    print("using a selection threshold of {}".format(selection_threshold))
    pipe = Pipeline(
        [
            (
                "feature_selection",
                RandomizedLogisticRegression(selection_threshold=selection_threshold),
            ),
            ("classification", LogisticRegression()),
        ]
    )
    pipe.fit(X_train, y_train)
    print("training accuracy : {}".format(pipe.score(X_train, y_train)))
    print("testing accuracy : {}".format(pipe.score(X_test, y_test)))
    return pipe 
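The snippet assumes Pipeline, LogisticRegression, and RandomizedLogisticRegression are already in scope; a usage sketch with synthetic data follows. Note that RandomizedLogisticRegression was available in sklearn.linear_model only up to scikit-learn 0.20 and was removed in 0.21, so this example requires an older release.

# Assumed imports for run_logreg; requires scikit-learn <= 0.20 for
# RandomizedLogisticRegression.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression, RandomizedLogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

X, y = make_classification(n_samples=300, n_features=20, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
pipe = run_logreg(X_train, y_train, X_test, y_test, selection_threshold=0.2)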
Example #2
Source File: classifier.py    From Video-Highlight-Detection with MIT License
def _build_model(self, model_name, params=None):
        # Use chi2_kernel unless a built-in 'linear' or 'rbf' kernel was requested;
        # hoisted above the branches so both the default and custom-params paths see it.
        kernel_function = self.model_kernel if self.model_kernel in ('linear', 'rbf') else chi2_kernel
        if params is None:
            if model_name == 'xgb':
                self.model = XGBClassifier(n_estimators=100, learning_rate=0.02)
            elif model_name == 'svm':
                self.model = SVC(C=1, kernel=kernel_function, gamma=1, probability=True)
            elif model_name == 'lr':
                self.model = LR(C=1, penalty='l1', tol=1e-6)
        else:
            if model_name == 'xgb':
                self.model = XGBClassifier(n_estimators=1000, learning_rate=0.02, **params)
            elif model_name == 'svm':
                self.model = SVC(C=1, kernel=kernel_function, gamma=1, probability=True)
            elif model_name == 'lr':
                self.model = LR(C=1, penalty='l1', tol=1e-6)

        log.l.info('=======> built the model {} done'.format(model_name))
Example #3
Source File: utils.py    From contextualbandits with BSD 2-Clause "Simplified" License
def _check_autograd_supported(base_algorithm):
    supported = ['LogisticRegression', 'SGDClassifier', 'RidgeClassifier', 'StochasticLogisticRegression', 'LinearRegression']
    if base_algorithm.__class__.__name__ not in supported:
        raise ValueError("Automatic gradients only implemented for the following classes: " + ", ".join(supported))
    if base_algorithm.__class__.__name__ == 'LogisticRegression':
        if base_algorithm.penalty != 'l2':
            raise ValueError("Automatic gradients only defined for LogisticRegression with l2 regularization.")
        if base_algorithm.intercept_scaling != 1:
            raise ValueError("Automatic gradients for LogisticRegression not implemented with 'intercept_scaling'.")

    if base_algorithm.__class__.__name__ == 'RidgeClassifier':
        if base_algorithm.normalize:
            raise ValueError("Automatic gradients for RidgeClassifier only implemented without 'normalize'.")

    if base_algorithm.__class__.__name__ == 'SGDClassifier':
        if base_algorithm.loss != 'log':
            raise ValueError("Automatic gradients for SGDClassifier only implemented with logistic loss.")
        if base_algorithm.penalty != 'l2':
            raise ValueError("Automatic gradients for SGDClassifier only defined with l2 regularization.")

    # Use getattr: not every supported estimator exposes 'class_weight'.
    if getattr(base_algorithm, 'class_weight', None) is not None:
        raise ValueError("Automatic gradients not supported with 'class_weight'.")
Example #4
Source File: utils.py    From contextualbandits with BSD 2-Clause "Simplified" License
def __init__(self, lambda_=1., fit_intercept=True, alpha=0.95,
                 m=1.0, ts=False, ts_from_ci=True, sample_unique=False, random_state=1):
        self.conf_coef = alpha
        self.m = m
        self.fit_intercept = fit_intercept
        self.lambda_ = lambda_
        self.ts = ts
        self.ts_from_ci = ts_from_ci
        self.warm_start = True
        self.sample_unique = bool(sample_unique)
        self.random_state = _check_random_state(random_state)
        self.is_fitted = False
        self.model = LogisticRegression(C=1./lambda_, penalty="l2",
                                        fit_intercept=fit_intercept,
                                        solver='lbfgs', max_iter=15000,
                                        warm_start=True)
        self.Sigma = np.empty((0,0), dtype=np.float64) 
Example #5
Source File: test_LogisticRegression.py    From differential-privacy-library with MIT License
def test_bad_params(self):
        X = [[1]]
        y = [0]

        with self.assertRaises(ValueError):
            LogisticRegression(data_norm=1, C=-1).fit(X, y)

        with self.assertRaises(ValueError):
            LogisticRegression(data_norm=1, C=1.2).fit(X, y)

        with self.assertRaises(ValueError):
            LogisticRegression(data_norm=1, max_iter=-1).fit(X, y)

        with self.assertRaises(ValueError):
            LogisticRegression(data_norm=1, max_iter="100").fit(X, y)

        with self.assertRaises(ValueError):
            LogisticRegression(data_norm=1, tol=-1).fit(X, y)

        with self.assertRaises(ValueError):
            LogisticRegression(data_norm=1, tol="1").fit(X, y) 
Example #6
Source File: test_LogisticRegression.py    From differential-privacy-library with MIT License
def test_same_results(self):
        from sklearn import datasets
        from sklearn.model_selection import train_test_split
        from sklearn import linear_model

        dataset = datasets.load_iris()
        X_train, X_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size=0.2)

        clf = LogisticRegression(data_norm=12, epsilon=float("inf"))
        clf.fit(X_train, y_train)

        predict1 = clf.predict(X_test)

        clf = linear_model.LogisticRegression(solver="lbfgs", multi_class="ovr")
        clf.fit(X_train, y_train)

        predict2 = clf.predict(X_test)

        self.assertTrue(np.all(predict1 == predict2)) 
Example #7
Source File: test_LogisticRegression.py    From differential-privacy-library with MIT License
def test_accountant(self):
        from diffprivlib.accountant import BudgetAccountant
        acc = BudgetAccountant()

        X = np.array(
            [0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50, 2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75,
             5.00, 5.50])
        y = np.array([0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1])
        X = X[:, np.newaxis]
        X -= 3.0
        X /= 2.5

        clf = LogisticRegression(epsilon=2, data_norm=1.0, accountant=acc)
        clf.fit(X, y)
        self.assertEqual((2, 0), acc.total())

        with BudgetAccountant(3, 0) as acc2:
            clf = LogisticRegression(epsilon=2, data_norm=1.0)
            clf.fit(X, y)
            self.assertEqual((2, 0), acc2.total())

            with self.assertRaises(BudgetError):
                clf.fit(X, y) 
Example #8
Source File: common_utils.py    From interpret-text with MIT License
def create_logistic_vectorizer():
    vectorizer = CountVectorizer(lowercase=False, min_df=0.0, binary=True)
    lr = LogisticRegression(random_state=777)
    return Pipeline([("vectorizer", vectorizer), ("lr", lr)]) 
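A short usage sketch (the toy texts and labels here are illustrative assumptions, not from the source project): the returned Pipeline accepts raw strings directly, since CountVectorizer handles tokenization before the LogisticRegression step.

# Toy data to show the end-to-end call pattern.
pipe = create_logistic_vectorizer()
texts = ["good movie", "bad movie", "great film", "awful film"]
labels = [1, 0, 1, 0]
pipe.fit(texts, labels)
print(pipe.predict(["good film"]))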
Example #9
Source File: top_factors.py    From healthcareai-py with MIT License
def prepare_fit_model_for_factors(model_type, x_train, y_train):
    """
    Given a model type, train and test data
    
    Args:
        model_type (str): 'classification' or 'regression'
        x_train:
        y_train:

    Returns:
        (sklearn.base.BaseEstimator): A fit model.
    """

    if model_type == 'classification':
        algorithm = LogisticRegression()
    elif model_type == 'regression':
        algorithm = LinearRegression()
    else:
        algorithm = None

    if algorithm is not None:
        algorithm.fit(x_train, y_train)

    return algorithm 
Example #10
Source File: maximum_margin_reduction.py    From libact with BSD 2-Clause "Simplified" License
def __init__(self, *args, **kwargs):
        super(MaximumLossReductionMaximalConfidence, self).__init__(*args, **kwargs)

        # self.n_labels = len(self.dataset.get_labeled_entries()[0][1])
        self.n_labels = len(self.dataset.get_labeled_entries()[1][0])

        random_state = kwargs.pop('random_state', None)
        self.random_state_ = seed_random_state(random_state)

        self.logreg_param = kwargs.pop('logreg_param',
                                       {'multi_class': 'multinomial',
                                        'solver': 'newton-cg',
                                        'random_state': random_state})
        self.logistic_regression_ = LogisticRegression(**self.logreg_param)

        self.br_base = kwargs.pop('br_base',
              SklearnProbaAdapter(SVC(kernel='linear',
                                      probability=True,
                                      gamma="auto",
                                      random_state=random_state))) 
Example #11
Source File: train_sampling_unsupervised.py    From dgl with Apache License 2.0
def compute_acc(emb, labels, train_nids, val_nids, test_nids):
    """
    Compute the accuracy of prediction given the labels.
    """
    emb = emb.cpu().numpy()
    train_nids = train_nids.cpu().numpy()
    train_labels = labels[train_nids].cpu().numpy()
    val_nids = val_nids.cpu().numpy()
    val_labels = labels[val_nids].cpu().numpy()
    test_nids = test_nids.cpu().numpy()
    test_labels = labels[test_nids].cpu().numpy()

    emb = (emb - emb.mean(0, keepdims=True)) / emb.std(0, keepdims=True)

    lr = lm.LogisticRegression(multi_class='multinomial', max_iter=10000)
    lr.fit(emb[train_nids], labels[train_nids])

    pred = lr.predict(emb)
    f1_micro_eval = skm.f1_score(labels[val_nids], pred[val_nids], average='micro')
    f1_micro_test = skm.f1_score(labels[test_nids], pred[test_nids], average='micro')
    f1_macro_eval = skm.f1_score(labels[val_nids], pred[val_nids], average='macro')
    f1_macro_test = skm.f1_score(labels[test_nids], pred[test_nids], average='macro')
    return f1_micro_eval, f1_micro_test 
Example #12
Source File: stability_selection.py    From stability-selection with BSD 3-Clause "New" or "Revised" License
# penalty='l1' needs a compatible solver; liblinear was scikit-learn's default before 0.22
def __init__(self, base_estimator=LogisticRegression(penalty='l1', solver='liblinear'), lambda_name='C',
                 lambda_grid=np.logspace(-5, -2, 25), n_bootstrap_iterations=100,
                 sample_fraction=0.5, threshold=0.6, bootstrap_func=bootstrap_without_replacement,
                 bootstrap_threshold=None, verbose=0, n_jobs=1, pre_dispatch='2*n_jobs',
                 random_state=None):
        self.base_estimator = base_estimator
        self.lambda_name = lambda_name
        self.lambda_grid = lambda_grid
        self.n_bootstrap_iterations = n_bootstrap_iterations
        self.sample_fraction = sample_fraction
        self.threshold = threshold
        self.bootstrap_func = bootstrap_func
        self.bootstrap_threshold = bootstrap_threshold
        self.verbose = verbose
        self.n_jobs = n_jobs
        self.pre_dispatch = pre_dispatch
        self.random_state = random_state 
Example #13
Source File: test_run.py    From nyaggle with MIT License
def test_experiment_sklearn_classifier(tmpdir_name):
    X, y = make_classification_df(n_samples=1024, n_num_features=10, n_cat_features=0,
                                  class_sep=0.98, random_state=0, id_column='user_id')

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    params = {
        'C': 0.1
    }

    result = run_experiment(params, X_train, y_train, X_test, tmpdir_name, eval_func=roc_auc_score,
                            algorithm_type=LogisticRegression, with_auto_prep=False)

    assert len(np.unique(result.oof_prediction)) > 5  # making sure prediction is not binarized
    assert len(np.unique(result.test_prediction)) > 5
    assert roc_auc_score(y_train, result.oof_prediction) >= 0.8
    assert roc_auc_score(y_test, result.test_prediction) >= 0.8

    _check_file_exists(tmpdir_name) 
Example #14
Source File: test_iw.py    From libTLDA with MIT License
def test_regularization():
    """Test that the l2_regularization option selects the expected model class."""
    X = rnd.randn(10, 2)
    y = np.hstack((-np.ones((5,)), np.ones((5,))))
    Z = rnd.randn(10, 2) + 1
    clf = ImportanceWeightedClassifier(loss_function='lr',
                                       l2_regularization=None)
    assert isinstance(clf.clf, LogisticRegressionCV)
    clf = ImportanceWeightedClassifier(loss_function='lr',
                                       l2_regularization=1.0)
    assert isinstance(clf.clf, LogisticRegression) 
Example #15
Source File: test_LogisticRegression.py    From differential-privacy-library with MIT License
def test_not_none(self):
        self.assertIsNotNone(LogisticRegression) 
Example #16
Source File: test_LogisticRegression.py    From differential-privacy-library with MIT License
def test_one_class(self):
        X = [[1]]
        y = [0]

        clf = LogisticRegression(data_norm=1)

        with self.assertRaises(ValueError):
            clf.fit(X, y) 
Example #17
Source File: test_LogisticRegression.py    From differential-privacy-library with MIT License
def test_no_params(self):
        clf = LogisticRegression()

        X = np.array(
            [0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50, 2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75,
             5.00, 5.50])
        y = np.array([0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1])
        X = X[:, np.newaxis]

        with self.assertWarns(PrivacyLeakWarning):
            clf.fit(X, y) 
Example #18
Source File: test_LogisticRegression.py    From differential-privacy-library with MIT License
def test_large_norm(self):
        X = np.array(
            [0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50, 2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75,
             5.00, 5.50])
        y = np.array([0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1])
        X = X[:, np.newaxis]

        clf = LogisticRegression(data_norm=1.0)

        self.assertIsNotNone(clf.fit(X, y)) 
Example #19
Source File: test_LogisticRegression.py    From differential-privacy-library with MIT License
def test_trinomial(self):
        X = np.array(
            [0.50, 0.75, 1.00])
        y = np.array([0, 1, 2])
        X = X[:, np.newaxis]

        clf = LogisticRegression(data_norm=1.0)

        self.assertIsNotNone(clf.fit(X, y)) 
Example #20
Source File: test_LogisticRegression.py    From differential-privacy-library with MIT License
def test_solver_warning(self):
        with self.assertWarns(DiffprivlibCompatibilityWarning):
            LogisticRegression(solver="newton-cg") 
Example #21
Source File: test_LogisticRegression.py    From differential-privacy-library with MIT License
def test_multi_class_warning(self):
        with self.assertWarns(DiffprivlibCompatibilityWarning):
            LogisticRegression(multi_class="multinomial") 
Example #22
Source File: test_LogisticRegression.py    From differential-privacy-library with MIT License
def test_different_results(self):
        from sklearn import datasets
        from sklearn import linear_model
        from sklearn.model_selection import train_test_split

        dataset = datasets.load_iris()
        X_train, X_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size=0.2)

        clf = LogisticRegression(data_norm=12)
        clf.fit(X_train, y_train)

        predict1 = clf.predict(X_test)

        clf = LogisticRegression(data_norm=12)
        clf.fit(X_train, y_train)

        predict2 = clf.predict(X_test)

        clf = linear_model.LogisticRegression(solver="lbfgs", multi_class="ovr")
        clf.fit(X_train, y_train)

        predict3 = clf.predict(X_test)

        self.assertFalse(np.all(predict1 == predict2))
        self.assertFalse(np.all(predict3 == predict1) and np.all(predict3 == predict2)) 
Example #23
Source File: common_utils.py    From interpret-text with MIT License
def create_sklearn_logistic_regressor(X, y, pipeline=False):
    lin = linear_model.LogisticRegression()
    if pipeline:
        lin = Pipeline([("lin", lin)])
    model = lin.fit(X, y)
    return model 
Example #24
Source File: simulation_exp4p.py    From striatum with BSD 2-Clause "Simplified" License
def train_expert(history_context, history_action):
    n_round = len(history_context)
    history_context = np.array([history_context[t] for t in range(n_round)])
    history_action = np.array([history_action[t] for t in range(n_round)])
    logreg = OneVsRestClassifier(LogisticRegression())
    mnb = OneVsRestClassifier(MultinomialNB())
    logreg.fit(history_context, history_action)
    mnb.fit(history_context, history_action)
    return [logreg, mnb] 
Example #25
Source File: movielens_bandit.py    From striatum with BSD 2-Clause "Simplified" License
def train_expert(action_context):
    logreg = OneVsRestClassifier(LogisticRegression())
    mnb = OneVsRestClassifier(MultinomialNB())
    logreg.fit(action_context.iloc[:, 2:], action_context.iloc[:, 1])
    mnb.fit(action_context.iloc[:, 2:], action_context.iloc[:, 1])
    return [logreg, mnb] 
Example #26
Source File: classifiers.py    From recordlinkage with BSD 3-Clause "New" or "Revised" License
def __init__(self,
                 coefficients=None,
                 intercept=None,
                 **kwargs):
        super(LogisticRegressionClassifier, self).__init__()

        self.kernel = linear_model.LogisticRegression(**kwargs)

        self.coefficients = coefficients
        self.intercept = intercept 
Example #27
Source File: classifiers.py    From seizure-prediction with MIT License
def make_lr(C):
    cls = sklearn.pipeline.make_pipeline(StandardScaler(), LogisticRegression(C=C))
    name = 'ss-lr-C%.4f' % C
    return (cls, name) 
Example #28
Source File: model_loop.py    From fake-news-detection with MIT License
def define_clfs_params(self):
        '''
        Defines all relevant parameters and classes for classfier objects.
        Edit these if you wish to change parameters.
        '''
        # These are the classifiers
        self.clfs = {
            'RF': RandomForestClassifier(n_estimators = 50, n_jobs = -1),
            'ET': ExtraTreesClassifier(n_estimators = 10, n_jobs = -1, criterion = 'entropy'),
            'AB': AdaBoostClassifier(DecisionTreeClassifier(max_depth = 1), algorithm = "SAMME", n_estimators = 200),
            'LR': LogisticRegression(penalty = 'l1', C = 1e5, solver = 'liblinear'),
            'SVM': svm.SVC(kernel = 'linear', probability = True, random_state = 0),
            'GB': GradientBoostingClassifier(learning_rate = 0.05, subsample = 0.5, max_depth = 6, n_estimators = 10),
            'NB': GaussianNB(),
            'DT': DecisionTreeClassifier(),
            'SGD': SGDClassifier(loss = 'log', penalty = 'l2'),
            'KNN': KNeighborsClassifier(n_neighbors = 3)
            }
        # These are the parameters which will be run through
        self.params = {
             'RF':{'n_estimators': [1,10,100,1000], 'max_depth': [10, 15,20,30,40,50,60,70,100], 'max_features': ['sqrt','log2'],'min_samples_split': [2,5,10], 'random_state': [1]},
             'LR': {'penalty': ['l1','l2'], 'C': [0.00001,0.0001,0.001,0.01,0.1,1,10], 'random_state': [1]},
             'SGD': {'loss': ['log'], 'penalty': ['l2','l1','elasticnet'], 'random_state': [1]},
             'ET': {'n_estimators': [1,10,100,1000], 'criterion' : ['gini', 'entropy'], 'max_depth': [1,3,5,10,15], 'max_features': ['sqrt','log2'],'min_samples_split': [2,5,10], 'random_state': [1]},
             'AB': {'algorithm': ['SAMME', 'SAMME.R'], 'n_estimators': [1,10,100,1000], 'random_state': [1]},
             'GB': {'n_estimators': [1,10,100,1000], 'learning_rate' : [0.001,0.01,0.05,0.1,0.5],'subsample' : [0.1,0.5,1.0], 'max_depth': [1,3,5,10,20,50,100], 'random_state': [1]},
             'NB': {},
             'DT': {'criterion': ['gini', 'entropy'], 'max_depth': [1,2,15,20,30,40,50], 'max_features': ['sqrt','log2'],'min_samples_split': [2,5,10], 'random_state': [1]},
             'SVM' :{'C' :[0.00001,0.0001,0.001,0.01,0.1,1,10],'kernel':['linear'], 'random_state': [1]},
             'KNN' :{'n_neighbors': [1,5,10,25,50,100],'weights': ['uniform','distance'],'algorithm': ['auto','ball_tree','kd_tree']}
             } 
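The two dictionaries are meant to be consumed together: self.clfs supplies a base estimator per model code and self.params the grid to sweep over it. A minimal sketch of how a driver method might iterate them with ParameterGrid follows; the method name and the X_train/y_train/X_test/y_test arguments are hypothetical, not from the source project.

from sklearn.model_selection import ParameterGrid

def run_grid(self, X_train, y_train, X_test, y_test):
    # Hypothetical driver: sweep each classifier over its parameter grid.
    for code in ['LR', 'SGD', 'DT']:
        clf = self.clfs[code]
        for p in ParameterGrid(self.params[code]):
            clf.set_params(**p)
            clf.fit(X_train, y_train)
            print(code, p, clf.score(X_test, y_test))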
Example #29
Source File: test_stacker.py    From xcessiv with Apache License 2.0
def setUp(self):
        bl1 = RandomForestClassifier(random_state=8)
        bl2 = LogisticRegression()
        bl3 = RandomForestClassifier(max_depth=10, random_state=10)

        meta_est = LogisticRegression()

        skf = StratifiedKFold(shuffle=True, random_state=8).split  # recent scikit-learn requires shuffle=True when random_state is set

        self.stacked_ensemble = stacker.XcessivStackedEnsemble(
            [bl1, bl2, bl3],
            ['predict', 'predict_proba', 'predict_proba'],
            meta_est,
            skf
        ) 
Example #30
Source File: main.py    From spotify-tensorflow with Apache License 2.0
def main():
    # Enable eager execution for DataFrame endpoint
    import tensorflow as tf
    tf.enable_eager_execution()

    # Set up training data
    train_data_dir = get_data_dir("train")
    train_data = os.path.join(train_data_dir, "part-*")
    schema_path = os.path.join(train_data_dir, "_inferred_schema.pb")

    df_train_data = next(Datasets.dataframe.examples_via_schema(train_data,
                                                                schema_path,
                                                                batch_size=1024))

    # the feature keys are ordered alphabetically for determinism
    label_keys = sorted([l for l in set(df_train_data.columns) if l.startswith("class_name")])
    feature_keys = sorted(set(df_train_data.columns).difference(label_keys))

    label = df_train_data[label_keys].apply(transform_labels, axis=1)
    features = df_train_data[feature_keys]

    # Build model
    from sklearn.linear_model import LogisticRegression
    model = LogisticRegression(multi_class="multinomial", solver="newton-cg")
    model.fit(features, label)

    # Set up eval data
    eval_data_dir = get_data_dir("eval")
    eval_data = os.path.join(eval_data_dir, "part-*")
    df_eval_data = next(Datasets.dataframe.examples_via_schema(eval_data,
                                                               schema_path,
                                                               batch_size=1024))

    eval_label = df_eval_data[label_keys].apply(transform_labels, axis=1)
    eval_features = df_eval_data[feature_keys]

    # Evaluate model
    score = model.score(eval_features, eval_label)
    print("Score is %f" % score)