Python sklearn.linear_model.SGDClassifier() Examples

The following are 30 code examples showing how to use sklearn.linear_model.SGDClassifier(). They are extracted from open source projects; the originating project, author, source file, and license are noted above each example.

You may also want to check out all available functions and classes of the module sklearn.linear_model.
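
Before the project examples, here is a minimal, self-contained sketch of the basic workflow: construct the estimator, fit it, and predict. Note that many of the examples below predate scikit-learn 1.1, where the logistic loss was renamed from "log" to "log_loss"; the sketch uses the default hinge loss, which works on either version.

from sklearn.datasets import make_classification
from sklearn.linear_model import SGDClassifier

# Toy data: 100 samples, 4 informative features, 2 classes.
X, y = make_classification(n_samples=100, n_features=4, n_informative=4,
                           n_redundant=0, random_state=0)

# The default loss="hinge" fits a linear SVM by stochastic gradient descent.
clf = SGDClassifier(max_iter=1000, tol=1e-3, random_state=0)
clf.fit(X, y)
print(clf.predict(X[:5]))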

Example 1
Project: nyoka   Author: nyoka-pmml   File: _validateSchema.py    License: Apache License 2.0
def test_validate_sklearn_sgd_with_text_cv(self):
        categories = ['alt.atheism','talk.religion.misc']
        data = fetch_20newsgroups(subset='train', categories=categories)
        X = data.data[:4]
        Y = data.target[:4]
        features = ['input']
        target = 'output'
        model = SGDClassifier(loss="log")
        file_name = model.__class__.__name__ + '_CountVec_.pmml'
        pipeline = Pipeline([
            ('vect', CountVectorizer()),
            ('clf', model)
        ])
        pipeline.fit(X, Y)
        skl_to_pmml(pipeline, features, target, file_name)
        self.assertEqual(self.schema.is_valid(file_name), True) 
Example 2
Project: scikit-learn-extra   Author: scikit-learn-contrib   File: test_robust_weighted_estimator.py    License: BSD 3-Clause "New" or "Revised" License
def test_corrupted_classif(loss, weighting):
    clf = RobustWeightedEstimator(
        SGDClassifier(),
        loss=loss,
        max_iter=50,
        weighting=weighting,
        k=5,
        c=None,
        random_state=rng,
    )
    clf.fit(X_cc, y_cc)
    score = clf.score(X_cc, y_cc)
    assert score > 0.75


# Classification test without outliers 
Example 3
Project: scikit-learn-extra   Author: scikit-learn-contrib   File: test_robust_weighted_estimator.py    License: BSD 3-Clause "New" or "Revised" License
def test_not_robust_classif(loss, weighting):
    clf = RobustWeightedEstimator(
        SGDClassifier(),
        loss=loss,
        max_iter=100,
        weighting=weighting,
        k=0,
        c=1e7,
        burn_in=0,
        random_state=rng,
    )
    clf_not_rob = SGDClassifier(loss=loss, random_state=rng)
    clf.fit(X_c, y_c)
    clf_not_rob.fit(X_c, y_c)
    pred1 = clf.base_estimator_.decision_function(X_c)
    pred2 = clf_not_rob.decision_function(X_c)

    assert (
        np.linalg.norm(pred1 - pred2) / np.linalg.norm(pred2)
        - np.linalg.norm(pred1 - y_c) / np.linalg.norm(y_c)
        < 0.1
    )


# Case "log" loss, test predict_proba 
Example 4
Project: scikit-learn-extra   Author: scikit-learn-contrib   File: test_robust_weighted_estimator.py    License: BSD 3-Clause "New" or "Revised" License
def test_predict_proba(weighting):
    clf = RobustWeightedEstimator(
        SGDClassifier(loss="log"),
        loss="log",
        max_iter=100,
        weighting=weighting,
        k=0,
        c=1e7,
        burn_in=0,
        random_state=rng,
    )
    clf_not_rob = SGDClassifier(loss="log", random_state=rng)
    clf.fit(X_c, y_c)
    clf_not_rob.fit(X_c, y_c)
    pred1 = clf.base_estimator_.predict_proba(X_c)[:, 1]
    pred2 = clf_not_rob.predict_proba(X_c)[:, 1]

    assert (
        np.linalg.norm(pred1 - pred2) / np.linalg.norm(pred2)
        - np.linalg.norm(pred1 - y_c) / np.linalg.norm(y_c)
        < 0.1
    )


Example 5
def demo():

    # The classifiers we will compare: HoeffdingTreeClassifier, SAMKNNClassifier, LeveragingBaggingClassifier, and SGDClassifier
    h1 = [HoeffdingTreeClassifier(), SAMKNNClassifier(), LeveragingBaggingClassifier(random_state=1), SGDClassifier()]
    h2 = [HoeffdingTreeClassifier(), SAMKNNClassifier(), LeveragingBaggingClassifier(random_state=1), SGDClassifier()]
    h3 = [HoeffdingTreeClassifier(), SAMKNNClassifier(), LeveragingBaggingClassifier(random_state=1), SGDClassifier()]
    model_names = ['HT', 'SAMKNNClassifier', 'LBkNN', 'SGDC']

    # Demo 1 -- plot should not fail
    demo_parameterized(h1, model_names=model_names)

    # Demo 2 -- csv output should look nice
    demo_parameterized(h2, "sea_stream.csv", False, model_names)

    # Demo 3 -- should not give "'NoneType' object is not iterable" error
    demo_parameterized(h3, "covtype.csv", False, model_names) 
Example 6
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_from_model.py    License: MIT License
def test_prefit():
    # Test all possible combinations of the prefit parameter.

    # Passing a prefit parameter with the selected model
    # and fitting an unfit model with prefit=False should give the same results.
    clf = SGDClassifier(alpha=0.1, max_iter=10, shuffle=True,
                        random_state=0, tol=None)
    model = SelectFromModel(clf)
    model.fit(data, y)
    X_transform = model.transform(data)
    clf.fit(data, y)
    model = SelectFromModel(clf, prefit=True)
    assert_array_almost_equal(model.transform(data), X_transform)

    # Check that the model is rewritten if prefit=False and a fitted model is
    # passed
    model = SelectFromModel(clf, prefit=False)
    model.fit(data, y)
    assert_array_almost_equal(model.transform(data), X_transform)

    # Check that prefit=True and calling fit raises a ValueError
    model = SelectFromModel(clf, prefit=True)
    assert_raises(ValueError, model.fit, data, y) 
Example 7
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_multioutput.py    License: MIT License
def test_multi_output_classification_partial_fit_parallelism():
    sgd_linear_clf = SGDClassifier(loss='log', random_state=1, max_iter=5)
    mor = MultiOutputClassifier(sgd_linear_clf, n_jobs=4)
    mor.partial_fit(X, y, classes)
    est1 = mor.estimators_[0]
    mor.partial_fit(X, y)
    est2 = mor.estimators_[0]
    if cpu_count() > 1:
        # parallelism requires this to be the case for a sane implementation
        assert est1 is not est2


# check predict_proba passes 
Example 8
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_multioutput.py    License: MIT License
def test_multi_output_predict_proba():
    sgd_linear_clf = SGDClassifier(random_state=1, max_iter=5, tol=1e-3)
    param = {'loss': ('hinge', 'log', 'modified_huber')}

    # inner function for custom scoring
    def custom_scorer(estimator, X, y):
        if hasattr(estimator, "predict_proba"):
            return 1.0
        else:
            return 0.0
    grid_clf = GridSearchCV(sgd_linear_clf, param_grid=param,
                            scoring=custom_scorer, cv=3, error_score=np.nan)
    multi_target_linear = MultiOutputClassifier(grid_clf)
    multi_target_linear.fit(X, y)

    multi_target_linear.predict_proba(X)

    # SGDClassifier defaults to loss='hinge' which is not a probabilistic
    # loss function; therefore it does not expose a predict_proba method
    sgd_linear_clf = SGDClassifier(random_state=1, max_iter=5, tol=1e-3)
    multi_target_linear = MultiOutputClassifier(sgd_linear_clf)
    multi_target_linear.fit(X, y)
    err_msg = "The base estimator should implement predict_proba method"
    with pytest.raises(ValueError, match=err_msg):
        multi_target_linear.predict_proba(X)


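The behavior tested above, predict_proba existing only for probabilistic losses, can be observed directly on unfitted estimators (Example 10 below tests this in detail). A minimal sketch, assuming a scikit-learn version where the logistic loss is still spelled "log":

from sklearn.linear_model import SGDClassifier

for loss in ("hinge", "log", "modified_huber"):
    clf = SGDClassifier(loss=loss)
    # Only "log" and "modified_huber" expose predict_proba.
    print(loss, hasattr(clf, "predict_proba"))
# hinge False, log True, modified_huber True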
Example 9
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_multioutput.py    License: MIT License
def test_multi_output_classification_partial_fit_sample_weights():
    # weighted classifier
    Xw = [[1, 2, 3], [4, 5, 6], [1.5, 2.5, 3.5]]
    yw = [[3, 2], [2, 3], [3, 2]]
    w = np.asarray([2., 1., 1.])
    sgd_linear_clf = SGDClassifier(random_state=1, max_iter=20)
    clf_w = MultiOutputClassifier(sgd_linear_clf)
    clf_w.fit(Xw, yw, w)

    # unweighted, but with repeated samples
    X = [[1, 2, 3], [1, 2, 3], [4, 5, 6], [1.5, 2.5, 3.5]]
    y = [[3, 2], [3, 2], [2, 3], [3, 2]]
    sgd_linear_clf = SGDClassifier(random_state=1, max_iter=20)
    clf = MultiOutputClassifier(sgd_linear_clf)
    clf.fit(X, y)
    X_test = [[1.5, 2.5, 3.5]]
    assert_array_almost_equal(clf.predict(X_test), clf_w.predict(X_test)) 
Example 10
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_sgd.py    License: MIT License
def test_sgd_predict_proba_method_access(klass):
    # Checks that SGDClassifier predict_proba and predict_log_proba methods
    # can either be accessed or raise an appropriate error message
    # otherwise. See
    # https://github.com/scikit-learn/scikit-learn/issues/10938 for more
    # details.
    for loss in linear_model.SGDClassifier.loss_functions:
        clf = SGDClassifier(loss=loss)
        if loss in ('log', 'modified_huber'):
            assert hasattr(clf, 'predict_proba')
            assert hasattr(clf, 'predict_log_proba')
        else:
            message = ("probability estimates are not "
                       "available for loss={!r}".format(loss))
            assert not hasattr(clf, 'predict_proba')
            assert not hasattr(clf, 'predict_log_proba')
            with pytest.raises(AttributeError,
                               match=message):
                clf.predict_proba
            with pytest.raises(AttributeError,
                               match=message):
                clf.predict_log_proba 
Example 11
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_sgd.py    License: MIT License
def test_l1_ratio():
    # Test if l1 ratio extremes match L1 and L2 penalty settings.
    X, y = datasets.make_classification(n_samples=1000,
                                        n_features=100, n_informative=20,
                                        random_state=1234)

    # test if elasticnet with l1_ratio near 1 gives same result as pure l1
    est_en = SGDClassifier(alpha=0.001, penalty='elasticnet', tol=None,
                           max_iter=6, l1_ratio=0.9999999999,
                           random_state=42).fit(X, y)
    est_l1 = SGDClassifier(alpha=0.001, penalty='l1', max_iter=6,
                           random_state=42, tol=None).fit(X, y)
    assert_array_almost_equal(est_en.coef_, est_l1.coef_)

    # test if elasticnet with l1_ratio near 0 gives same result as pure l2
    est_en = SGDClassifier(alpha=0.001, penalty='elasticnet', tol=None,
                           max_iter=6, l1_ratio=0.0000000001,
                           random_state=42).fit(X, y)
    est_l2 = SGDClassifier(alpha=0.001, penalty='l2', max_iter=6,
                           random_state=42, tol=None).fit(X, y)
    assert_array_almost_equal(est_en.coef_, est_l2.coef_) 
Example 12
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_logistic.py    License: MIT License
def test_elastic_net_versus_sgd(C, l1_ratio):
    # Compare elasticnet penalty in LogisticRegression() and SGD(loss='log')
    n_samples = 500
    X, y = make_classification(n_samples=n_samples, n_classes=2, n_features=5,
                               n_informative=5, n_redundant=0, n_repeated=0,
                               random_state=1)
    X = scale(X)

    sgd = SGDClassifier(
        penalty='elasticnet', random_state=1, fit_intercept=False, tol=-np.inf,
        max_iter=2000, l1_ratio=l1_ratio, alpha=1. / C / n_samples, loss='log')
    log = LogisticRegression(
        penalty='elasticnet', random_state=1, fit_intercept=False, tol=1e-5,
        max_iter=1000, l1_ratio=l1_ratio, C=C, solver='saga')

    sgd.fit(X, y)
    log.fit(X, y)
    assert_array_almost_equal(sgd.coef_, log.coef_, decimal=1) 
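The comparison works because SGDClassifier's alpha corresponds to 1 / (C * n_samples) in LogisticRegression's parameterization, which is exactly what the alpha=1. / C / n_samples argument above encodes:

C, n_samples = 1.0, 500
alpha = 1.0 / (C * n_samples)  # 0.002, the SGD-side equivalent of C=1.0 on 500 samples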
Example 13
Project: textar   Author: datosgobar   File: text_classifier.py    License: MIT License
def make_classifier(self, name, ids, labels):
        """Entrenar un clasificador SVM sobre los textos cargados.

        Crea un clasificador que se guarda en el objeto bajo el nombre `name`.

        Args:
            name (str): Nombre para el clasidicador.
            ids (list): Se espera una lista de N ids de textos ya almacenados
                en el TextClassifier.
            labels (list): Se espera una lista de N etiquetas. Una por cada id
                de texto presente en ids.
        Nota:
            Usa el clasificador de `Scikit-learn <http://scikit-learn.org/>`_
        """
        if not all(np.in1d(ids, self.ids)):
            raise ValueError("Hay ids de textos que no se encuentran \
                              almacenados.")
        setattr(self, name, SGDClassifier())
        classifier = getattr(self, name)
        indices = np.searchsorted(self.ids, ids)
        classifier.fit(self.tfidf_mat[indices, :], labels) 
Example 14
Project: text-mining-class   Author: ogrisel   File: language_detector.py    License: MIT License
def build_language_classifier(texts, labels, verbose=False, random_state=None):
    """Train a text classifier with scikit-learn

    The text classifier is composed of two elements assembled in a pipeline:

    - A text feature extractor (`TfidfVectorizer`) that extract the relative
      frequencies of unigrams, bigrams and trigrams of characters in the text.

    - An instance of `SGDClassifier` for the classification it-self. To speed
      up training it is recommended to enable early stopping.

    `random_state` is passed to the underlying `SGDClassifier` instance.
    """
    language_classifier = make_pipeline(
        TfidfVectorizer(analyzer="char", ngram_range=(1, 3),
                        min_df=2, max_df=0.9, norm="l2", dtype=np.float32),
        SGDClassifier(early_stopping=True, validation_fraction=0.2,
                      n_iter_no_change=3, max_iter=1000, tol=1e-3,
                      alpha=1e-5, penalty="l2", verbose=verbose,
                      random_state=random_state)
    )
    return language_classifier.fit(texts, labels) 
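A hypothetical usage sketch of build_language_classifier; the toy texts and labels below are invented for illustration, and the imports from the example above are assumed to be in scope:

texts = ["bonjour tout le monde", "hello everyone", "hola a todos"] * 50
labels = ["fr", "en", "es"] * 50
detector = build_language_classifier(texts, labels, random_state=0)
print(detector.predict(["good morning to all"]))  # likely ['en'] on this toy data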
Example 15
Project: nyoka   Author: nyoka-pmml   File: testScoreWithAdapaSklearn.py    License: Apache License 2.0
def test_09_sgd_classifier(self):
        print("\ntest 09 (SGD Classifier with preprocessing) [multi-class]\n")
        X, X_test, y, features, target, test_file = self.data_utility.get_data_for_multi_class_classification()

        model = SGDClassifier(loss="log")
        pipeline_obj = Pipeline([
            ("scaler", StandardScaler()),
            ("model", model)
        ])
        pipeline_obj.fit(X,y)
        file_name = 'test09sklearn.pmml'
        
        skl_to_pmml(pipeline_obj, features, target, file_name)
        model_name  = self.adapa_utility.upload_to_zserver(file_name)
        predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, test_file)
        model_pred = pipeline_obj.predict(X_test)
        model_prob = pipeline_obj.predict_proba(X_test)
        self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
        self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True) 
Example 16
Project: nyoka   Author: nyoka-pmml   File: testScoreWithAdapaSklearn.py    License: Apache License 2.0
def test_10_sgd_classifier(self):
        print("\ntest 10 (SGD Classifier with preprocessing) [binary-class]\n")
        X, X_test, y, features, target, test_file = self.data_utility.get_data_for_binary_classification()

        model = SGDClassifier(loss="log")
        pipeline_obj = Pipeline([
            ("scaler", StandardScaler()),
            ("model", model)
        ])
        pipeline_obj.fit(X,y)
        file_name = 'test10sklearn.pmml'
        
        skl_to_pmml(pipeline_obj, features, target, file_name)
        model_name  = self.adapa_utility.upload_to_zserver(file_name)
        predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, test_file)
        model_pred = pipeline_obj.predict(X_test)
        model_prob = pipeline_obj.predict_proba(X_test)
        self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
        self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True) 
Example 17
Project: nyoka   Author: nyoka-pmml   File: _validateSchema.py    License: Apache License 2.0
def test_validate_sklearn_sgd_with_text(self):
        categories = ['alt.atheism','talk.religion.misc']
        data = fetch_20newsgroups(subset='train', categories=categories)
        X = data.data[:4]
        Y = data.target[:4]
        features = ['input']
        target = 'output'
        model = SGDClassifier(loss="log")
        file_name = model.__class__.__name__ + '_TfIdfVec_.pmml'
        pipeline = Pipeline([
            ('vect', TfidfVectorizer()),
            ('clf', model)
        ])
        pipeline.fit(X, Y)
        skl_to_pmml(pipeline, features, target, file_name)
        self.assertEqual(self.schema.is_valid(file_name), True) 
Example 18
Project: jh-kaggle-util   Author: jeffheaton   File: ensemble_glm.py    License: Apache License 2.0
def fit_ensemble(x,y):
    fit_type = jhkaggle.jhkaggle_config['FIT_TYPE']
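    # `if 1:` is always true, so the LogisticRegression fallback in the
    # else-branch below is dead code, presumably kept for experimentation.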
    if 1:
        if fit_type == jhkaggle.const.FIT_TYPE_BINARY_CLASSIFICATION:
            blend = SGDClassifier(loss="log", penalty="elasticnet")  # LogisticRegression()
        else:
            # blend = SGDRegressor()
            #blend = LinearRegression()
            #blend = RandomForestRegressor(n_estimators=10, n_jobs=-1, max_depth=5, criterion='mae')
            blend = LassoLarsCV(normalize=True)
            #blend = ElasticNetCV(normalize=True)
            #blend = LinearRegression(normalize=True)
        blend.fit(x, y)
    else:
        blend = LogisticRegression()
        blend.fit(x, y)


    return blend 
Example 19
Project: Splunking-Crime   Author: nccgroup   File: SGDClassifier.py    License: GNU Affero General Public License v3.0
def __init__(self, options):
        self.handle_options(options)

        out_params = convert_params(
            options.get('params', {}),
            bools=['fit_intercept'],
            ints=['random_state', 'n_iter'],
            floats=['l1_ratio', 'alpha', 'eta0', 'power_t'],
            strs=['loss', 'penalty', 'learning_rate'],
        )

        if 'loss' in out_params:
            try:
                assert (out_params['loss'] in ['hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron'])
            except AssertionError:
                raise RuntimeError(
                    'Value for parameter "loss" has to be one of "hinge", "log", "modified_huber", "squared_hinge", or "perceptron"')

        self.scaler = StandardScaler()
        self.estimator = _SGDClassifier(**out_params) 
Example 20
Project: ReAgent   Author: facebookresearch   File: linear_trainers.py    License: BSD 3-Clause "New" or "Revised" License
def train(self, data: TrainingData, iterations: int = 1, num_samples: int = 0):
        logging.info("SGDClassifierTrainer.train...")
        self._model = None
        best_score = float("-inf")
        for _ in range(iterations):
            x, y, _ = super()._sample(
                data.train_x, data.train_y, data.train_weight, num_samples, True
            )
            sx, sy, ssw = super()._sample(
                data.validation_x, data.validation_y, data.validation_weight
            )
            for alpha in np.logspace(-8, -1, num=8, base=10):
                model = SGDClassifier(
                    loss=self._loss,
                    alpha=alpha,
                    random_state=0,
                    max_iter=self._max_iter,
                )
                model.fit(x, y)
                score = model.score(sx, sy, ssw)
                logging.info(f"  alpha: {alpha}, score: {score}")
                if score > best_score:
                    best_score = score
                    self._model = model 
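The alpha sweep above is logarithmic: np.logspace(-8, -1, num=8, base=10) produces one value per decade from 1e-08 to 1e-01:

import numpy as np
print(np.logspace(-8, -1, num=8, base=10))
# [1.e-08 1.e-07 1.e-06 1.e-05 1.e-04 1.e-03 1.e-02 1.e-01]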
Example 21
Project: dask-ml   Author: dask   File: test_incremental.py    License: BSD 3-Clause "New" or "Revised" License
def test_incremental_text_pipeline(container):
    X = pd.Series(["a list", "of words", "for classification"] * 100)
    X = dd.from_pandas(X, npartitions=3)

    if container == "bag":
        X = X.to_bag()

    y = da.from_array(np.array([0, 0, 1] * 100), chunks=(100,) * 3)

    assert tuple(X.map_partitions(len).compute()) == y.chunks[0]

    sgd = SGDClassifier(max_iter=5, tol=1e-3)
    clf = Incremental(sgd, scoring="accuracy", assume_equal_chunks=True)
    vect = dask_ml.feature_extraction.text.HashingVectorizer()
    pipe = make_pipeline(vect, clf)

    pipe.fit(X, y, incremental__classes=[0, 1])
    X2 = pipe.steps[0][1].transform(X)
    assert hasattr(clf, "coef_")

    X2.compute_chunk_sizes()
    assert X2.shape == (300, vect.n_features) 
Example 22
Project: EDeN   Author: fabriziocosta   File: estimator.py    License: MIT License
def set_params(self, r=3, d=8, nbits=16, discrete=True,
                   balance=False, subsample_size=200, ratio=2,
                   normalization=False, inner_normalization=False,
                   penalty='elasticnet'):
        """setter."""
        self.r = r
        self.d = d
        self.nbits = nbits
        self.normalization = normalization
        self.inner_normalization = inner_normalization
        self.discrete = discrete
        self.balance = balance
        self.subsample_size = subsample_size
        self.ratio = ratio
        if penalty == 'perceptron':
            self.model = Perceptron(max_iter=5, tol=None)
        else:
            self.model = SGDClassifier(
                average=True, class_weight='balanced', shuffle=True,
                penalty=penalty, max_iter=5, tol=None)
        self.vectorizer = Vectorizer(
            r=self.r, d=self.d,
            normalization=self.normalization,
            inner_normalization=self.inner_normalization,
            discrete=self.discrete,
            nbits=self.nbits)
        return self 
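A side note on the penalty == 'perceptron' branch above: per the scikit-learn documentation, Perceptron is itself a thin wrapper over the same SGD machinery, so both branches train an SGD-based linear model. Roughly:

from sklearn.linear_model import Perceptron, SGDClassifier

# Per the scikit-learn docs, Perceptron() is equivalent to:
SGDClassifier(loss="perceptron", eta0=1.0, learning_rate="constant", penalty=None)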
Example 23
Project: sklearn-audio-transfer-learning   Author: jordipons   File: audio_transfer_learning.py    License: ISC License
def define_classification_model():
    """ Select and define the model you will use for the classifier. 
    """
    if config['model_type'] == 'linearSVM': # linearSVM can be faster than SVM
        return LinearSVC(C=1)
    elif config['model_type'] == 'SVM': # non-linearSVM, we can use the kernel trick
        return SVC(C=1, kernel='rbf', gamma='scale')
    elif config['model_type'] == 'kNN': # k-nearest neighbour
        return KNeighborsClassifier(n_neighbors=1, metric='cosine')
    elif config['model_type'] == 'perceptron': # optimizes log-loss, also known as cross-entropy, with sgd
        return SGDClassifier(max_iter=600, verbose=0.5, loss='log', learning_rate='optimal')
    elif config['model_type'] == 'MLP': # optimizes log-loss, also known as cross-entropy, with sgd
        return MLPClassifier(hidden_layer_sizes=(20,), max_iter=600, verbose=10, 
               solver='sgd', learning_rate='constant', learning_rate_init=0.001) 
Example 24
Project: transferlearning   Author: jindongwang   File: SCL.py    License: MIT License
def fit(self, Xs, Xt):
        '''
        find pivot features and transfer the Xs and Xt
        Param Xs: source data
        Param Xt: target data
        output Xs_new: new source data features
        output Xt_new: new target data features
        output W: transform matrix
        '''
        _, ds = Xs.shape
        _, dt = Xt.shape
        assert ds == dt
        X = np.concatenate((Xs, Xt), axis=0)
        ix = np.argsort(np.sum(X, axis=0))
        ix = ix[::-1][:self.num_pivots]
        pivots = (X[:, ix]>0).astype('float')
        p = np.zeros((ds, self.num_pivots))
        # train the pivot classifiers
        for i in range(self.num_pivots):
            clf = linear_model.SGDClassifier(loss="modified_huber", alpha=self.l2)
            clf.fit(X, pivots[:, i])
            p[:, i] = clf.coef_
        _, W = np.linalg.eig(np.cov(p))
        W = W[:, :self.num_pivots].astype('float')
        self.W = W
        Xs_new = np.concatenate((np.dot(Xs, W), Xs), axis=1)
        Xt_new = np.concatenate((np.dot(Xt, W), Xt), axis=1)

        return Xs_new, Xt_new, W 
Example 25
Project: ConvLab   Author: ConvLab   File: Classifier.py    License: MIT License
def train(self, X, y):
        model = SGDClassifier(loss="log", penalty="l2")
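        # Note: SGDClassifier has no `probability` parameter, so the attribute
        # assignment below is a no-op; predict_proba works here only because
        # loss="log" was chosen.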
        model.probability=True
        model.fit(X,y)
        self.model = model 
Example 26
Project: fake-news-detection   Author: aldengolab   File: model_loop.py    License: MIT License
def define_clfs_params(self):
        '''
        Defines all relevant parameters and classes for classfier objects.
        Edit these if you wish to change parameters.
        '''
        # These are the classifiers
        self.clfs = {
            'RF': RandomForestClassifier(n_estimators = 50, n_jobs = -1),
            'ET': ExtraTreesClassifier(n_estimators = 10, n_jobs = -1, criterion = 'entropy'),
            'AB': AdaBoostClassifier(DecisionTreeClassifier(max_depth = 1), algorithm = "SAMME", n_estimators = 200),  # max_depth must be a single int here, not a list
            'LR': LogisticRegression(penalty = 'l1', C = 1e5),
            'SVM': svm.SVC(kernel = 'linear', probability = True, random_state = 0),
            'GB': GradientBoostingClassifier(learning_rate = 0.05, subsample = 0.5, max_depth = 6, n_estimators = 10),
            'NB': GaussianNB(),
            'DT': DecisionTreeClassifier(),
            'SGD': SGDClassifier(loss = 'log', penalty = 'l2'),
            'KNN': KNeighborsClassifier(n_neighbors = 3)
            }
        # These are the parameters which will be run through
        self.params = {
             'RF':{'n_estimators': [1,10,100,1000], 'max_depth': [10, 15,20,30,40,50,60,70,100], 'max_features': ['sqrt','log2'],'min_samples_split': [2,5,10], 'random_state': [1]},
             'LR': {'penalty': ['l1','l2'], 'C': [0.00001,0.0001,0.001,0.01,0.1,1,10], 'random_state': [1]},
             'SGD': {'loss': ['log'], 'penalty': ['l2','l1','elasticnet'], 'random_state': [1]},
             'ET': {'n_estimators': [1,10,100,1000], 'criterion' : ['gini', 'entropy'], 'max_depth': [1,3,5,10,15], 'max_features': ['sqrt','log2'],'min_samples_split': [2,5,10], 'random_state': [1]},
             'AB': {'algorithm': ['SAMME', 'SAMME.R'], 'n_estimators': [1,10,100,1000], 'random_state': [1]},
             'GB': {'n_estimators': [1,10,100,1000], 'learning_rate' : [0.001,0.01,0.05,0.1,0.5],'subsample' : [0.1,0.5,1.0], 'max_depth': [1,3,5,10,20,50,100], 'random_state': [1]},
             'NB': {},
             'DT': {'criterion': ['gini', 'entropy'], 'max_depth': [1,2,15,20,30,40,50], 'max_features': ['sqrt','log2'],'min_samples_split': [2,5,10], 'random_state': [1]},
             'SVM' :{'C' :[0.00001,0.0001,0.001,0.01,0.1,1,10],'kernel':['linear'], 'random_state': [1]},
             'KNN' :{'n_neighbors': [1,5,10,25,50,100],'weights': ['uniform','distance'],'algorithm': ['auto','ball_tree','kd_tree']}
             } 
Example 27
Project: MNIST-baselines   Author: cxy1997   File: SGD.py    License: MIT License
def SGD():
    loader = MnistLoader(flatten=True, data_path='../data', var_per=None)
    model = SGDClassifier(max_iter=30000)

    model.fit(loader.data_train, loader.label_train)
    print('model trained')
    res = model.score(loader.data_test, loader.label_test)
    print(res)

    return res 
Example 28
Project: tcav   Author: tensorflow   File: cav.py    License: Apache License 2.0
def train(self, acts):
    """Train the CAVs from the activations.

    Args:
      acts: a dictionary of activations. In particular, acts takes the form
            {'concept1': {'bottleneck name1': [...act array...],
                          'bottleneck name2': [...act array...], ...},
             'concept2': {'bottleneck name1': [...act array...], ...},
             ...}
    Raises:
      ValueError: if the model_type in hparam is not compatible.
    """

    tf.logging.info('training with alpha={}'.format(self.hparams.alpha))
    x, labels, labels2text = CAV._create_cav_training_set(
        self.concepts, self.bottleneck, acts)

    if self.hparams.model_type == 'linear':
      lm = linear_model.SGDClassifier(alpha=self.hparams.alpha, max_iter=self.hparams.max_iter, tol=self.hparams.tol)
    elif self.hparams.model_type == 'logistic':
      lm = linear_model.LogisticRegression()
    else:
      raise ValueError('Invalid hparams.model_type: {}'.format(
          self.hparams.model_type))

    self.accuracies = self._train_lm(lm, x, labels, labels2text)
    if len(lm.coef_) == 1:
      # if there were only two labels, the concept is assigned to label 0 by
      # default. So we flip the coef_ to reflect this.
      self.cavs = [-1 * lm.coef_[0], lm.coef_[0]]
    else:
      self.cavs = [c for c in lm.coef_]
    self._save_cavs() 
Example 29
Project: tcav   Author: tensorflow   File: cav_test.py    License: Apache License 2.0
def test__train_lm(self):
    lm = linear_model.SGDClassifier(alpha=self.hparams.alpha)
    acc = self.cav._train_lm(lm, np.array([[0], [0], [0], [1], [1], [1]]),
                             np.array([0, 0, 0, 1, 1, 1]), {
                                 0: 0,
                                 1: 1
                             })
    # the given data is so easy it should get this almost perfect.
    self.assertGreater(acc[0], 0.99)
    self.assertGreater(acc[1], 0.99) 
Example 30
Project: scikit-multiflow   Author: scikit-multiflow   File: multi_output_learner.py    License: BSD 3-Clause "New" or "Revised" License
def __init__(self, base_estimator=SGDClassifier(max_iter=100)):
        super().__init__()
        self.base_estimator = base_estimator
        self._check_estimator_type()
        self.ensemble = None
        self.n_targets = None