Python sklearn.naive_bayes.BernoulliNB() Examples

The following are 30 code examples showing how to use sklearn.naive_bayes.BernoulliNB(). They are extracted from open source projects; the originating project, author, file, and license are listed above each example.

You may also want to check out all available functions and classes of the sklearn.naive_bayes module.
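
As a quick orientation before the project examples, here is a minimal, self-contained sketch of typical usage (the toy data is an assumption, not taken from any project below):

import numpy as np
from sklearn.naive_bayes import BernoulliNB

# BernoulliNB models binary/boolean features: `binarize` thresholds
# real-valued inputs and `alpha` controls Laplace/Lidstone smoothing.
X = np.array([[1, 0, 1], [0, 1, 0], [1, 1, 0], [0, 0, 1]])
y = np.array([0, 1, 0, 1])

clf = BernoulliNB(alpha=1.0, binarize=0.0)
clf.fit(X, y)
print(clf.predict(np.array([[1, 0, 0]])))        # predicted class for a new point
print(clf.predict_proba(np.array([[1, 0, 0]])))  # per-class probabilities (rows sum to 1)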

Example 1
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_naive_bayes.py    License: MIT License
def test_feature_log_prob_bnb():
    # Test for issue #4268.
    # Tests that the feature log prob value computed by BernoulliNB when
    # alpha=1.0 is equal to the expression given in Manning, Raghavan,
    # and Schuetze's "Introduction to Information Retrieval" book:
    # https://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html

    X = np.array([[0, 0, 0], [1, 1, 0], [0, 1, 0], [1, 0, 1], [0, 1, 0]])
    Y = np.array([0, 0, 1, 2, 2])

    # Fit Bernoulli NB w/ alpha = 1.0
    clf = BernoulliNB(alpha=1.0)
    clf.fit(X, Y)

    # Manually form the (log) numerator and denominator that
    # constitute P(feature presence | class)
    num = np.log(clf.feature_count_ + 1.0)
    denom = np.tile(np.log(clf.class_count_ + 2.0), (X.shape[1], 1)).T

    # Check manual estimate matches
    assert_array_almost_equal(clf.feature_log_prob_, (num - denom)) 
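
With alpha = 1 the smoothed Bernoulli estimate being checked is P(x_i = 1 | y = c) = (N_ci + 1) / (N_c + 2), where N_ci is the number of class-c samples with feature i present and N_c is the number of class-c samples; `num - denom` above is exactly the log of that ratio.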
Example 2
Project: Jacinle   Author: vacancy   File: hybrid_nb.py    License: MIT License
def __init__(self, distributions, weights=None, **kwargs):
        self.models = []
        for dist in distributions:
            dist = NaiveBayesianDistribution.from_string(dist)
            if dist is NaiveBayesianDistribution.GAUSSIAN:
                model = nb.GaussianNB(**kwargs)
            elif dist is NaiveBayesianDistribution.MULTINOMIAL:
                model = nb.MultinomialNB(**kwargs)
            elif dist is NaiveBayesianDistribution.BERNOULLI:
                model = nb.BernoulliNB(**kwargs)
            else:
                raise ValueError('Unknown distribution: {}.'.format(dist))
            kwargs['fit_prior'] = False  # Disable prior fitting for every model after the first.
            self.models.append(model)

        self.weights = weights 
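
The same dispatch idea can be written as a self-contained sketch, assuming plain string keys instead of the project's NaiveBayesianDistribution enum (the helper name make_nb is hypothetical):

from sklearn import naive_bayes as nb

_NB_BY_DIST = {
    'gaussian': nb.GaussianNB,
    'multinomial': nb.MultinomialNB,
    'bernoulli': nb.BernoulliNB,
}

def make_nb(dist, **kwargs):
    # Map a distribution name to the matching scikit-learn naive Bayes class.
    try:
        return _NB_BY_DIST[dist](**kwargs)
    except KeyError:
        raise ValueError('Unknown distribution: {}.'.format(dist))

# e.g. make_nb('bernoulli', alpha=0.5) mirrors the BernoulliNB branch above.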
Example 3
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_naive_bayes.py    License: MIT License
def test_discretenb_pickle():
    # Test picklability of discrete naive Bayes classifiers

    for cls in [BernoulliNB, MultinomialNB, GaussianNB]:
        clf = cls().fit(X2, y2)
        y_pred = clf.predict(X2)

        store = BytesIO()
        pickle.dump(clf, store)
        clf = pickle.load(BytesIO(store.getvalue()))

        assert_array_equal(y_pred, clf.predict(X2))

        if cls is not GaussianNB:
            # TODO re-enable me when partial_fit is implemented for GaussianNB

            # Test pickling of estimator trained with partial_fit
            clf2 = cls().partial_fit(X2[:3], y2[:3], classes=np.unique(y2))
            clf2.partial_fit(X2[3:], y2[3:])
            store = BytesIO()
            pickle.dump(clf2, store)
            clf2 = pickle.load(BytesIO(store.getvalue()))
            assert_array_equal(y_pred, clf2.predict(X2)) 
Example 4
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_naive_bayes.py    License: MIT License
def test_input_check_partial_fit():
    for cls in [BernoulliNB, MultinomialNB]:
        # check shape consistency
        assert_raises(ValueError, cls().partial_fit, X2, y2[:-1],
                      classes=np.unique(y2))

        # classes is required for first call to partial fit
        assert_raises(ValueError, cls().partial_fit, X2, y2)

        # check consistency of consecutive classes values
        clf = cls()
        clf.partial_fit(X2, y2, classes=np.unique(y2))
        assert_raises(ValueError, clf.partial_fit, X2, y2,
                      classes=np.arange(42))

        # check consistency of input shape for partial_fit
        assert_raises(ValueError, clf.partial_fit, X2[:, :-1], y2)

        # check consistency of input shape for predict
        assert_raises(ValueError, clf.predict, X2[:, :-1]) 
Example 5
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_naive_bayes.py    License: MIT License
def test_discretenb_provide_prior_with_partial_fit():
    # Test whether discrete NB classes use provided prior
    # when using partial_fit

    iris = load_iris()
    iris_data1, iris_data2, iris_target1, iris_target2 = train_test_split(
        iris.data, iris.target, test_size=0.4, random_state=415)

    for cls in [BernoulliNB, MultinomialNB]:
        for prior in [None, [0.3, 0.3, 0.4]]:
            clf_full = cls(class_prior=prior)
            clf_full.fit(iris.data, iris.target)
            clf_partial = cls(class_prior=prior)
            clf_partial.partial_fit(iris_data1, iris_target1,
                                    classes=[0, 1, 2])
            clf_partial.partial_fit(iris_data2, iris_target2)
            assert_array_almost_equal(clf_full.class_log_prior_,
                                      clf_partial.class_log_prior_) 
Example 6
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_naive_bayes.py    License: MIT License
def test_feature_log_prob_bnb():
    # Test for issue #4268.
    # Tests that the feature log prob value computed by BernoulliNB when
    # alpha=1.0 is equal to the expression given in Manning, Raghavan,
    # and Schuetze's "Introduction to Information Retrieval" book:
    # http://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html

    X = np.array([[0, 0, 0], [1, 1, 0], [0, 1, 0], [1, 0, 1], [0, 1, 0]])
    Y = np.array([0, 0, 1, 2, 2])

    # Fit Bernoulli NB w/ alpha = 1.0
    clf = BernoulliNB(alpha=1.0)
    clf.fit(X, Y)

    # Manually form the (log) numerator and denominator that
    # constitute P(feature presence | class)
    num = np.log(clf.feature_count_ + 1.0)
    denom = np.tile(np.log(clf.class_count_ + 2.0), (X.shape[1], 1)).T

    # Check manual estimate matches
    assert_array_almost_equal(clf.feature_log_prob_, (num - denom)) 
Example 7
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_naive_bayes.py    License: MIT License
def test_discrete_prior():
    # Test whether class priors are properly set.
    for cls in [BernoulliNB, MultinomialNB]:
        clf = cls().fit(X2, y2)
        assert_array_almost_equal(np.log(np.array([2, 2, 2]) / 6.0),
                                  clf.class_log_prior_, 8) 
Example 8
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_naive_bayes.py    License: MIT License
def test_discretenb_predict_proba():
    # Test discrete NB classes' probability scores

    # The 100s below distinguish Bernoulli from multinomial.
    # FIXME: write a test to show this.
    X_bernoulli = [[1, 100, 0], [0, 1, 0], [0, 100, 1]]
    X_multinomial = [[0, 1], [1, 3], [4, 0]]

    # test binary case (1-d output)
    y = [0, 0, 2]   # 2 is regression test for binary case, 02e673
    for cls, X in zip([BernoulliNB, MultinomialNB],
                      [X_bernoulli, X_multinomial]):
        clf = cls().fit(X, y)
        assert_equal(clf.predict(X[-1:]), 2)
        assert_equal(clf.predict_proba([X[0]]).shape, (1, 2))
        assert_array_almost_equal(clf.predict_proba(X[:2]).sum(axis=1),
                                  np.array([1., 1.]), 6)

    # test multiclass case (2-d output, must sum to one)
    y = [0, 1, 2]
    for cls, X in zip([BernoulliNB, MultinomialNB],
                      [X_bernoulli, X_multinomial]):
        clf = cls().fit(X, y)
        assert_equal(clf.predict_proba(X[0:1]).shape, (1, 3))
        assert_equal(clf.predict_proba(X[:2]).shape, (2, 3))
        assert_almost_equal(np.sum(clf.predict_proba([X[1]])), 1)
        assert_almost_equal(np.sum(clf.predict_proba([X[-1]])), 1)
        assert_almost_equal(np.sum(np.exp(clf.class_log_prior_)), 1)
        assert_almost_equal(np.sum(np.exp(clf.intercept_)), 1) 
Example 9
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_naive_bayes.py    License: MIT License
def test_coef_intercept_shape():
    # coef_ and intercept_ should have shapes as in other linear models.
    # Non-regression test for issue #2127.
    X = [[1, 0, 0], [1, 1, 1]]
    y = [1, 2]  # binary classification

    for clf in [MultinomialNB(), BernoulliNB()]:
        clf.fit(X, y)
        assert_equal(clf.coef_.shape, (1, 3))
        assert_equal(clf.intercept_.shape, (1,)) 
Example 10
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_naive_bayes.py    License: MIT License
def test_check_accuracy_on_digits():
    # Non regression test to make sure that any further refactoring / optim
    # of the NB models do not harm the performance on a slightly non-linearly
    # separable dataset
    digits = load_digits()
    X, y = digits.data, digits.target
    binary_3v8 = np.logical_or(digits.target == 3, digits.target == 8)
    X_3v8, y_3v8 = X[binary_3v8], y[binary_3v8]

    # Multinomial NB
    scores = cross_val_score(MultinomialNB(alpha=10), X, y, cv=10)
    assert_greater(scores.mean(), 0.86)

    scores = cross_val_score(MultinomialNB(alpha=10), X_3v8, y_3v8, cv=10)
    assert_greater(scores.mean(), 0.94)

    # Bernoulli NB
    scores = cross_val_score(BernoulliNB(alpha=10), X > 4, y, cv=10)
    assert_greater(scores.mean(), 0.83)

    scores = cross_val_score(BernoulliNB(alpha=10), X_3v8 > 4, y_3v8, cv=10)
    assert_greater(scores.mean(), 0.92)

    # Gaussian NB
    scores = cross_val_score(GaussianNB(), X, y, cv=10)
    assert_greater(scores.mean(), 0.77)

    scores = cross_val_score(GaussianNB(var_smoothing=0.1), X, y, cv=10)
    assert_greater(scores.mean(), 0.89)

    scores = cross_val_score(GaussianNB(), X_3v8, y_3v8, cv=10)
    assert_greater(scores.mean(), 0.86) 
Example 11
Project: automl-phase-2   Author: jamesrobertlloyd   File: models.py    License: MIT License
def __init__(self, info, verbose=True, debug_mode=False):
        self.label_num=info['label_num']
        self.target_num=info['target_num']
        self.task = info['task']
        self.metric = info['metric']
        self.postprocessor = None
        #self.postprocessor = MultiLabelEnsemble(LogisticRegression(), balance=True) # To calibrate proba
        self.postprocessor = MultiLabelEnsemble(LogisticRegression(), balance=False) # To calibrate proba
        if debug_mode>=2:
            self.name = "RandomPredictor"
            self.model = RandomPredictor(self.target_num)
            self.predict_method = self.model.predict_proba 
            return
        if info['task']=='regression':
            if info['is_sparse']==True:
                self.name = "BaggingRidgeRegressor"
                self.model = BaggingRegressor(base_estimator=Ridge(), n_estimators=1, verbose=verbose) # unfortunately, no warm start...
            else:
                self.name = "GradientBoostingRegressor"
                self.model = GradientBoostingRegressor(n_estimators=1, verbose=verbose, warm_start = True)
            self.predict_method = self.model.predict  # Regression: predict values directly
        else:
            if info['has_categorical']: # Out of laziness, we do not convert categorical variables...
                self.name = "RandomForestClassifier"
                self.model = RandomForestClassifier(n_estimators=1, verbose=verbose) # unfortunately, no warm start...
            elif info['is_sparse']:                
                self.name = "BaggingNBClassifier"
                self.model = BaggingClassifier(base_estimator=BernoulliNB(), n_estimators=1, verbose=verbose) # unfortunately, no warm start...                          
            else:
                self.name = "GradientBoostingClassifier"
                self.model = eval(self.name + "(n_estimators=1, verbose=" + str(verbose) + ", min_samples_split=10, random_state=1, warm_start = True)")
            if info['task']=='multilabel.classification':
                self.model = MultiLabelEnsemble(self.model)
            self.predict_method = self.model.predict_proba 
Example 12
Project: lale   Author: IBM   File: bernoulli_nb.py    License: Apache License 2.0
def __init__(self, alpha=1.0, binarize=0.0, fit_prior=True, class_prior=None):
        self._hyperparams = {
            'alpha': alpha,
            'binarize': binarize,
            'fit_prior': fit_prior,
            'class_prior': class_prior}
        self._wrapped_model = Op(**self._hyperparams) 
Example 13
Project: tpot   Author: EpistasisLab   File: export_tests.py    License: GNU Lesser General Public License v3.0
def test_export_random_ind():
    """Assert that the TPOTClassifier can generate the same pipeline export with random seed of 39."""
    tpot_obj = TPOTClassifier(random_state=39, config_dict="TPOT light")
    tpot_obj._fit_init()
    tpot_obj._pbar = tqdm(total=1, disable=True)
    pipeline = tpot_obj._toolbox.individual()
    expected_code = """import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB

# NOTE: Make sure that the outcome column is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1)
training_features, testing_features, training_target, testing_target = \\
            train_test_split(features, tpot_data['target'], random_state=39)

exported_pipeline = BernoulliNB(alpha=1.0, fit_prior=False)
# Fix random state in exported estimator
if hasattr(exported_pipeline, 'random_state'):
    setattr(exported_pipeline, 'random_state', 39)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""
    exported_code = export_pipeline(pipeline, tpot_obj.operators, tpot_obj._pset, random_state=tpot_obj.random_state)
    assert expected_code == exported_code 
Example 14
Project: Splunking-Crime   Author: nccgroup   File: BernoulliNB.py    License: GNU Affero General Public License v3.0
def __init__(self, options):
        self.handle_options(options)

        out_params = convert_params(
            options.get('params', {}),
            floats=['alpha', 'binarize'],
            bools=['fit_prior'],
        )

        self.estimator = _BernoulliNB(**out_params) 
Example 15
Project: Splunking-Crime   Author: nccgroup   File: test_codec.py    License: GNU Affero General Public License v3.0
def test_BernoulliNB(self):
        BernoulliNB_Algo.register_codecs()
        self.classifier_util(BernoulliNB) 
Example 16
Project: pychennai-sentiment-analysis   Author: vumaasha   File: sentiment_analysis.py    License: Apache License 2.0
def learn_model(data,target):
    # preparing data for split validation. 60% training, 40% test
    data_train,data_test,target_train,target_test = cross_validation.train_test_split(data,target,test_size=0.4,random_state=43)
    classifier = BernoulliNB().fit(data_train,target_train)
    predicted = classifier.predict(data_test)
    evaluate_model(target_test,predicted)

# read more about model evaluation metrics here
# http://scikit-learn.org/stable/modules/model_evaluation.html 
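
evaluate_model is defined elsewhere in that project; a minimal stand-in using standard scikit-learn metrics (the exact reporting is an assumption) might look like:

from sklearn import metrics

def evaluate_model(target_true, target_predicted):
    # Hypothetical stand-in for the project's evaluate_model helper.
    print(metrics.classification_report(target_true, target_predicted))
    print("Accuracy: {:.3f}".format(metrics.accuracy_score(target_true, target_predicted)))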
Example 17
Project: intro_ds   Author: GenTang   File: text_classification.py    License: Apache License 2.0
def trainBernoulliNB(data):
    """
    Fit a Bernoulli naive Bayes model to the given data.
    """
    vect = CountVectorizer(token_pattern=r"(?u)\b\w+\b", binary=True)
    X = vect.fit_transform(data["content"])
    le = LabelEncoder()
    Y = le.fit_transform(data["label"])
    model = BernoulliNB()
    model.fit(X, Y)
    return vect, le, model 
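
A hedged usage sketch for the three fitted objects returned above (the new_docs list is an assumption):

vect, le, model = trainBernoulliNB(data)
new_docs = ["a new document to classify"]
X_new = vect.transform(new_docs)      # reuse the fitted vectorizer
pred = model.predict(X_new)           # predicted class indices
print(le.inverse_transform(pred))     # map indices back to the original labels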
Example 18
Project: intro_ds   Author: GenTang   File: classification_example.py    License: Apache License 2.0
def trainModel(data):
    """
    Fit the data with a random forest embedding followed by a Bernoulli naive Bayes model.
    """
    pipe = Pipeline([("embedding", RandomTreesEmbedding(random_state=1024)),
        ("model", BernoulliNB())])
    pipe.fit(data[["x1", "x2"]], data["y"])
    return pipe 
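
A hedged usage sketch for the fitted pipeline (the new points below are assumptions):

import pandas as pd

pipe = trainModel(data)
new_points = pd.DataFrame({"x1": [0.1, -0.3], "x2": [1.2, 0.4]})
print(pipe.predict(new_points[["x1", "x2"]]))  # Bernoulli NB predictions on the embedded features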
Example 19
Project: pandas-ml   Author: pandas-ml   File: test_naive_bayes.py    License: BSD 3-Clause "New" or "Revised" License
def test_objectmapper(self):
        df = pdml.ModelFrame([])
        self.assertIs(df.naive_bayes.GaussianNB, nb.GaussianNB)
        self.assertIs(df.naive_bayes.MultinomialNB, nb.MultinomialNB)
        self.assertIs(df.naive_bayes.BernoulliNB, nb.BernoulliNB) 
Example 20
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_naive_bayes.py    License: MIT License
def test_discrete_prior():
    # Test whether class priors are properly set.
    for cls in [BernoulliNB, MultinomialNB]:
        clf = cls().fit(X2, y2)
        assert_array_almost_equal(np.log(np.array([2, 2, 2]) / 6.0),
                                  clf.class_log_prior_, 8) 
Example 21
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_naive_bayes.py    License: MIT License
def test_discretenb_partial_fit():
    for cls in [MultinomialNB, BernoulliNB]:
        yield check_partial_fit, cls 
Example 22
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_naive_bayes.py    License: MIT License
def test_input_check_fit():
    # Test input checks for the fit method
    for cls in [BernoulliNB, MultinomialNB, GaussianNB]:
        # check shape consistency for number of samples at fit time
        assert_raises(ValueError, cls().fit, X2, y2[:-1])

        # check shape consistency for number of input features at predict time
        clf = cls().fit(X2, y2)
        assert_raises(ValueError, clf.predict, X2[:, :-1]) 
Example 23
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_naive_bayes.py    License: MIT License
def test_discretenb_uniform_prior():
    # Test whether discrete NB classes fit a uniform prior
    # when fit_prior=False and class_prior=None

    for cls in [BernoulliNB, MultinomialNB]:
        clf = cls()
        clf.set_params(fit_prior=False)
        clf.fit([[0], [0], [1]], [0, 0, 1])
        prior = np.exp(clf.class_log_prior_)
        assert_array_equal(prior, np.array([.5, .5])) 
Example 24
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_naive_bayes.py    License: MIT License
def test_discretenb_provide_prior():
    # Test whether discrete NB classes use provided prior

    for cls in [BernoulliNB, MultinomialNB]:
        clf = cls(class_prior=[0.5, 0.5])
        clf.fit([[0], [0], [1]], [0, 0, 1])
        prior = np.exp(clf.class_log_prior_)
        assert_array_equal(prior, np.array([.5, .5]))

        # Inconsistent number of classes with prior
        assert_raises(ValueError, clf.fit, [[0], [1], [2]], [0, 1, 2])
        assert_raises(ValueError, clf.partial_fit, [[0], [1]], [0, 1],
                      classes=[0, 1, 1]) 
Example 25
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_naive_bayes.py    License: MIT License
def test_sample_weight_multiclass():
    for cls in [BernoulliNB, MultinomialNB]:
        # check shape consistency for number of samples at fit time
        yield check_sample_weight_multiclass, cls 
Example 26
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_naive_bayes.py    License: MIT License
def test_coef_intercept_shape():
    # coef_ and intercept_ should have shapes as in other linear models.
    # Non-regression test for issue #2127.
    X = [[1, 0, 0], [1, 1, 1]]
    y = [1, 2]  # binary classification

    for clf in [MultinomialNB(), BernoulliNB()]:
        clf.fit(X, y)
        assert_equal(clf.coef_.shape, (1, 3))
        assert_equal(clf.intercept_.shape, (1,)) 
Example 27
Project: AL   Author: iitml   File: run_al_cl.py    License: GNU General Public License v2.0
def retrieve_args(self):
        """Adds arguments to the parser for each respective setting of the command line interface"""
        # Classifier
        self.parser.add_argument("-c","--classifier", choices=['KNeighborsClassifier', 'LogisticRegression', 'SVC', 'BernoulliNB',
                        'DecisionTreeClassifier', 'RandomForestClassifier', 'AdaBoostClassifier', 'GaussianNB', 'MultinomialNB'],
                        default='MultinomialNB', help="Represents the classifier that will be used (default: MultinomialNB) .")

        # Classifier's arguments
        self.parser.add_argument("-a","--arguments", default='',
                        help="Represents the arguments that will be passed to the classifier (default: '').")

        # Data: Testing and training already split
        self.parser.add_argument("-d", '--data', nargs=2, metavar=('pool', 'test'),
                        default=["data/imdb-binary-pool-mindf5-ng11", "data/imdb-binary-test-mindf5-ng11"],
                        help='Files that contain the data, pool and test, and number of features (default: data/imdb-binary-pool-mindf5-ng11 data/imdb-binary-test-mindf5-ng11 27272).')

        # Data: Single File
        self.parser.add_argument("-sd", '--sdata', type=str, default='',
                        help='Single file that contains the data. Cross validation will be performed (default: None).')
        
        # Whether to make the data dense
        self.parser.add_argument('-make_dense', default=False, action='store_true', help='Whether to make the sparse data dense. Some classifiers require this.')
        
        # Number of Folds
        self.parser.add_argument("-cv", type=int, default=10, help="Number of folds for cross validation. Works only if a single dataset is loaded (default: 10).")

        # File: Name of file that will be written the results
        self.parser.add_argument("-f", '--file', type=str, default=None,
                        help='This feature represents the name that will be written with the result. If it is left blank, the file will not be written (default: None ).')

        # Number of Trials
        self.parser.add_argument("-nt", "--num_trials", type=int, default=10, help="Number of trials (default: 10).")

        # Strategies
        self.parser.add_argument("-st", "--strategies", choices=['erreduct', 'loggain', 'qbc', 'rand','unc'], nargs='*',default=['rand'],
                        help="Represent a list of strategies for choosing next samples (default: rand).")

        # Boot Strap
        self.parser.add_argument("-bs", '--bootstrap', default=10, type=int,
                        help='Sets the Boot strap (default: 10).')

        # Budget
        self.parser.add_argument("-b", '--budget', default=500, type=int,
                        help='Sets the budget (default: 500).')

        # Step size
        self.parser.add_argument("-sz", '--stepsize', default=10, type=int,
                        help='Sets the step size (default: 10).')

        # Sub pool size
        self.parser.add_argument("-sp", '--subpool', default=None, type=int,
                        help='Sets the sub pool size (default: None).') 
Example 28
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_naive_bayes.py    License: MIT License
def test_alpha():
    # Setting alpha=0 should not output nan results when p(x_i|y_j)=0 is a case
    X = np.array([[1, 0], [1, 1]])
    y = np.array([0, 1])
    nb = BernoulliNB(alpha=0.)
    assert_warns(UserWarning, nb.partial_fit, X, y, classes=[0, 1])
    assert_warns(UserWarning, nb.fit, X, y)
    prob = np.array([[1, 0], [0, 1]])
    assert_array_almost_equal(nb.predict_proba(X), prob)

    nb = MultinomialNB(alpha=0.)
    assert_warns(UserWarning, nb.partial_fit, X, y, classes=[0, 1])
    assert_warns(UserWarning, nb.fit, X, y)
    prob = np.array([[2./3, 1./3], [0, 1]])
    assert_array_almost_equal(nb.predict_proba(X), prob)

    # Test sparse X
    X = scipy.sparse.csr_matrix(X)
    nb = BernoulliNB(alpha=0.)
    assert_warns(UserWarning, nb.fit, X, y)
    prob = np.array([[1, 0], [0, 1]])
    assert_array_almost_equal(nb.predict_proba(X), prob)

    nb = MultinomialNB(alpha=0.)
    assert_warns(UserWarning, nb.fit, X, y)
    prob = np.array([[2./3, 1./3], [0, 1]])
    assert_array_almost_equal(nb.predict_proba(X), prob)

    # Test for alpha < 0
    X = np.array([[1, 0], [1, 1]])
    y = np.array([0, 1])
    expected_msg = ('Smoothing parameter alpha = -1.0e-01. '
                    'alpha should be > 0.')
    b_nb = BernoulliNB(alpha=-0.1)
    m_nb = MultinomialNB(alpha=-0.1)
    assert_raise_message(ValueError, expected_msg, b_nb.fit, X, y)
    assert_raise_message(ValueError, expected_msg, m_nb.fit, X, y)

    b_nb = BernoulliNB(alpha=-0.1)
    m_nb = MultinomialNB(alpha=-0.1)
    assert_raise_message(ValueError, expected_msg, b_nb.partial_fit,
                         X, y, classes=[0, 1])
    assert_raise_message(ValueError, expected_msg, m_nb.partial_fit,
                         X, y, classes=[0, 1]) 
Example 29
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_naive_bayes.py    License: MIT License
def test_bnb():
    # Tests that BernoulliNB when alpha=1.0 gives the same values as
    # those given for the toy example in Manning, Raghavan, and
    # Schuetze's "Introduction to Information Retrieval" book:
    # http://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html

    # Training data points are:
    # Chinese Beijing Chinese (class: China)
    # Chinese Chinese Shanghai (class: China)
    # Chinese Macao (class: China)
    # Tokyo Japan Chinese (class: Japan)

    # Features are Beijing, Chinese, Japan, Macao, Shanghai, and Tokyo
    X = np.array([[1, 1, 0, 0, 0, 0],
                  [0, 1, 0, 0, 1, 0],
                  [0, 1, 0, 1, 0, 0],
                  [0, 1, 1, 0, 0, 1]])

    # Classes are China (0), Japan (1)
    Y = np.array([0, 0, 0, 1])

    # Fit BernoulliNB w/ alpha = 1.0
    clf = BernoulliNB(alpha=1.0)
    clf.fit(X, Y)

    # Check the class prior is correct
    class_prior = np.array([0.75, 0.25])
    assert_array_almost_equal(np.exp(clf.class_log_prior_), class_prior)

    # Check the feature probabilities are correct
    feature_prob = np.array([[0.4, 0.8, 0.2, 0.4, 0.4, 0.2],
                             [1/3.0, 2/3.0, 2/3.0, 1/3.0, 1/3.0, 2/3.0]])
    assert_array_almost_equal(np.exp(clf.feature_log_prob_), feature_prob)

    # Testing data point is:
    # Chinese Chinese Chinese Tokyo Japan
    X_test = np.array([[0, 1, 1, 0, 0, 1]])

    # Check the predictive probabilities are correct
    unnorm_predict_proba = np.array([[0.005183999999999999,
                                      0.02194787379972565]])
    predict_proba = unnorm_predict_proba / np.sum(unnorm_predict_proba)
    assert_array_almost_equal(clf.predict_proba(X_test), predict_proba) 
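
The expected numbers follow from the Bernoulli likelihood P(c | d) ∝ P(c) * prod_i [ p_ci^x_i * (1 - p_ci)^(1 - x_i) ]: for China this gives 0.75 * 0.6 * 0.8 * 0.2 * 0.6 * 0.6 * 0.2 = 0.005184, for Japan 0.25 * (2/3)^6 ≈ 0.021948, and normalizing those two values yields the predict_proba result checked above.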
Example 30
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_naive_bayes.py    License: MIT License
def test_alpha():
    # Setting alpha=0 should not output nan results when p(x_i|y_j)=0 is a case
    X = np.array([[1, 0], [1, 1]])
    y = np.array([0, 1])
    nb = BernoulliNB(alpha=0.)
    assert_warns(UserWarning, nb.partial_fit, X, y, classes=[0, 1])
    assert_warns(UserWarning, nb.fit, X, y)
    prob = np.array([[1, 0], [0, 1]])
    assert_array_almost_equal(nb.predict_proba(X), prob)

    nb = MultinomialNB(alpha=0.)
    assert_warns(UserWarning, nb.partial_fit, X, y, classes=[0, 1])
    assert_warns(UserWarning, nb.fit, X, y)
    prob = np.array([[2./3, 1./3], [0, 1]])
    assert_array_almost_equal(nb.predict_proba(X), prob)

    # Test sparse X
    X = scipy.sparse.csr_matrix(X)
    nb = BernoulliNB(alpha=0.)
    assert_warns(UserWarning, nb.fit, X, y)
    prob = np.array([[1, 0], [0, 1]])
    assert_array_almost_equal(nb.predict_proba(X), prob)

    nb = MultinomialNB(alpha=0.)
    assert_warns(UserWarning, nb.fit, X, y)
    prob = np.array([[2./3, 1./3], [0, 1]])
    assert_array_almost_equal(nb.predict_proba(X), prob)

    # Test for alpha < 0
    X = np.array([[1, 0], [1, 1]])
    y = np.array([0, 1])
    expected_msg = ('Smoothing parameter alpha = -1.0e-01. '
                    'alpha should be > 0.')
    b_nb = BernoulliNB(alpha=-0.1)
    m_nb = MultinomialNB(alpha=-0.1)
    assert_raise_message(ValueError, expected_msg, b_nb.fit, X, y)
    assert_raise_message(ValueError, expected_msg, m_nb.fit, X, y)

    b_nb = BernoulliNB(alpha=-0.1)
    m_nb = MultinomialNB(alpha=-0.1)
    assert_raise_message(ValueError, expected_msg, b_nb.partial_fit,
                         X, y, classes=[0, 1])
    assert_raise_message(ValueError, expected_msg, m_nb.partial_fit,
                         X, y, classes=[0, 1])