# Python sklearn.naive_bayes.BernoulliNB() Examples

The following are 30 code examples for showing how to use sklearn.naive_bayes.BernoulliNB(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module , or try the search function .

Example 1
```def test_feature_log_prob_bnb():
# Test for issue #4268.
# Tests that the feature log prob value computed by BernoulliNB when
# alpha=1.0 is equal to the expression given in Manning, Raghavan,
# and Schuetze's "Introduction to Information Retrieval" book:
# https://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html

X = np.array([[0, 0, 0], [1, 1, 0], [0, 1, 0], [1, 0, 1], [0, 1, 0]])
Y = np.array([0, 0, 1, 2, 2])

# Fit Bernoulli NB w/ alpha = 1.0
clf = BernoulliNB(alpha=1.0)
clf.fit(X, Y)

# Manually form the (log) numerator and denominator that
# constitute P(feature presence | class)
num = np.log(clf.feature_count_ + 1.0)
denom = np.tile(np.log(clf.class_count_ + 2.0), (X.shape[1], 1)).T

# Check manual estimate matches
assert_array_almost_equal(clf.feature_log_prob_, (num - denom)) ```
Example 2
```def __init__(self, distributions, weights=None, **kwargs):
self.models = []
for dist in distributions:
dist = NaiveBayesianDistribution.from_string(dist)
if dist is NaiveBayesianDistribution.GAUSSIAN:
model = nb.GaussianNB(**kwargs)
elif dist is NaiveBayesianDistribution.MULTINOMIAL:
model = nb.MultinomialNB(**kwargs)
elif dist is NaiveBayesianDistribution.BERNOULLI:
model = nb.BernoulliNB(**kwargs)
else:
raise ValueError('Unknown distribution: {}.'.format(dist))
kwargs['fit_prior'] = False  # Except the first model.
self.models.append(model)

self.weights = weights ```
Example 3
```def test_discretenb_pickle():
# Test picklability of discrete naive Bayes classifiers

for cls in [BernoulliNB, MultinomialNB, GaussianNB]:
clf = cls().fit(X2, y2)
y_pred = clf.predict(X2)

store = BytesIO()
pickle.dump(clf, store)

assert_array_equal(y_pred, clf.predict(X2))

if cls is not GaussianNB:
# TODO re-enable me when partial_fit is implemented for GaussianNB

# Test pickling of estimator trained with partial_fit
clf2 = cls().partial_fit(X2[:3], y2[:3], classes=np.unique(y2))
clf2.partial_fit(X2[3:], y2[3:])
store = BytesIO()
pickle.dump(clf2, store)
assert_array_equal(y_pred, clf2.predict(X2)) ```
Example 4
```def test_input_check_partial_fit():
for cls in [BernoulliNB, MultinomialNB]:
# check shape consistency
assert_raises(ValueError, cls().partial_fit, X2, y2[:-1],
classes=np.unique(y2))

# classes is required for first call to partial fit
assert_raises(ValueError, cls().partial_fit, X2, y2)

# check consistency of consecutive classes values
clf = cls()
clf.partial_fit(X2, y2, classes=np.unique(y2))
assert_raises(ValueError, clf.partial_fit, X2, y2,
classes=np.arange(42))

# check consistency of input shape for partial_fit
assert_raises(ValueError, clf.partial_fit, X2[:, :-1], y2)

# check consistency of input shape for predict
assert_raises(ValueError, clf.predict, X2[:, :-1]) ```
Example 5
```def test_discretenb_provide_prior_with_partial_fit():
# Test whether discrete NB classes use provided prior
# when using partial_fit

iris_data1, iris_data2, iris_target1, iris_target2 = train_test_split(
iris.data, iris.target, test_size=0.4, random_state=415)

for cls in [BernoulliNB, MultinomialNB]:
for prior in [None, [0.3, 0.3, 0.4]]:
clf_full = cls(class_prior=prior)
clf_full.fit(iris.data, iris.target)
clf_partial = cls(class_prior=prior)
clf_partial.partial_fit(iris_data1, iris_target1,
classes=[0, 1, 2])
clf_partial.partial_fit(iris_data2, iris_target2)
assert_array_almost_equal(clf_full.class_log_prior_,
clf_partial.class_log_prior_) ```
Example 6
```def test_feature_log_prob_bnb():
# Test for issue #4268.
# Tests that the feature log prob value computed by BernoulliNB when
# alpha=1.0 is equal to the expression given in Manning, Raghavan,
# and Schuetze's "Introduction to Information Retrieval" book:
# http://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html

X = np.array([[0, 0, 0], [1, 1, 0], [0, 1, 0], [1, 0, 1], [0, 1, 0]])
Y = np.array([0, 0, 1, 2, 2])

# Fit Bernoulli NB w/ alpha = 1.0
clf = BernoulliNB(alpha=1.0)
clf.fit(X, Y)

# Manually form the (log) numerator and denominator that
# constitute P(feature presence | class)
num = np.log(clf.feature_count_ + 1.0)
denom = np.tile(np.log(clf.class_count_ + 2.0), (X.shape[1], 1)).T

# Check manual estimate matches
assert_array_almost_equal(clf.feature_log_prob_, (num - denom)) ```
Example 7
```def test_discrete_prior():
# Test whether class priors are properly set.
for cls in [BernoulliNB, MultinomialNB]:
clf = cls().fit(X2, y2)
assert_array_almost_equal(np.log(np.array([2, 2, 2]) / 6.0),
clf.class_log_prior_, 8) ```
Example 8
```def test_discretenb_predict_proba():
# Test discrete NB classes' probability scores

# The 100s below distinguish Bernoulli from multinomial.
# FIXME: write a test to show this.
X_bernoulli = [[1, 100, 0], [0, 1, 0], [0, 100, 1]]
X_multinomial = [[0, 1], [1, 3], [4, 0]]

# test binary case (1-d output)
y = [0, 0, 2]   # 2 is regression test for binary case, 02e673
for cls, X in zip([BernoulliNB, MultinomialNB],
[X_bernoulli, X_multinomial]):
clf = cls().fit(X, y)
assert_equal(clf.predict(X[-1:]), 2)
assert_equal(clf.predict_proba([X[0]]).shape, (1, 2))
assert_array_almost_equal(clf.predict_proba(X[:2]).sum(axis=1),
np.array([1., 1.]), 6)

# test multiclass case (2-d output, must sum to one)
y = [0, 1, 2]
for cls, X in zip([BernoulliNB, MultinomialNB],
[X_bernoulli, X_multinomial]):
clf = cls().fit(X, y)
assert_equal(clf.predict_proba(X[0:1]).shape, (1, 3))
assert_equal(clf.predict_proba(X[:2]).shape, (2, 3))
assert_almost_equal(np.sum(clf.predict_proba([X[1]])), 1)
assert_almost_equal(np.sum(clf.predict_proba([X[-1]])), 1)
assert_almost_equal(np.sum(np.exp(clf.class_log_prior_)), 1)
assert_almost_equal(np.sum(np.exp(clf.intercept_)), 1) ```
Example 9
```def test_coef_intercept_shape():
# coef_ and intercept_ should have shapes as in other linear models.
# Non-regression test for issue #2127.
X = [[1, 0, 0], [1, 1, 1]]
y = [1, 2]  # binary classification

for clf in [MultinomialNB(), BernoulliNB()]:
clf.fit(X, y)
assert_equal(clf.coef_.shape, (1, 3))
assert_equal(clf.intercept_.shape, (1,)) ```
Example 10
```def test_check_accuracy_on_digits():
# Non regression test to make sure that any further refactoring / optim
# of the NB models do not harm the performance on a slightly non-linearly
# separable dataset
X, y = digits.data, digits.target
binary_3v8 = np.logical_or(digits.target == 3, digits.target == 8)
X_3v8, y_3v8 = X[binary_3v8], y[binary_3v8]

# Multinomial NB
scores = cross_val_score(MultinomialNB(alpha=10), X, y, cv=10)
assert_greater(scores.mean(), 0.86)

scores = cross_val_score(MultinomialNB(alpha=10), X_3v8, y_3v8, cv=10)
assert_greater(scores.mean(), 0.94)

# Bernoulli NB
scores = cross_val_score(BernoulliNB(alpha=10), X > 4, y, cv=10)
assert_greater(scores.mean(), 0.83)

scores = cross_val_score(BernoulliNB(alpha=10), X_3v8 > 4, y_3v8, cv=10)
assert_greater(scores.mean(), 0.92)

# Gaussian NB
scores = cross_val_score(GaussianNB(), X, y, cv=10)
assert_greater(scores.mean(), 0.77)

scores = cross_val_score(GaussianNB(var_smoothing=0.1), X, y, cv=10)
assert_greater(scores.mean(), 0.89)

scores = cross_val_score(GaussianNB(), X_3v8, y_3v8, cv=10)
assert_greater(scores.mean(), 0.86) ```
Example 11
```def __init__(self, info, verbose=True, debug_mode=False):
self.label_num=info['label_num']
self.target_num=info['target_num']
self.metric = info['metric']
self.postprocessor = None
#self.postprocessor = MultiLabelEnsemble(LogisticRegression(), balance=True) # To calibrate proba
self.postprocessor = MultiLabelEnsemble(LogisticRegression(), balance=False) # To calibrate proba
if debug_mode>=2:
self.name = "RandomPredictor"
self.model = RandomPredictor(self.target_num)
self.predict_method = self.model.predict_proba
return
if info['is_sparse']==True:
self.name = "BaggingRidgeRegressor"
self.model = BaggingRegressor(base_estimator=Ridge(), n_estimators=1, verbose=verbose) # unfortunately, no warm start...
else:
self.model = GradientBoostingRegressor(n_estimators=1, verbose=verbose, warm_start = True)
self.predict_method = self.model.predict # Always predict probabilities
else:
if info['has_categorical']: # Out of lazziness, we do not convert categorical variables...
self.name = "RandomForestClassifier"
self.model = RandomForestClassifier(n_estimators=1, verbose=verbose) # unfortunately, no warm start...
elif info['is_sparse']:
self.name = "BaggingNBClassifier"
self.model = BaggingClassifier(base_estimator=BernoulliNB(), n_estimators=1, verbose=verbose) # unfortunately, no warm start...
else:
self.model = eval(self.name + "(n_estimators=1, verbose=" + str(verbose) + ", min_samples_split=10, random_state=1, warm_start = True)")
self.model = MultiLabelEnsemble(self.model)
self.predict_method = self.model.predict_proba ```
Example 12
```def __init__(self, alpha=1.0, binarize=0.0, fit_prior=True, class_prior=None):
self._hyperparams = {
'alpha': alpha,
'binarize': binarize,
'fit_prior': fit_prior,
'class_prior': class_prior}
self._wrapped_model = Op(**self._hyperparams) ```
Example 13
```def test_export_random_ind():
"""Assert that the TPOTClassifier can generate the same pipeline export with random seed of 39."""
tpot_obj = TPOTClassifier(random_state=39, config_dict="TPOT light")
tpot_obj._fit_init()
tpot_obj._pbar = tqdm(total=1, disable=True)
pipeline = tpot_obj._toolbox.individual()
expected_code = """import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB

# NOTE: Make sure that the outcome column is labeled 'target' in the data file
features = tpot_data.drop('target', axis=1)
training_features, testing_features, training_target, testing_target = \\
train_test_split(features, tpot_data['target'], random_state=39)

exported_pipeline = BernoulliNB(alpha=1.0, fit_prior=False)
# Fix random state in exported estimator
if hasattr(exported_pipeline, 'random_state'):
setattr(exported_pipeline, 'random_state', 39)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""
exported_code = export_pipeline(pipeline, tpot_obj.operators, tpot_obj._pset, random_state=tpot_obj.random_state)
assert expected_code == exported_code ```
Example 14
```def __init__(self, options):
self.handle_options(options)

out_params = convert_params(
options.get('params', {}),
floats=['alpha', 'binarize'],
bools=['fit_prior'],
)

self.estimator = _BernoulliNB(**out_params) ```
Example 15
```def test_BernoulliNB(self):
BernoulliNB_Algo.register_codecs()
self.classifier_util(BernoulliNB) ```
Example 16
```def learn_model(data,target):
# preparing data for split validation. 60% training, 40% test
data_train,data_test,target_train,target_test = cross_validation.train_test_split(data,target,test_size=0.4,random_state=43)
classifier = BernoulliNB().fit(data_train,target_train)
predicted = classifier.predict(data_test)
evaluate_model(target_test,predicted)

# http://scikit-learn.org/stable/modules/model_evaluation.html ```
Example 17
```def trainBernoulliNB(data):
"""
使用伯努利模型对数据建模
"""
vect = CountVectorizer(token_pattern=r"(?u)\b\w+\b", binary=True)
X = vect.fit_transform(data["content"])
le = LabelEncoder()
Y = le.fit_transform(data["label"])
model = BernoulliNB()
model.fit(X, Y)
return vect, le, model ```
Example 18
```def trainModel(data):
"""
使用random forest embedding+伯努利模型对数据建模
"""
pipe = Pipeline([("embedding", RandomTreesEmbedding(random_state=1024)),
("model", BernoulliNB())])
pipe.fit(data[["x1", "x2"]], data["y"])
return pipe ```
Example 19
```def test_objectmapper(self):
df = pdml.ModelFrame([])
self.assertIs(df.naive_bayes.GaussianNB, nb.GaussianNB)
self.assertIs(df.naive_bayes.MultinomialNB, nb.MultinomialNB)
self.assertIs(df.naive_bayes.BernoulliNB, nb.BernoulliNB) ```
Example 20
```def test_discrete_prior():
# Test whether class priors are properly set.
for cls in [BernoulliNB, MultinomialNB]:
clf = cls().fit(X2, y2)
assert_array_almost_equal(np.log(np.array([2, 2, 2]) / 6.0),
clf.class_log_prior_, 8) ```
Example 21
```def test_discretenb_partial_fit():
for cls in [MultinomialNB, BernoulliNB]:
yield check_partial_fit, cls ```
Example 22
```def test_input_check_fit():
# Test input checks for the fit method
for cls in [BernoulliNB, MultinomialNB, GaussianNB]:
# check shape consistency for number of samples at fit time
assert_raises(ValueError, cls().fit, X2, y2[:-1])

# check shape consistency for number of input features at predict time
clf = cls().fit(X2, y2)
assert_raises(ValueError, clf.predict, X2[:, :-1]) ```
Example 23
```def test_discretenb_uniform_prior():
# Test whether discrete NB classes fit a uniform prior
# when fit_prior=False and class_prior=None

for cls in [BernoulliNB, MultinomialNB]:
clf = cls()
clf.set_params(fit_prior=False)
clf.fit([[0], [0], [1]], [0, 0, 1])
prior = np.exp(clf.class_log_prior_)
assert_array_equal(prior, np.array([.5, .5])) ```
Example 24
```def test_discretenb_provide_prior():
# Test whether discrete NB classes use provided prior

for cls in [BernoulliNB, MultinomialNB]:
clf = cls(class_prior=[0.5, 0.5])
clf.fit([[0], [0], [1]], [0, 0, 1])
prior = np.exp(clf.class_log_prior_)
assert_array_equal(prior, np.array([.5, .5]))

# Inconsistent number of classes with prior
assert_raises(ValueError, clf.fit, [[0], [1], [2]], [0, 1, 2])
assert_raises(ValueError, clf.partial_fit, [[0], [1]], [0, 1],
classes=[0, 1, 1]) ```
Example 25
```def test_sample_weight_multiclass():
for cls in [BernoulliNB, MultinomialNB]:
# check shape consistency for number of samples at fit time
yield check_sample_weight_multiclass, cls ```
Example 26
```def test_coef_intercept_shape():
# coef_ and intercept_ should have shapes as in other linear models.
# Non-regression test for issue #2127.
X = [[1, 0, 0], [1, 1, 1]]
y = [1, 2]  # binary classification

for clf in [MultinomialNB(), BernoulliNB()]:
clf.fit(X, y)
assert_equal(clf.coef_.shape, (1, 3))
assert_equal(clf.intercept_.shape, (1,)) ```
Example 27
```def retrieve_args(self):
"""Adds arguments to the parser for each respective setting of the command line interface"""
# Classifier
default='MultinomialNB', help="Represents the classifier that will be used (default: MultinomialNB) .")

# Classifier's arguments
help="Represents the arguments that will be passed to the classifier (default: '').")

# Data: Testing and training already split
default=["data/imdb-binary-pool-mindf5-ng11", "data/imdb-binary-test-mindf5-ng11"],
help='Files that contain the data, pool and test, and number of features (default: data/imdb-binary-pool-mindf5-ng11 data/imdb-binary-test-mindf5-ng11 27272).')

# Data: Single File
help='Single file that contains the data. Cross validation will be performed (default: None).')

# Whether to make the data dense
self.parser.add_argument('-make_dense', default=False, action='store_true', help='Whether to make the sparse data dense. Some classifiers require this.')

# Number of Folds
self.parser.add_argument("-cv", type=int, default=10, help="Number of folds for cross validation. Works only if a single dataset is loaded (default: 10).")

# File: Name of file that will be written the results
help='This feature represents the name that will be written with the result. If it is left blank, the file will not be written (default: None ).')

# Number of Trials
self.parser.add_argument("-nt", "--num_trials", type=int, default=10, help="Number of trials (default: 10).")

# Strategies
self.parser.add_argument("-st", "--strategies", choices=['erreduct', 'loggain', 'qbc', 'rand','unc'], nargs='*',default=['rand'],
help="Represent a list of strategies for choosing next samples (default: rand).")

# Boot Strap
help='Sets the Boot strap (default: 10).')

# Budget
help='Sets the budget (default: 500).')

# Step size
help='Sets the step size (default: 10).')

# Sub pool size
help='Sets the sub pool size (default: None).') ```
Example 28
```def test_alpha():
# Setting alpha=0 should not output nan results when p(x_i|y_j)=0 is a case
X = np.array([[1, 0], [1, 1]])
y = np.array([0, 1])
nb = BernoulliNB(alpha=0.)
assert_warns(UserWarning, nb.partial_fit, X, y, classes=[0, 1])
assert_warns(UserWarning, nb.fit, X, y)
prob = np.array([[1, 0], [0, 1]])
assert_array_almost_equal(nb.predict_proba(X), prob)

nb = MultinomialNB(alpha=0.)
assert_warns(UserWarning, nb.partial_fit, X, y, classes=[0, 1])
assert_warns(UserWarning, nb.fit, X, y)
prob = np.array([[2./3, 1./3], [0, 1]])
assert_array_almost_equal(nb.predict_proba(X), prob)

# Test sparse X
X = scipy.sparse.csr_matrix(X)
nb = BernoulliNB(alpha=0.)
assert_warns(UserWarning, nb.fit, X, y)
prob = np.array([[1, 0], [0, 1]])
assert_array_almost_equal(nb.predict_proba(X), prob)

nb = MultinomialNB(alpha=0.)
assert_warns(UserWarning, nb.fit, X, y)
prob = np.array([[2./3, 1./3], [0, 1]])
assert_array_almost_equal(nb.predict_proba(X), prob)

# Test for alpha < 0
X = np.array([[1, 0], [1, 1]])
y = np.array([0, 1])
expected_msg = ('Smoothing parameter alpha = -1.0e-01. '
'alpha should be > 0.')
b_nb = BernoulliNB(alpha=-0.1)
m_nb = MultinomialNB(alpha=-0.1)
assert_raise_message(ValueError, expected_msg, b_nb.fit, X, y)
assert_raise_message(ValueError, expected_msg, m_nb.fit, X, y)

b_nb = BernoulliNB(alpha=-0.1)
m_nb = MultinomialNB(alpha=-0.1)
assert_raise_message(ValueError, expected_msg, b_nb.partial_fit,
X, y, classes=[0, 1])
assert_raise_message(ValueError, expected_msg, m_nb.partial_fit,
X, y, classes=[0, 1]) ```
Example 29
```def test_bnb():
# Tests that BernoulliNB when alpha=1.0 gives the same values as
# those given for the toy example in Manning, Raghavan, and
# Schuetze's "Introduction to Information Retrieval" book:
# http://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html

# Training data points are:
# Chinese Beijing Chinese (class: China)
# Chinese Chinese Shanghai (class: China)
# Chinese Macao (class: China)
# Tokyo Japan Chinese (class: Japan)

# Features are Beijing, Chinese, Japan, Macao, Shanghai, and Tokyo
X = np.array([[1, 1, 0, 0, 0, 0],
[0, 1, 0, 0, 1, 0],
[0, 1, 0, 1, 0, 0],
[0, 1, 1, 0, 0, 1]])

# Classes are China (0), Japan (1)
Y = np.array([0, 0, 0, 1])

# Fit BernoulliBN w/ alpha = 1.0
clf = BernoulliNB(alpha=1.0)
clf.fit(X, Y)

# Check the class prior is correct
class_prior = np.array([0.75, 0.25])
assert_array_almost_equal(np.exp(clf.class_log_prior_), class_prior)

# Check the feature probabilities are correct
feature_prob = np.array([[0.4, 0.8, 0.2, 0.4, 0.4, 0.2],
[1/3.0, 2/3.0, 2/3.0, 1/3.0, 1/3.0, 2/3.0]])
assert_array_almost_equal(np.exp(clf.feature_log_prob_), feature_prob)

# Testing data point is:
# Chinese Chinese Chinese Tokyo Japan
X_test = np.array([[0, 1, 1, 0, 0, 1]])

# Check the predictive probabilities are correct
unnorm_predict_proba = np.array([[0.005183999999999999,
0.02194787379972565]])
predict_proba = unnorm_predict_proba / np.sum(unnorm_predict_proba)
assert_array_almost_equal(clf.predict_proba(X_test), predict_proba) ```
Example 30
```def test_alpha():
# Setting alpha=0 should not output nan results when p(x_i|y_j)=0 is a case
X = np.array([[1, 0], [1, 1]])
y = np.array([0, 1])
nb = BernoulliNB(alpha=0.)
assert_warns(UserWarning, nb.partial_fit, X, y, classes=[0, 1])
assert_warns(UserWarning, nb.fit, X, y)
prob = np.array([[1, 0], [0, 1]])
assert_array_almost_equal(nb.predict_proba(X), prob)

nb = MultinomialNB(alpha=0.)
assert_warns(UserWarning, nb.partial_fit, X, y, classes=[0, 1])
assert_warns(UserWarning, nb.fit, X, y)
prob = np.array([[2./3, 1./3], [0, 1]])
assert_array_almost_equal(nb.predict_proba(X), prob)

# Test sparse X
X = scipy.sparse.csr_matrix(X)
nb = BernoulliNB(alpha=0.)
assert_warns(UserWarning, nb.fit, X, y)
prob = np.array([[1, 0], [0, 1]])
assert_array_almost_equal(nb.predict_proba(X), prob)

nb = MultinomialNB(alpha=0.)
assert_warns(UserWarning, nb.fit, X, y)
prob = np.array([[2./3, 1./3], [0, 1]])
assert_array_almost_equal(nb.predict_proba(X), prob)

# Test for alpha < 0
X = np.array([[1, 0], [1, 1]])
y = np.array([0, 1])
expected_msg = ('Smoothing parameter alpha = -1.0e-01. '
'alpha should be > 0.')
b_nb = BernoulliNB(alpha=-0.1)
m_nb = MultinomialNB(alpha=-0.1)
assert_raise_message(ValueError, expected_msg, b_nb.fit, X, y)
assert_raise_message(ValueError, expected_msg, m_nb.fit, X, y)

b_nb = BernoulliNB(alpha=-0.1)
m_nb = MultinomialNB(alpha=-0.1)
assert_raise_message(ValueError, expected_msg, b_nb.partial_fit,
X, y, classes=[0, 1])
assert_raise_message(ValueError, expected_msg, m_nb.partial_fit,
X, y, classes=[0, 1]) ```