Python sklearn.decomposition.LatentDirichletAllocation() Examples

The following are 28 code examples showing how to use sklearn.decomposition.LatentDirichletAllocation(). They are extracted from open source projects; the line above each example identifies the originating project, author, file, and license.

You may also want to check out the other functions and classes available in the sklearn.decomposition module.
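
Before the project examples, here is a minimal, self-contained usage sketch; the toy documents and parameter values are illustrative and not drawn from any of the projects below:

from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer

# LDA expects a matrix of token counts, typically produced by CountVectorizer.
docs = [
    "the cat sat on the mat",
    "dogs and cats make good pets",
    "stocks rallied as markets opened",
    "investors bought shares of the index fund",
]
tf = CountVectorizer().fit_transform(docs)

lda = LatentDirichletAllocation(n_components=2, random_state=0)
doc_topics = lda.fit_transform(tf)  # shape: (n_documents, n_components)
topic_words = lda.components_       # shape: (n_components, n_features)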

Example 1
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_online_lda.py   License: MIT License
def test_lda_perplexity_mismatch():
    # test dimension mismatch in `perplexity` method
    rng = np.random.RandomState(0)
    n_components = rng.randint(3, 6)
    n_samples = rng.randint(6, 10)
    X = np.random.randint(4, size=(n_samples, 10))
    lda = LatentDirichletAllocation(n_components=n_components,
                                    learning_offset=5., total_samples=20,
                                    random_state=rng)
    lda.fit(X)
    # invalid samples
    invalid_n_samples = rng.randint(4, size=(n_samples + 1, n_components))
    assert_raises_regexp(ValueError, r'Number of samples',
                         lda._perplexity_precomp_distr, X, invalid_n_samples)
    # invalid topic number
    invalid_n_components = rng.randint(4, size=(n_samples, n_components + 1))
    assert_raises_regexp(ValueError, r'Number of topics',
                         lda._perplexity_precomp_distr, X,
                         invalid_n_components) 
Example 2
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_online_lda.py   License: MIT License
def test_lda_perplexity(method):
    # Test LDA perplexity with the given learning method:
    # perplexity should be lower after more iterations
    n_components, X = _build_sparse_mtx()
    lda_1 = LatentDirichletAllocation(n_components=n_components,
                                      max_iter=1, learning_method=method,
                                      total_samples=100, random_state=0)
    lda_2 = LatentDirichletAllocation(n_components=n_components,
                                      max_iter=10, learning_method=method,
                                      total_samples=100, random_state=0)
    lda_1.fit(X)
    perp_1 = lda_1.perplexity(X, sub_sampling=False)

    lda_2.fit(X)
    perp_2 = lda_2.perplexity(X, sub_sampling=False)
    assert_greater_equal(perp_1, perp_2)

    perp_1_subsampling = lda_1.perplexity(X, sub_sampling=True)
    perp_2_subsampling = lda_2.perplexity(X, sub_sampling=True)
    assert_greater_equal(perp_1_subsampling, perp_2_subsampling) 
Example 3
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_online_lda.py   License: MIT License
def test_lda_score(method):
    # Test LDA score with the given learning method:
    # score should be higher after more iterations
    n_components, X = _build_sparse_mtx()
    lda_1 = LatentDirichletAllocation(n_components=n_components,
                                      max_iter=1, learning_method=method,
                                      total_samples=100, random_state=0)
    lda_2 = LatentDirichletAllocation(n_components=n_components,
                                      max_iter=10, learning_method=method,
                                      total_samples=100, random_state=0)
    lda_1.fit_transform(X)
    score_1 = lda_1.score(X)

    lda_2.fit_transform(X)
    score_2 = lda_2.score(X)
    assert_greater_equal(score_2, score_1) 
Example 4
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_online_lda.py   License: MIT License
def test_lda_fit_perplexity():
    # Test that the perplexity computed during fit is consistent with what is
    # returned by the perplexity method
    n_components, X = _build_sparse_mtx()
    lda = LatentDirichletAllocation(n_components=n_components, max_iter=1,
                                    learning_method='batch', random_state=0,
                                    evaluate_every=1)
    lda.fit(X)

    # Perplexity computed at end of fit method
    perplexity1 = lda.bound_

    # Result of perplexity method on the train set
    perplexity2 = lda.perplexity(X)

    assert_almost_equal(perplexity1, perplexity2) 
Example 5
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_online_lda.py   License: MIT License
def check_verbosity(verbose, evaluate_every, expected_lines,
                    expected_perplexities):
    n_components, X = _build_sparse_mtx()
    lda = LatentDirichletAllocation(n_components=n_components, max_iter=3,
                                    learning_method='batch',
                                    verbose=verbose,
                                    evaluate_every=evaluate_every,
                                    random_state=0)
    out = StringIO()
    old_out, sys.stdout = sys.stdout, out
    try:
        lda.fit(X)
    finally:
        sys.stdout = old_out

    n_lines = out.getvalue().count('\n')
    n_perplexity = out.getvalue().count('perplexity')
    assert_equal(expected_lines, n_lines)
    assert_equal(expected_perplexities, n_perplexity) 
Example 6
Project: Spider   Author: starFalll   File: LDA_Analysis.py   License: MIT License
def word2vec(word_list, n_features=1000, topics=5):
    tf_vectorizer = CountVectorizer(strip_accents='unicode',
                                    max_features=n_features,
                                    #stop_words='english',
                                    max_df=0.5,
                                    min_df=10)
    tf = tf_vectorizer.fit_transform(word_list)

    lda = LatentDirichletAllocation(n_components=topics,  # number of topics
                                    learning_method='batch',  # with a small corpus used only for exploration, 'batch' is preferable: it leaves far fewer parameters to tune
                                    )
    # train the model with variational Bayes
    lda.fit(tf)

    # feature names, used to list the keywords of each topic in turn
    tf_feature_names = tf_vectorizer.get_feature_names()

    return lda,tf,tf_feature_names,tf_vectorizer

# display the topics as a visualization
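
The visualization helper that this comment introduces is not included in the snippet above; a minimal sketch of what such a top-words printer might look like follows (the function name and arguments are illustrative, not taken from the project):

def print_top_words(lda, tf_feature_names, n_top_words=10):
    # lda.components_ holds one weight vector per topic; argsort yields the
    # indices of the most heavily weighted words in each topic.
    for topic_idx, topic in enumerate(lda.components_):
        top = topic.argsort()[-n_top_words:][::-1]
        print('Topic #%d: %s' % (topic_idx, ' '.join(tf_feature_names[i] for i in top)))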
Example 7
Project: atap   Author: foxbook   File: topics.py   License: Apache License 2.0
def __init__(self, n_topics=50, estimator='LDA'):
        """
        n_topics is the desired number of topics
        To use Latent Semantic Analysis, set estimator to 'LSA',
        To use Non-Negative Matrix Factorization, set estimator to 'NMF',
        otherwise, defaults to Latent Dirichlet Allocation ('LDA').
        """
        self.n_topics = n_topics

        if estimator == 'LSA':
            self.estimator = TruncatedSVD(n_components=self.n_topics)
        elif estimator == 'NMF':
            self.estimator = NMF(n_components=self.n_topics)
        else:
            self.estimator = LatentDirichletAllocation(n_components=self.n_topics)

        self.model = Pipeline([
            ('norm', TextNormalizer()),
            ('tfidf', CountVectorizer(tokenizer=identity,
                                      preprocessor=None, lowercase=False)),
            ('model', self.estimator)
        ]) 
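
For comparison, here is a self-contained sketch of the same estimator-selection pattern built only from scikit-learn components; TextNormalizer and the identity tokenizer from the original are omitted, so this approximates rather than reproduces the pipeline above:

from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import TruncatedSVD, NMF, LatentDirichletAllocation

def build_topic_pipeline(n_topics=5, estimator='LDA'):
    # Same selection logic as the constructor above.
    if estimator == 'LSA':
        model = TruncatedSVD(n_components=n_topics)
    elif estimator == 'NMF':
        model = NMF(n_components=n_topics)
    else:
        model = LatentDirichletAllocation(n_components=n_topics)
    return Pipeline([('vect', CountVectorizer()), ('model', model)])

docs = ["cats chase dogs", "stocks and bonds", "dogs chase cats", "bond yields rose"]
doc_topics = build_topic_pipeline(n_topics=2, estimator='NMF').fit_transform(docs)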
Example 8
Project: Python-DevOps   Author: huseinzol05   File: topic.py   License: MIT License
def train_lda(corpus, n_topics=10, max_df=0.95, min_df=2,
              cleaning=clearstring, stop_words='english'):
    if cleaning is not None:
        for i in range(len(corpus)):
            corpus[i] = cleaning(corpus[i])
    tf_vectorizer = CountVectorizer(
        max_df=max_df,
        min_df=min_df,
        stop_words=stop_words)
    tf = tf_vectorizer.fit_transform(corpus)
    tf_features = tf_vectorizer.get_feature_names()
    lda = LatentDirichletAllocation(
        n_components=n_topics,
        max_iter=5,
        learning_method='online',
        learning_offset=50.,
        random_state=0).fit(tf)
    return TOPIC(tf_features, lda) 
Example 9
Project: pandas-ml   Author: pandas-ml   File: test_decomposition.py   License: BSD 3-Clause "New" or "Revised" License
def test_objectmapper(self):
        df = pdml.ModelFrame([])
        self.assertIs(df.decomposition.PCA, decomposition.PCA)
        self.assertIs(df.decomposition.IncrementalPCA,
                      decomposition.IncrementalPCA)
        self.assertIs(df.decomposition.KernelPCA, decomposition.KernelPCA)
        self.assertIs(df.decomposition.FactorAnalysis,
                      decomposition.FactorAnalysis)
        self.assertIs(df.decomposition.FastICA, decomposition.FastICA)
        self.assertIs(df.decomposition.TruncatedSVD, decomposition.TruncatedSVD)
        self.assertIs(df.decomposition.NMF, decomposition.NMF)
        self.assertIs(df.decomposition.SparsePCA, decomposition.SparsePCA)
        self.assertIs(df.decomposition.MiniBatchSparsePCA,
                      decomposition.MiniBatchSparsePCA)
        self.assertIs(df.decomposition.SparseCoder, decomposition.SparseCoder)
        self.assertIs(df.decomposition.DictionaryLearning,
                      decomposition.DictionaryLearning)
        self.assertIs(df.decomposition.MiniBatchDictionaryLearning,
                      decomposition.MiniBatchDictionaryLearning)

        self.assertIs(df.decomposition.LatentDirichletAllocation,
                      decomposition.LatentDirichletAllocation) 
Example 10
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_online_lda.py   License: MIT License
def test_lda_perplexity_mismatch():
    # test dimension mismatch in `perplexity` method
    rng = np.random.RandomState(0)
    n_components = rng.randint(3, 6)
    n_samples = rng.randint(6, 10)
    X = np.random.randint(4, size=(n_samples, 10))
    lda = LatentDirichletAllocation(n_components=n_components,
                                    learning_offset=5., total_samples=20,
                                    random_state=rng)
    lda.fit(X)
    # invalid samples
    invalid_n_samples = rng.randint(4, size=(n_samples + 1, n_components))
    assert_raises_regexp(ValueError, r'Number of samples',
                         lda._perplexity_precomp_distr, X, invalid_n_samples)
    # invalid topic number
    invalid_n_components = rng.randint(4, size=(n_samples, n_components + 1))
    assert_raises_regexp(ValueError, r'Number of topics',
                         lda._perplexity_precomp_distr, X,
                         invalid_n_components) 
Example 11
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_online_lda.py   License: MIT License
def test_lda_perplexity():
    # Test LDA perplexity with both learning methods:
    # perplexity should be lower after more iterations
    n_components, X = _build_sparse_mtx()
    for method in ('online', 'batch'):
        lda_1 = LatentDirichletAllocation(n_components=n_components,
                                          max_iter=1, learning_method=method,
                                          total_samples=100, random_state=0)
        lda_2 = LatentDirichletAllocation(n_components=n_components,
                                          max_iter=10, learning_method=method,
                                          total_samples=100, random_state=0)
        lda_1.fit(X)
        perp_1 = lda_1.perplexity(X, sub_sampling=False)

        lda_2.fit(X)
        perp_2 = lda_2.perplexity(X, sub_sampling=False)
        assert_greater_equal(perp_1, perp_2)

        perp_1_subsampling = lda_1.perplexity(X, sub_sampling=True)
        perp_2_subsampling = lda_2.perplexity(X, sub_sampling=True)
        assert_greater_equal(perp_1_subsampling, perp_2_subsampling) 
Example 12
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_online_lda.py   License: MIT License
def test_lda_score():
    # Test LDA score with both learning methods:
    # score should be higher after more iterations
    n_components, X = _build_sparse_mtx()
    for method in ('online', 'batch'):
        lda_1 = LatentDirichletAllocation(n_components=n_components,
                                          max_iter=1, learning_method=method,
                                          total_samples=100, random_state=0)
        lda_2 = LatentDirichletAllocation(n_components=n_components,
                                          max_iter=10, learning_method=method,
                                          total_samples=100, random_state=0)
        lda_1.fit_transform(X)
        score_1 = lda_1.score(X)

        lda_2.fit_transform(X)
        score_2 = lda_2.score(X)
        assert_greater_equal(score_2, score_1) 
Example 13
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_online_lda.py   License: MIT License
def test_lda_fit_perplexity():
    # Test that the perplexity computed during fit is consistent with what is
    # returned by the perplexity method
    n_components, X = _build_sparse_mtx()
    lda = LatentDirichletAllocation(n_components=n_components, max_iter=1,
                                    learning_method='batch', random_state=0,
                                    evaluate_every=1)
    lda.fit(X)

    # Perplexity computed at end of fit method
    perplexity1 = lda.bound_

    # Result of perplexity method on the train set
    perplexity2 = lda.perplexity(X)

    assert_almost_equal(perplexity1, perplexity2) 
Example 14
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_online_lda.py   License: MIT License
def check_verbosity(verbose, evaluate_every, expected_lines,
                    expected_perplexities):
    n_components, X = _build_sparse_mtx()
    lda = LatentDirichletAllocation(n_components=n_components, max_iter=3,
                                    learning_method='batch',
                                    verbose=verbose,
                                    evaluate_every=evaluate_every,
                                    random_state=0)
    out = StringIO()
    old_out, sys.stdout = sys.stdout, out
    try:
        lda.fit(X)
    finally:
        sys.stdout = old_out

    n_lines = out.getvalue().count('\n')
    n_perplexity = out.getvalue().count('perplexity')
    assert_equal(expected_lines, n_lines)
    assert_equal(expected_perplexities, n_perplexity) 
Example 15
Project: tmtoolkit   Author: WZBSocialScienceCenter   File: test_topicmod_evaluate.py   License: Apache License 2.0
def test_evaluation_sklearn_all_metrics():
    passed_params = {'n_components', 'learning_method', 'evaluate_every', 'max_iter', 'n_jobs', 'random_state'}
    varying_params = [dict(n_components=k) for k in range(2, 5)]
    const_params = dict(learning_method='batch', evaluate_every=1, max_iter=3, n_jobs=1, random_state=1)

    evaluate_topic_models_kwargs = dict(
        metric=tm_sklearn.AVAILABLE_METRICS,
        held_out_documents_wallach09_n_samples=10,
        held_out_documents_wallach09_n_folds=2,
        coherence_gensim_vocab=EVALUATION_TEST_VOCAB,
        coherence_gensim_texts=EVALUATION_TEST_TOKENS,
        return_models=True,
    )

    eval_res = tm_sklearn.evaluate_topic_models(EVALUATION_TEST_DTM, varying_params, const_params,
                                                **evaluate_topic_models_kwargs)

    assert len(eval_res) == len(varying_params)

    for param_set, metric_results in eval_res:
        assert set(param_set.keys()) == passed_params
        assert set(metric_results.keys()) == set(tm_sklearn.AVAILABLE_METRICS + ('model',))

        assert metric_results['perplexity'] > 0
        assert 0 <= metric_results['cao_juan_2009'] <= 1
        assert 0 <= metric_results['arun_2010']
        assert metric_results['coherence_mimno_2011'] < 0
        assert np.isclose(metric_results['coherence_gensim_u_mass'], metric_results['coherence_mimno_2011'])
        assert 0 <= metric_results['coherence_gensim_c_v'] <= 1
        assert metric_results['coherence_gensim_c_uci'] < 0
        assert metric_results['coherence_gensim_c_npmi'] < 0

        if 'held_out_documents_wallach09' in tm_sklearn.AVAILABLE_METRICS:  # only if gmpy2 is installed
            assert metric_results['held_out_documents_wallach09'] < 0

        assert isinstance(metric_results['model'], LatentDirichletAllocation) 
Example 16
Project: tmtoolkit   Author: WZBSocialScienceCenter   File: test_topicmod_evaluate.py   License: Apache License 2.0
def test_compute_models_parallel_sklearn():
    passed_params = {'n_components', 'learning_method', 'evaluate_every', 'max_iter', 'n_jobs'}
    varying_params = [dict(n_components=k) for k in range(2, 5)]
    const_params = dict(learning_method='batch', evaluate_every=1, max_iter=3, n_jobs=1)

    models = tm_sklearn.compute_models_parallel(EVALUATION_TEST_DTM, varying_params, const_params)

    assert len(models) == len(varying_params)

    for param_set, model in models:
        assert set(param_set.keys()) == passed_params
        assert isinstance(model, LatentDirichletAllocation)
        assert isinstance(model.components_, np.ndarray) 
Example 17
Project: CheTo   Author: rdkit   File: chemTopicModel.py   License: BSD 3-Clause "New" or "Revised" License
def fitTopicModel(self, numTopics, max_iter=100, **kwargs):

        self.lda = LatentDirichletAllocation(n_components=numTopics, learning_method=self.learningMethod,
                                             random_state=self.seed, n_jobs=1, max_iter=max_iter,
                                             batch_size=self.chunksize, **kwargs)
        if self.fragM.shape[0] > self.chunksize:
            # fit the model in chunks
            self.lda.learning_method = 'online'
            self.lda.fit(self.fragM)
        else:
            self.lda.fit(self.fragM) 
Example 18
Project: causal-text-embeddings   Author: blei-lab   File: helpers.py   License: MIT License
def learn_topics(X, X_dev, K=50):
	lda = LatentDirichletAllocation(n_components=K, learning_method='online', verbose=1)
	print("Fitting", K, "topics...")
	lda.fit(X)
	score = lda.perplexity(X_dev)
	print("Log likelihood:", score)
	topics = lda.components_
	return score, lda, topics 
Example 19
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_online_lda.py   License: MIT License
def test_lda_default_prior_params():
    # default prior parameter should be `1 / n_components`
    # and verbose params should not affect result
    n_components, X = _build_sparse_mtx()
    prior = 1. / n_components
    lda_1 = LatentDirichletAllocation(n_components=n_components,
                                      doc_topic_prior=prior,
                                      topic_word_prior=prior, random_state=0)
    lda_2 = LatentDirichletAllocation(n_components=n_components,
                                      random_state=0)
    topic_distr_1 = lda_1.fit_transform(X)
    topic_distr_2 = lda_2.fit_transform(X)
    assert_almost_equal(topic_distr_1, topic_distr_2) 
Example 20
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_online_lda.py   License: MIT License
def test_lda_fit_batch():
    # Test LDA batch training (`fit` method with learning_method='batch')
    rng = np.random.RandomState(0)
    n_components, X = _build_sparse_mtx()
    lda = LatentDirichletAllocation(n_components=n_components,
                                    evaluate_every=1, learning_method='batch',
                                    random_state=rng)
    lda.fit(X)

    correct_idx_grps = [(0, 1, 2), (3, 4, 5), (6, 7, 8)]
    for component in lda.components_:
        # Find top 3 words in each LDA component
        top_idx = set(component.argsort()[-3:][::-1])
        assert tuple(sorted(top_idx)) in correct_idx_grps 
Example 21
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_online_lda.py   License: MIT License
def test_lda_fit_online():
    # Test LDA online learning (`fit` method with 'online' learning)
    rng = np.random.RandomState(0)
    n_components, X = _build_sparse_mtx()
    lda = LatentDirichletAllocation(n_components=n_components,
                                    learning_offset=10., evaluate_every=1,
                                    learning_method='online', random_state=rng)
    lda.fit(X)

    correct_idx_grps = [(0, 1, 2), (3, 4, 5), (6, 7, 8)]
    for component in lda.components_:
        # Find top 3 words in each LDA component
        top_idx = set(component.argsort()[-3:][::-1])
        assert tuple(sorted(top_idx)) in correct_idx_grps 
Example 22
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_online_lda.py   License: MIT License
def test_lda_partial_fit():
    # Test LDA online learning (`partial_fit` method)
    # (same as test_lda_batch)
    rng = np.random.RandomState(0)
    n_components, X = _build_sparse_mtx()
    lda = LatentDirichletAllocation(n_components=n_components,
                                    learning_offset=10., total_samples=100,
                                    random_state=rng)
    for i in range(3):
        lda.partial_fit(X)

    correct_idx_grps = [(0, 1, 2), (3, 4, 5), (6, 7, 8)]
    for c in lda.components_:
        top_idx = set(c.argsort()[-3:][::-1])
        assert tuple(sorted(top_idx)) in correct_idx_grps 
Example 23
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_online_lda.py   License: MIT License
def test_lda_dense_input():
    # Test LDA with dense input.
    rng = np.random.RandomState(0)
    n_components, X = _build_sparse_mtx()
    lda = LatentDirichletAllocation(n_components=n_components,
                                    learning_method='batch', random_state=rng)
    lda.fit(X.toarray())

    correct_idx_grps = [(0, 1, 2), (3, 4, 5), (6, 7, 8)]
    for component in lda.components_:
        # Find top 3 words in each LDA component
        top_idx = set(component.argsort()[-3:][::-1])
        assert tuple(sorted(top_idx)) in correct_idx_grps 
Example 24
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_online_lda.py   License: MIT License
def test_lda_fit_transform(method):
    # Test LDA fit_transform & transform
    # fit_transform and transform result should be the same
    rng = np.random.RandomState(0)
    X = rng.randint(10, size=(50, 20))
    lda = LatentDirichletAllocation(n_components=5, learning_method=method,
                                    random_state=rng)
    X_fit = lda.fit_transform(X)
    X_trans = lda.transform(X)
    assert_array_almost_equal(X_fit, X_trans, 4) 
Example 25
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_online_lda.py   License: MIT License
def test_lda_partial_fit_dim_mismatch():
    # test `n_features` mismatch in `partial_fit`
    rng = np.random.RandomState(0)
    n_components = rng.randint(3, 6)
    n_col = rng.randint(6, 10)
    X_1 = np.random.randint(4, size=(10, n_col))
    X_2 = np.random.randint(4, size=(10, n_col + 1))
    lda = LatentDirichletAllocation(n_components=n_components,
                                    learning_offset=5., total_samples=20,
                                    random_state=rng)
    lda.partial_fit(X_1)
    assert_raises_regexp(ValueError, r"^The provided data has",
                         lda.partial_fit, X_2) 
Example 26
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_online_lda.py   License: MIT License
def test_invalid_params():
    # test `_check_params` method
    X = np.ones((5, 10))

    invalid_models = (
        ('n_components', LatentDirichletAllocation(n_components=0)),
        ('learning_method',
         LatentDirichletAllocation(learning_method='unknown')),
        ('total_samples', LatentDirichletAllocation(total_samples=0)),
        ('learning_offset', LatentDirichletAllocation(learning_offset=-1)),
    )
    for param, model in invalid_models:
        regex = r"^Invalid %r parameter" % param
        assert_raises_regexp(ValueError, regex, model.fit, X) 
Example 27
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_online_lda.py   License: MIT License
def test_lda_negative_input():
    # test that `fit` rejects a dense matrix containing negative values
    X = np.full((5, 10), -1.)
    lda = LatentDirichletAllocation()
    regex = r"^Negative values in data passed"
    assert_raises_regexp(ValueError, regex, lda.fit, X) 
Example 28
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_online_lda.py   License: MIT License
def test_lda_no_component_error():
    # test `transform` and `perplexity` before `fit`
    rng = np.random.RandomState(0)
    X = rng.randint(4, size=(20, 10))
    lda = LatentDirichletAllocation()
    regex = r"^no 'components_' attribute"
    assert_raises_regexp(NotFittedError, regex, lda.transform, X)
    assert_raises_regexp(NotFittedError, regex, lda.perplexity, X)