Python sklearn.decomposition.NMF Examples

The following are 30 code examples showing how to use sklearn.decomposition.NMF(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module sklearn.decomposition, or try the search function.

Example 1
Project: poem_generator   Author: lijiancheng0614   File: get_topic.py    License: Apache License 2.0 6 votes vote down vote up
def write_topics(ftopics, fwords, ftopics_words, poem_words, n_topic, n_topic_words):
    """Fit an NMF topic model on ``poem_words`` and persist the results.

    Three files are written:

    * ``ftopics``: UTF-8 text, one line per topic, holding the
      ``n_topic_words`` highest-weighted terms separated by spaces.
    * ``fwords``: pickle of the vectorizer's feature names.
    * ``ftopics_words``: pickle of the fitted ``nmf.components_`` matrix.

    Relies on a ``count_vect`` object from the enclosing scope, which is
    fitted here as a side effect.

    :param ftopics: output path for the text topic listing.
    :param fwords: output path for the pickled feature names.
    :param ftopics_words: output path for the pickled component matrix.
    :param poem_words: documents passed to ``count_vect.fit_transform``.
    :param n_topic: number of NMF components.
    :param n_topic_words: number of top terms written per topic.
    """
    count_matrix = count_vect.fit_transform(poem_words)
    tfidf = TfidfTransformer().fit_transform(count_matrix)
    nmf = decomposition.NMF(n_components=n_topic).fit(tfidf)
    feature_names = count_vect.get_feature_names()

    # Context managers guarantee each handle is closed even if a write fails.
    with codecs.open(ftopics, 'w', 'utf-8') as fw:
        for topic in nmf.components_:
            # argsort is ascending; the reversed tail slice yields the
            # n_topic_words largest weights, biggest first.
            top_terms = topic.argsort()[:-n_topic_words - 1:-1]
            fw.write(' '.join([feature_names[i] for i in top_terms]) + '\n')
    print('Write topics done.')

    with codecs.open(fwords, 'wb') as fw:
        pickle.dump(feature_names, fw)
    print('Write words done.')

    with codecs.open(ftopics_words, 'wb') as fw:
        pickle.dump(nmf.components_, fw)
    print('Write topic_words done.')
Example 2
Project: karateclub   Author: benedekrozemberczki   File: boostne.py    License: GNU General Public License v3.0 6 votes vote down vote up
def _fit_and_score_NMF(self, new_residuals):
        """
        Fit an NMF model on a residual matrix and compute the remaining residual scores.

        The factorization is evaluated only at the stored coordinates
        (``self._index_1``, ``self._index_2``); the result is subtracted from
        ``self._residuals.data`` and clipped at zero.

        Arg types:
            * **new_residuals** *(Scipy sparse matrix)* - The matrix to factorize.

        Return types:
            * **scores** *(CSR Scipy matrix)* - The clipped residual scores (float32).
            * **W** *(Numpy array)* - The embedding matrix returned by ``fit_transform``.
        """
        factorizer = NMF(n_components=self.dimensions,
                         init="random",
                         verbose=False,
                         alpha=self.alpha)
        W = factorizer.fit_transform(new_residuals)
        H = factorizer.components_

        # Pair each selected row of W with the matching column of H and take
        # their dot product, i.e. the reconstruction at that coordinate.
        left = W[self._index_1, :]
        right = H[:, self._index_2].T
        approximation = np.sum(np.multiply(left, right), axis=1)

        clipped = np.maximum(self._residuals.data-approximation, 0)
        coordinates = (self._index_1, self._index_2)
        scores = sparse.csr_matrix((clipped, coordinates),
                                   shape=self._shape,
                                   dtype=np.float32)
        return scores, W
Example 3
Project: atap   Author: foxbook   File: topics.py    License: Apache License 2.0 6 votes vote down vote up
def __init__(self, n_topics=50, estimator='LDA'):
        """
        Build a normalisation / vectorisation / topic-model pipeline.

        n_topics is the desired number of topics
        To use Latent Semantic Analysis, set estimator to 'LSA',
        To use Non-Negative Matrix Factorization, set estimator to 'NMF',
        otherwise, defaults to Latent Dirichlet Allocation ('LDA').

        The chosen estimator is stored on ``self.estimator`` and the full
        pipeline on ``self.model``.
        """
        self.n_topics = n_topics

        if estimator == 'LSA':
            self.estimator = TruncatedSVD(n_components=self.n_topics)
        elif estimator == 'NMF':
            self.estimator = NMF(n_components=self.n_topics)
        else:
            # scikit-learn renamed LDA's ``n_topics`` argument to
            # ``n_components`` and later removed the old keyword, so the
            # old spelling raises TypeError on current releases.
            self.estimator = LatentDirichletAllocation(n_components=self.n_topics)

        self.model = Pipeline([
            ('norm', TextNormalizer()),
            # NOTE: this step is labelled 'tfidf' but holds a CountVectorizer;
            # the label is kept so existing step lookups keep working.
            ('tfidf', CountVectorizer(tokenizer=identity,
                                      preprocessor=None, lowercase=False)),
            ('model', self.estimator)
        ])
Example 4
Project: BoostedFactorization   Author: benedekrozemberczki   File: boosted_embedding.py    License: GNU General Public License v3.0 6 votes vote down vote up
def fit_and_score_NMF(self, new_residuals):
        """
        Fit an NMF model on a residual matrix and compute the remaining residual scores.

        The factorization is evaluated only at the stored coordinates
        (``self.index_1``, ``self.index_2``); the result is subtracted from
        ``self.residuals.data`` and clipped at zero.
        :param new_residuals: Matrix to factorize.
        :return scores: Clipped residual scores as a float32 CSR matrix.
        :return W: Embedding matrix returned by ``fit_transform``.
        """
        factorizer = NMF(n_components=self.args.dimensions,
                         init="random",
                         verbose=False,
                         alpha=self.args.alpha)
        W = factorizer.fit_transform(new_residuals)
        H = factorizer.components_
        print("Scoring started.\n")

        # Pair each selected row of W with the matching column of H and take
        # their dot product, i.e. the reconstruction at that coordinate.
        left = W[self.index_1, :]
        right = H[:, self.index_2].T
        approximation = np.sum(np.multiply(left, right), axis=1)

        clipped = np.maximum(self.residuals.data-approximation, 0)
        coordinates = (self.index_1, self.index_2)
        scores = sparse.csr_matrix((clipped, coordinates),
                                   shape=self.shape,
                                   dtype=np.float32)
        return scores, W
Example 5
Project: yelp   Author: melqkiades   File: main.py    License: GNU Lesser General Public License v2.1 6 votes vote down vote up
def factorize_nmf():
    """Factorize the newsgroups document-term matrix with NMF.

    Loads the matrix with ``mmread`` from a hard-coded path, fits an NMF
    model configured from ``Constants`` and computes both factor matrices.
    The factors stay local: nothing is returned or written.
    """
    print('factorizing matrix')

    newsgroups_mmf_file = '/Users/fpena/tmp/nmf_graphlab/newsgroups/newsgroups_matrix.mmf'
    document_term_matrix = mmread(newsgroups_mmf_file)

    nmf_settings = dict(
        init="nndsvd",
        n_components=Constants.TOPIC_MODEL_NUM_TOPICS,
        max_iter=Constants.TOPIC_MODEL_ITERATIONS,
        alpha=Constants.NMF_REGULARIZATION,
        l1_ratio=Constants.NMF_REGULARIZATION_RATIO,
    )
    factorizer = decomposition.NMF(**nmf_settings)

    # Both factors are computed but currently unused by this function.
    document_topic_matrix = factorizer.fit_transform(document_term_matrix)
    topic_term_matrix = factorizer.components_
Example 6
Project: Python-DevOps   Author: huseinzol05   File: topic.py    License: MIT License 6 votes vote down vote up
def train_nmf(corpus, n_topics=10, max_df=0.95, min_df=2,
              cleaning=clearstring, stop_words='english'):
    """Fit an NMF topic model on TF-IDF features of ``corpus``.

    When ``cleaning`` is not None it is applied to every document and the
    result is written back into ``corpus``, so the caller's sequence is
    modified in place.

    Returns ``TOPIC(feature_names, fitted_nmf)``.
    """
    if cleaning is not None:
        for position in range(len(corpus)):
            original_text = corpus[position]
            corpus[position] = cleaning(original_text)

    vectorizer = TfidfVectorizer(
        max_df=max_df, min_df=min_df, stop_words=stop_words)
    tfidf = vectorizer.fit_transform(corpus)
    vocabulary = vectorizer.get_feature_names()

    model = NMF(n_components=n_topics,
                random_state=1,
                alpha=.1,
                l1_ratio=.5,
                init='nndsvd')
    model.fit(tfidf)
    return TOPIC(vocabulary, model)
Example 7
Project: sklearn-onnx   Author: onnx   File: plot_nmf.py    License: MIT License 6 votes vote down vote up
def nmf_to_onnx(W, H, op_version=12):
    """
    Convert an NMF given by matrices *W* and *H*
    (*WH* approximates the training data *M*)
    into an ONNX graph taking two index inputs, *col* and *row*,
    and producing the output *rec*, the reduced sum of the
    element-wise product of the entries extracted from *H* and *W.T*.
    The indices are assumed to refer to the training data.
    """
    picked_col = OnnxArrayFeatureExtractor(H, 'col')
    picked_row = OnnxArrayFeatureExtractor(W.T, 'row')
    product = OnnxMul(picked_col, picked_row, op_version=op_version)
    total = OnnxReduceSum(product, output_names="rec", op_version=op_version)

    # One int64 index array serves as the sample for both graph inputs.
    index_sample = np.array([0], dtype=np.int64)
    graph_inputs = {'col': index_sample, 'row': index_sample}
    graph_outputs = [('rec', FloatTensorType((None, 1)))]
    return total.to_onnx(inputs=graph_inputs,
                         outputs=graph_outputs,
                         target_opset=op_version)
Example 8
Project: GraphRole   Author: dkaslovsky   File: factor.py    License: MIT License 6 votes vote down vote up
def get_nmf_decomposition(
    X: np.ndarray,
    n_roles: int,
) -> FactorTuple:
    """
    Factor ``X`` with NMF (multiplicative-update solver, 'nndsvda' init)
    :param X: matrix to factor
    :param n_roles: rank of decomposition
    :return: the ``fit_transform`` output and the fitted ``components_``
    """
    model = NMF(n_components=n_roles, solver='mu', init='nndsvda')
    with warnings.catch_warnings():
        # A non-converged fit is tolerated here: every warning raised
        # while fitting is suppressed.
        warnings.simplefilter('ignore')
        left_factor = model.fit_transform(X)
    right_factor = model.components_
    return left_factor, right_factor
Example 9
Project: pandas-ml   Author: pandas-ml   File: test_decomposition.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_objectmapper(self):
        """Each estimator reached through ``ModelFrame.decomposition`` must be
        the very same object as the attribute of ``decomposition``."""
        df = pdml.ModelFrame([])
        estimator_names = (
            'PCA',
            'IncrementalPCA',
            'KernelPCA',
            'FactorAnalysis',
            'FastICA',
            'TruncatedSVD',
            'NMF',
            'SparsePCA',
            'MiniBatchSparsePCA',
            'SparseCoder',
            'DictionaryLearning',
            'MiniBatchDictionaryLearning',
            'LatentDirichletAllocation',
        )
        for name in estimator_names:
            self.assertIs(getattr(df.decomposition, name),
                          getattr(decomposition, name))
Example 10
Project: role2vec   Author: benedekrozemberczki   File: motif_count.py    License: GNU General Public License v3.0 6 votes vote down vote up
def factorize_string_matrix(self):
        """
        Derive a string label per graph node by factorizing a binary
        node-feature matrix with NMF and clustering the factors with KMeans.

        Returns a dict mapping ``str(node)`` to ``str(cluster label)``.
        """
        binned = self.binned_features
        row_ids = [node for node, feats in binned.items() for _ in feats]
        col_ids = [int(feat) for _, feats in binned.items() for feat in feats]
        ones = [1] * len(col_ids)

        # The matrix is just large enough to hold the biggest row/column id.
        matrix_shape = (max(row_ids)+1, max(col_ids)+1)
        indicator = csr_matrix((ones, (row_ids, col_ids)), shape=matrix_shape)

        nmf = NMF(n_components=self.args.factors,
                  init="random",
                  random_state=self.args.seed,
                  alpha=self.args.beta)
        factors = nmf.fit_transform(indicator)

        clustering = KMeans(n_clusters=self.args.clusters,
                            random_state=self.args.seed).fit(factors)
        labels = clustering.labels_
        # Nodes are used directly as row indices into the label array.
        return {str(node): str(labels[node]) for node in self.graph.nodes()}
Example 11
Project: topic-stability   Author: derekgreene   File: nmf.py    License: Apache License 2.0 6 votes vote down vote up
def apply( self, X, k = 2 ):
		"""
		Apply NMF to the specified document-term matrix X.

		Runs nimfa's Lsnmf with rank ``k`` and stores the basis matrix in
		``self.W``, the coefficient matrix in ``self.H`` and the iteration
		count reported by the fit in ``self.n_iter``.
		"""
		import nimfa

		def to_dense( factor ):
			# Factors exposing todense() are densified; anything else is
			# used as-is. Only the missing-method case is caught, so real
			# errors are no longer swallowed.
			try:
				return factor.todense()
			except AttributeError:
				return factor

		self.W = None
		self.H = None
		objective = "fro" if self.update == "euclidean" else "div"
		lsnmf = nimfa.Lsnmf(X, max_iter = self.max_iters, rank = k, seed = self.init_strategy, update = self.update, objective = objective, test_conv = self.test_conv )
		res = lsnmf()
		# Each factor is converted independently, so one dense factor no
		# longer forces the other one to stay unconverted.
		self.W = to_dense( res.basis() )
		self.H = to_dense( res.coef() )
		# last number of iterations
		self.n_iter = res.n_iter
Example 12
Project: scattertext   Author: JasonKessler   File: SentencesForTopicModeling.py    License: Apache License 2.0 5 votes vote down vote up
def get_topics_from_model(
			self,
			pipe=None,
			num_terms_per_topic=10):
		'''
		Fit a topic-model pipeline on ``self.sentX`` and list the top terms of each topic.

		Parameters
		----------
		pipe : Pipeline, optional
			The last transformer must populate a `components_` attribute when finished.
			When omitted, a fresh `Pipeline([
				('tfidf', TfidfTransformer(sublinear_tf=True)),
				('nmf', (NMF(n_components=30, alpha=.1, l1_ratio=.5, random_state=0)))])`
			is built for this call, so no fitted state is shared between calls.
		num_terms_per_topic : int
			Maximum number of terms kept per topic; only terms with a score > 0 are kept.

		Returns
		-------
		dict: {"<topic index>. <top term>": [term1, ...], ...}
			Topics without any positively scored term are omitted and a warning is issued.
		'''
		import warnings

		if pipe is None:
			pipe = Pipeline([
				('tfidf', TfidfTransformer(sublinear_tf=True)),
				('nmf', (NMF(n_components=30, alpha=.1, l1_ratio=.5, random_state=0)))])
		pipe.fit_transform(self.sentX)

		topic_model = {}
		for topic_idx, topic in enumerate(pipe._final_estimator.components_):
			# argsort is ascending; the reversed tail slice walks the
			# highest-scoring term indices first.
			term_list = [self.termidxstore.getval(i)
			             for i
			             in topic.argsort()[:-num_terms_per_topic - 1:-1]
			             if topic[i] > 0]
			if term_list:
				topic_model['%s. %s' % (topic_idx, term_list[0])] = term_list
			else:
				# The original built a Warning object without emitting it.
				warnings.warn("Topic %s has no terms with scores > 0. Omitting." % (topic_idx))
		return topic_model
Example 13
Project: MNIST-baselines   Author: cxy1997   File: utils.py    License: MIT License 5 votes vote down vote up
def skNMF(data, dim):
    """Reduce ``data`` to ``dim`` NMF components.

    The model is fitted on ``data`` and the same data is then passed
    through ``transform``.
    """
    reducer = NMF(n_components=dim).fit(data)
    return reducer.transform(data)

# Max-min norm 
Example 14
Project: karateclub   Author: benedekrozemberczki   File: danmf.py    License: GNU General Public License v3.0 5 votes vote down vote up
def _sklearn_pretrain(self, i):
        """
        Pre-train one layer by factorizing ``self._Z`` with sklearn's NMF.

        Arg types:
            * **i** *(int)* - The layer index; ``self.layers[i]`` sets the number of components.

        Return types:
            * **U** - Output of ``fit_transform`` on ``self._Z``.
            * **V** - The fitted ``components_`` matrix.
        """
        pretrainer = NMF(n_components=self.layers[i],
                         init="random",
                         random_state=self.seed,
                         max_iter=self.pre_iterations)
        U = pretrainer.fit_transform(self._Z)
        return U, pretrainer.components_
Example 15
Project: karateclub   Author: benedekrozemberczki   File: danmf.py    License: GNU General Public License v3.0 5 votes vote down vote up
def _pre_training(self):
        """
        Pre-train every layer in turn and collect the factor pairs in
        ``self._U_s`` and ``self._V_s`` (both reset on each call).
        """
        self._U_s, self._V_s = [], []
        for layer in range(self._p):
            # _setup_z runs before each layer's pre-training.
            self._setup_z(layer)
            left, right = self._sklearn_pretrain(layer)
            self._U_s.append(left)
            self._V_s.append(right)
Example 16
Project: karateclub   Author: benedekrozemberczki   File: boostne.py    License: GNU General Public License v3.0 5 votes vote down vote up
def _setup_base_model(self):
        """
        Cache the shape and non-zero coordinates of the residual matrix,
        then fit the first NMF model and start ``self._embeddings`` with
        its embedding.
        """
        self._shape = self._residuals.shape
        nonzero_coords = self._residuals.nonzero()
        self._index_1, self._index_2 = nonzero_coords[0], nonzero_coords[1]
        # The score matrix returned by the first fit is not used here.
        _, embedding = self._fit_and_score_NMF(self._residuals)
        self._embeddings = [embedding]
Example 17
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_estimator_checks.py    License: MIT License 5 votes vote down vote up
def test_check_estimator_clones():
    # check_estimator must leave the estimator it receives untouched:
    # the joblib hash taken before the call has to match the one after.
    from sklearn.datasets import load_iris
    iris = load_iris()

    estimator_classes = [GaussianMixture, LinearRegression,
                         RandomForestClassifier, NMF, SGDClassifier,
                         MiniBatchKMeans]
    for Estimator in estimator_classes:
        # first pass: unfitted estimator; second pass: fitted estimator
        for fit_first in (False, True):
            with ignore_warnings(category=(FutureWarning, DeprecationWarning)):
                est = Estimator()
                set_checking_parameters(est)
                set_random_state(est)
                if fit_first:
                    est.fit(iris.data + 10, iris.target)
                old_hash = _joblib.hash(est)
                check_estimator(est)
            assert_equal(old_hash, _joblib.hash(est))
Example 18
Project: rampy   Author: charlesll   File: ml_exploration.py    License: GNU General Public License v2.0 5 votes vote down vote up
def fit(self):
        """Create the model selected by ``self.algorithm`` and train it.

        "PCA" and "NMF" each create a new model with ``self.nb_compo``
        components; for any other value no model is created here and
        whatever ``self.model`` already holds is fitted.

        Do not forget to tune the hyperparameters.

        """
        if self.algorithm == "PCA":
            self.model = PCA(n_components=self.nb_compo)
        elif self.algorithm == "NMF":
            self.model = NMF(n_components=self.nb_compo, init="nndsvd")

        # The explicit comparison with True is kept on purpose: only values
        # equal to True select the scaled training data.
        if self.scaling == True:
            training_data = self.X_train_sc
        else:
            training_data = self.X_train
        self.model.fit(training_data)
Example 19
Project: talkingdata-adtracking-fraud-detection   Author: flowlight0   File: category_vector.py    License: MIT License 5 votes vote down vote up
def transformer_factory(self) -> TransformerMixin:
        """Create an NMF transformer with ``self.width`` components and a fixed random seed (71)."""
        nmf_transformer = NMF(n_components=self.width, random_state=71)
        return nmf_transformer
Example 20
Project: DANMF   Author: benedekrozemberczki   File: danmf.py    License: GNU General Public License v3.0 5 votes vote down vote up
def sklearn_pretrain(self, i):
        """
        Pre-train one layer by factorizing ``self.Z`` with sklearn's NMF.
        :param i: Layer index; ``self.args.layers[i]`` sets the number of components.
        :return: ``(U, V)`` - the ``fit_transform`` output and the fitted ``components_``.
        """
        pretrainer = NMF(n_components=self.args.layers[i],
                         init="random",
                         random_state=self.args.seed,
                         max_iter=self.args.pre_iterations)
        U = pretrainer.fit_transform(self.Z)
        return U, pretrainer.components_
Example 21
Project: DANMF   Author: benedekrozemberczki   File: danmf.py    License: GNU General Public License v3.0 5 votes vote down vote up
def pre_training(self):
        """
        Pre-train every layer in turn, showing a progress bar, and collect
        the factor pairs in ``self.U_s`` and ``self.V_s`` (both reset on each call).
        """
        print("\nLayer pre-training started. \n")
        self.U_s, self.V_s = [], []
        layer_progress = tqdm(range(self.p), desc="Layers trained: ", leave=True)
        for layer in layer_progress:
            # setup_z runs before each layer's pre-training.
            self.setup_z(layer)
            left, right = self.sklearn_pretrain(layer)
            self.U_s.append(left)
            self.V_s.append(right)
Example 22
Project: dynamic-nmf   Author: derekgreene   File: nmf.py    License: Apache License 2.0 5 votes vote down vote up
def apply( self, X, k = 2 ):
		"""
		Factorize the document-term matrix X into k components with
		scikit-learn's NMF, storing the ``fit_transform`` output in
		``self.W`` and the fitted ``components_`` in ``self.H``.
		"""
		# Clear earlier results first so a failed fit leaves both as None.
		self.W = self.H = None
		settings = dict(
			init=self.init_strategy,
			n_components=k,
			max_iter=self.max_iters,
			random_state=self.random_seed,
		)
		factorizer = decomposition.NMF(**settings)
		self.W = factorizer.fit_transform(X)
		self.H = factorizer.components_
Example 23
Project: dynamic-nmf   Author: derekgreene   File: nmf.py    License: Apache License 2.0 5 votes vote down vote up
def rank_terms( self, topic_index, top = -1 ):
		"""
		Return term indices for one topic, ordered from highest to lowest
		weight in row ``topic_index`` of ``self.H``.

		Only the first ``top`` indices are returned when ``top`` lies between
		1 and the number of ranked indices; otherwise the full ranking is
		returned. Raises ValueError when ``self.H`` is None.
		"""
		weights = self.H
		if weights is None:
			raise ValueError("No results for previous run available")
		# argsort is ascending, so reverse it to put the largest weight first
		ranked = np.argsort( weights[topic_index,:] )[::-1]
		wants_subset = 1 <= top <= len(ranked)
		return ranked[:top] if wants_subset else ranked
Example 24
Project: dynamic-nmf   Author: derekgreene   File: nmf.py    License: Apache License 2.0 5 votes vote down vote up
def generate_doc_rankings( W ):
	'''
	Build one ranking per column of the factor matrix W: the row indices
	ordered from the highest to the lowest value in that column.
	Returns a list with ``W.shape[1]`` index arrays.
	'''
	num_topics = W.shape[1]
	# argsort is ascending, hence the reversal
	return [ np.argsort( np.array( W[:,column] ) )[::-1] for column in range(num_topics) ]
Example 25
Project: dynamic-nmf   Author: derekgreene   File: nmf.py    License: Apache License 2.0 5 votes vote down vote up
def save_nmf_results( out_path, doc_ids, terms, term_rankings, partition, W, H, topic_labels=None ):
	"""
	Dump the NMF output to ``out_path`` with joblib as the tuple
	(doc_ids, terms, term_rankings, partition, W, H, topic_labels).
	When no labels are given, "C01", "C02", ... are generated, one per
	entry of ``term_rankings``.
	"""
	if topic_labels is None:
		topic_labels = [ "C%02d" % (position+1) for position in range( len(term_rankings) ) ]
	log.info( "Saving NMF results to %s" % out_path )
	payload = (doc_ids, terms, term_rankings, partition, W, H, topic_labels)
	joblib.dump( payload, out_path )
Example 26
Project: dynamic-nmf   Author: derekgreene   File: nmf.py    License: Apache License 2.0 5 votes vote down vote up
def load_nmf_results( in_path ):
	"""
	Read NMF results from ``in_path`` with joblib and return them as the
	tuple (doc_ids, terms, term_rankings, partition, W, H, labels).
	"""
	stored = joblib.load( in_path )
	# Unpacking fails loudly if the file does not hold exactly seven fields.
	doc_ids, terms, term_rankings, partition, W, H, labels = stored
	return (doc_ids, terms, term_rankings, partition, W, H, labels)
Example 27
Project: tbp-next-basket   Author: GiulioRossetti   File: nmf.py    License: GNU General Public License v3.0 5 votes vote down vote up
def build_model(self, baskets, use_probabilities=False):
        """Build the input matrix from ``baskets`` and factorize it with NMF.

        Stores the ``fit_transform`` output in ``self.W``, the fitted
        ``components_`` in ``self.H`` and their product in ``self.R``, marks
        the state as 'built' and returns ``self``.
        """
        # presumably populates self.V, which is factorized below - confirm
        # against __buildV
        self.__buildV(baskets, use_probabilities)

        factorizer = SKNMF(
            n_components=self.n_factor,
            init='random',
            solver='cd',
            tol=self.tol,
            max_iter=self.max_iter,
            alpha=self.alpha,
            l1_ratio=self.l1_ratio,
            beta=self.beta,
        )
        basis = factorizer.fit_transform(self.V)
        self.W = basis
        self.H = factorizer.components_
        self.R = np.dot(self.W, self.H)

        self.__state = 'built'
        return self
Example 28
Project: lale   Author: IBM   File: nmf.py    License: Apache License 2.0 5 votes vote down vote up
def __init__(self, n_components=None, init=None, solver='cd', beta_loss='frobenius', tol=0.0001, max_iter=200, random_state=None, alpha=0.0, l1_ratio=0.0, verbose=0, shuffle=False):
        """Record every hyperparameter in ``self._hyperparams`` and build the wrapped model from that mapping."""
        self._hyperparams = dict(
            n_components=n_components,
            init=init,
            solver=solver,
            beta_loss=beta_loss,
            tol=tol,
            max_iter=max_iter,
            random_state=random_state,
            alpha=alpha,
            l1_ratio=l1_ratio,
            verbose=verbose,
            shuffle=shuffle,
        )
        self._wrapped_model = SKLModel(**self._hyperparams)
Example 29
Project: mltk-algo-contrib   Author: splunk   File: NMF.py    License: Apache License 2.0 5 votes vote down vote up
def __init__(self, options):
        """Process ``options`` and build the underlying NMF estimator.

        ``options`` is handed to ``handle_options``; its ``params`` entry
        (an empty dict when absent) is passed through ``convert_params``
        with the expected type of each parameter name, with ``k`` aliased
        to ``n_components``. The converted parameters become the
        estimator's keyword arguments.
        """
        self.handle_options(options)
        out_params = convert_params(
            options.get('params', {}),
            floats=['beta_loss','tol','alpha','l1_ratio'],
            strs=['init','solver'],
            ints=['k','max_iter','random_state'],
            # Was misspelled 'versbose', so the estimator's real 'verbose'
            # option was never registered as a boolean parameter.
            bools=['verbose','shuffle'],
            aliases={'k': 'n_components'}
        )

        self.estimator = _NMF(**out_params)
Example 30
Project: yelp   Author: melqkiades   File: nmf_context_extractor.py    License: GNU Lesser General Public License v2.1 5 votes vote down vote up
def build_topic_model(self):
        """Fit an NMF topic model on ``self.document_term_matrix``.

        Stores the model in ``self.topic_model``, the ``fit_transform``
        output in ``self.document_topic_matrix`` and the fitted
        ``components_`` in ``self.topic_term_matrix``. A timestamped
        message is printed before and after the fit.
        """
        timestamp_format = "%Y/%m/%d-%H:%M:%S"
        print('%s: building NMF topic model' % time.strftime(timestamp_format))

        model = decomposition.NMF(
            init="nndsvd",
            n_components=self.num_topics,
            max_iter=Constants.TOPIC_MODEL_ITERATIONS,
        )
        self.topic_model = model
        self.document_topic_matrix = model.fit_transform(self.document_term_matrix)
        self.topic_term_matrix = model.components_

        print('%s: topic model built' % time.strftime(timestamp_format))