Python sklearn.decomposition.NMF Examples

The following are 30 code examples of sklearn.decomposition.NMF(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the module sklearn.decomposition.
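
Before the project-specific examples, here is a minimal, self-contained sketch of the typical NMF workflow; the toy matrix below is made up for illustration:

import numpy as np
from sklearn.decomposition import NMF

X = np.random.RandomState(0).rand(6, 4)   # NMF requires a non-negative matrix
model = NMF(n_components=2, init='nndsvd', random_state=0)
W = model.fit_transform(X)   # sample-to-component weights, shape (6, 2)
H = model.components_        # component-to-feature weights, shape (2, 4)
# np.dot(W, H) reconstructs an approximation of X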
Example #1
Source File: nmf.py    From dynamic-nmf with Apache License 2.0
def rank_terms( self, topic_index, top = -1 ):
		"""
		Return the top ranked terms for the specified topic, generated during the last NMF run.
		"""
		if self.H is None:
			raise ValueError("No results for previous run available")
		# NB: reverse
		top_indices = np.argsort( self.H[topic_index,:] )[::-1]
		# truncate if necessary
		if top < 1 or top > len(top_indices):
			return top_indices
		return top_indices[0:top] 
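
The argsort()[::-1] idiom above (sort ascending, then reverse) is how several examples on this page pick top-ranked terms. A toy illustration with made-up weights:

import numpy as np

H = np.array([[0.1, 0.7, 0.0, 0.4]])       # one topic row, four term weights
top_indices = np.argsort(H[0, :])[::-1]    # term indices by descending weight
print(top_indices[:2])                     # [1 3], the two strongest terms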
Example #2
Source File: get_topic.py    From poem_generator with Apache License 2.0
def write_topics(ftopics, fwords, ftopics_words, poem_words, n_topic, n_topic_words):
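    # NB: `count_vect` is a CountVectorizer defined at module level in the
    # original source file; it is not part of this excerpt.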
    count_matrix = count_vect.fit_transform(poem_words)
    tfidf = TfidfTransformer().fit_transform(count_matrix)
    nmf = decomposition.NMF(n_components=n_topic).fit(tfidf)
    feature_names = count_vect.get_feature_names()
    fw = codecs.open(ftopics, 'w', 'utf-8')
    for topic in nmf.components_:
        fw.write(' '.join([feature_names[i] for i in topic.argsort()[:-n_topic_words - 1:-1]]) + '\n')
    fw.close()
    print('Write topics done.')
    fw = codecs.open(fwords, 'wb')
    pickle.dump(feature_names, fw)
    fw.close()
    print('Write words done.')
    fw = codecs.open(ftopics_words, 'wb')
    pickle.dump(nmf.components_, fw)
    fw.close()
    print('Write topic_words done.') 
Example #3
Source File: test_decomposition.py    From pandas-ml with BSD 3-Clause "New" or "Revised" License
def test_objectmapper(self):
        df = pdml.ModelFrame([])
        self.assertIs(df.decomposition.PCA, decomposition.PCA)
        self.assertIs(df.decomposition.IncrementalPCA,
                      decomposition.IncrementalPCA)
        self.assertIs(df.decomposition.KernelPCA, decomposition.KernelPCA)
        self.assertIs(df.decomposition.FactorAnalysis,
                      decomposition.FactorAnalysis)
        self.assertIs(df.decomposition.FastICA, decomposition.FastICA)
        self.assertIs(df.decomposition.TruncatedSVD, decomposition.TruncatedSVD)
        self.assertIs(df.decomposition.NMF, decomposition.NMF)
        self.assertIs(df.decomposition.SparsePCA, decomposition.SparsePCA)
        self.assertIs(df.decomposition.MiniBatchSparsePCA,
                      decomposition.MiniBatchSparsePCA)
        self.assertIs(df.decomposition.SparseCoder, decomposition.SparseCoder)
        self.assertIs(df.decomposition.DictionaryLearning,
                      decomposition.DictionaryLearning)
        self.assertIs(df.decomposition.MiniBatchDictionaryLearning,
                      decomposition.MiniBatchDictionaryLearning)

        self.assertIs(df.decomposition.LatentDirichletAllocation,
                      decomposition.LatentDirichletAllocation) 
Example #4
Source File: topic.py    From Python-DevOps with MIT License
def train_nmf(corpus, n_topics=10, max_df=0.95, min_df=2,
              cleaning=clearstring, stop_words='english'):
    if cleaning is not None:
        for i in range(len(corpus)):
            corpus[i] = cleaning(corpus[i])
    tfidf_vectorizer = TfidfVectorizer(
        max_df=max_df, min_df=min_df, stop_words=stop_words)
    tfidf = tfidf_vectorizer.fit_transform(corpus)
    tfidf_features = tfidf_vectorizer.get_feature_names()
    nmf = NMF(
        n_components=n_topics,
        random_state=1,
        alpha=.1,
        l1_ratio=.5,
        init='nndsvd').fit(tfidf)
    return TOPIC(tfidf_features, nmf) 
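
Note on newer scikit-learn releases: the single alpha parameter used above (and in several other examples on this page) was split into alpha_W and alpha_H in scikit-learn 1.0 and removed in 1.2, and get_feature_names() was replaced by get_feature_names_out(). A rough equivalent of the call above for recent versions (not numerically identical, since the new parameters scale the regularization by the matrix dimensions):

tfidf_features = tfidf_vectorizer.get_feature_names_out()
nmf = NMF(
    n_components=n_topics,
    random_state=1,
    alpha_W=.1,         # replaces the removed `alpha`
    alpha_H='same',     # the default: reuse alpha_W for H
    l1_ratio=.5,
    init='nndsvd').fit(tfidf)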
Example #5
Source File: topics.py    From atap with Apache License 2.0
def __init__(self, n_topics=50, estimator='LDA'):
        """
        n_topics is the desired number of topics.
        To use Latent Semantic Analysis, set estimator to 'LSA';
        to use Non-Negative Matrix Factorization, set estimator to 'NMF';
        otherwise, it defaults to Latent Dirichlet Allocation ('LDA').
        """
        self.n_topics = n_topics

        if estimator == 'LSA':
            self.estimator = TruncatedSVD(n_components=self.n_topics)
        elif estimator == 'NMF':
            self.estimator = NMF(n_components=self.n_topics)
        else:
            # NB: `n_topics` was renamed to `n_components` in scikit-learn 0.19
            self.estimator = LatentDirichletAllocation(n_components=self.n_topics)

        self.model = Pipeline([
            ('norm', TextNormalizer()),
            ('tfidf', CountVectorizer(tokenizer=identity,
                                      preprocessor=None, lowercase=False)),
            ('model', self.estimator)
        ]) 
Example #6
Source File: boosted_embedding.py    From BoostedFactorization with GNU General Public License v3.0
def fit_and_score_NMF(self, new_residuals):
        """
        Factorizing a residual matrix, returning the approximate target and an embedding.
        :param new_residuals: Input target matrix.
        :return scores: Approximate target matrix.
        :return W: Embedding matrix.
        """
        model = NMF(n_components=self.args.dimensions,
                    init="random",
                    verbose=False,
                    alpha=self.args.alpha)

        W = model.fit_transform(new_residuals)
        H = model.components_
        print("Scoring started.\n")
        sub_scores = np.sum(np.multiply(W[self.index_1, :], H[:, self.index_2].T), axis=1)
        scores = np.maximum(self.residuals.data-sub_scores, 0)
        scores = sparse.csr_matrix((scores, (self.index_1, self.index_2)),
                                   shape=self.shape,
                                   dtype=np.float32)
        return scores, W 
Example #7
Source File: plot_nmf.py    From sklearn-onnx with MIT License
def nmf_to_onnx(W, H, op_version=12):
    """
    The function converts an NMF described by matrices
    *W*, *H* (*WH* approximates the training data *M*)
    into a function which takes two indices *(i, j)*
    and returns the prediction for them. It assumes
    these indices apply to the training data.
    """
    col = OnnxArrayFeatureExtractor(H, 'col')
    row = OnnxArrayFeatureExtractor(W.T, 'row')
    dot = OnnxMul(col, row, op_version=op_version)
    res = OnnxReduceSum(dot, output_names="rec", op_version=op_version)
    indices_type = np.array([0], dtype=np.int64)
    onx = res.to_onnx(inputs={'col': indices_type,
                              'row': indices_type},
                      outputs=[('rec', FloatTensorType((None, 1)))],
                      target_opset=op_version)
    return onx 
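
A hedged usage sketch for the converted model; it assumes onnxruntime is installed, and the index values are made up:

import numpy as np
import onnxruntime as rt

sess = rt.InferenceSession(onx.SerializeToString(),
                           providers=["CPUExecutionProvider"])
pred = sess.run(None, {'row': np.array([3], dtype=np.int64),
                       'col': np.array([5], dtype=np.int64)})
# pred[0] approximates the training-matrix entry M[3, 5] as W[3, :] @ H[:, 5]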
Example #8
Source File: factor.py    From GraphRole with MIT License
def get_nmf_decomposition(
    X: np.ndarray,
    n_roles: int,
) -> FactorTuple:
    """
    Compute NMF decomposition
    :param X: matrix to factor
    :param n_roles: rank of decomposition
    """
    nmf = NMF(n_components=n_roles, solver='mu', init='nndsvda')
    with warnings.catch_warnings():
        # ignore convergence warning from NMF since
        # this will result in a large cost anyways
        warnings.simplefilter('ignore')
        G = nmf.fit_transform(X)
        F = nmf.components_
    return G, F 
Example #9
Source File: motif_count.py    From role2vec with GNU General Public License v3.0
def factorize_string_matrix(self):
        """
        Creating string labels by factorization.
        """
        rows = [node for node, features in self.binned_features.items() for feature in features]
        columns = [int(feature) for node, features in self.binned_features.items() for feature in features]
        scores = [1 for i in range(len(columns))]
        row_number = max(rows)+1
        column_number = max(columns)+1
        features = csr_matrix((scores, (rows, columns)), shape=(row_number, column_number))
        model = NMF(n_components=self.args.factors, init="random", random_state=self.args.seed, alpha=self.args.beta)
        factors = model.fit_transform(features)
        kmeans = KMeans(n_clusters=self.args.clusters, random_state=self.args.seed).fit(factors)
        labels = kmeans.labels_
        features = {str(node): str(labels[node]) for node in self.graph.nodes()}
        return features 
Example #10
Source File: boostne.py    From karateclub with GNU General Public License v3.0
def _fit_and_score_NMF(self, new_residuals):
        """
        Factorizing a residual matrix, returning the approximate target, and an embedding.

        Arg types:
            * **new_residuals** *(COO Scipy matrix)* - The residual matrix.

        Return types:
            * **scores** *(COO Scipy matrix)* - The residual scores.
            * **W** *(Numpy array)* - The embedding matrix.
        """
        model = NMF(n_components=self.dimensions,
                    init="random",
                    verbose=False,
                    alpha=self.alpha)

        W = model.fit_transform(new_residuals)
        H = model.components_

        sub_scores = np.sum(np.multiply(W[self._index_1, :], H[:, self._index_2].T), axis=1)
        scores = np.maximum(self._residuals.data-sub_scores, 0)
        scores = sparse.csr_matrix((scores, (self._index_1, self._index_2)),
                                   shape=self._shape,
                                   dtype=np.float32)
        return scores, W 
Example #11
Source File: nmf.py    From topic-stability with Apache License 2.0
def apply( self, X, k = 2 ):
		"""
		Apply NMF to the specified document-term matrix X.
		"""
		import nimfa
		self.W = None
		self.H = None
		initialize_only = self.max_iters < 1
		if self.update == "euclidean":
			objective = "fro"
		else:
			objective = "div"
		lsnmf = nimfa.Lsnmf(X, max_iter = self.max_iters, rank = k, seed = self.init_strategy, update = self.update, objective = objective, test_conv = self.test_conv ) 
		res = lsnmf()
		# TODO: fix
		try:
			self.W = res.basis().todense() 
			self.H = res.coef().todense()
		except AttributeError:
			self.W = res.basis()
			self.H = res.coef()
		# last number of iterations
		self.n_iter = res.n_iter 
Example #12
Source File: main.py    From yelp with GNU Lesser General Public License v2.1
def factorize_nmf():
    print('factorizing matrix')

    newsgroups_mmf_file = '/Users/fpena/tmp/nmf_graphlab/newsgroups/newsgroups_matrix.mmf'
    document_term_matrix = mmread(newsgroups_mmf_file)

    factorizer = decomposition.NMF(
        init="nndsvd", n_components=Constants.TOPIC_MODEL_NUM_TOPICS,
        max_iter=Constants.TOPIC_MODEL_ITERATIONS,
        alpha=Constants.NMF_REGULARIZATION,
        l1_ratio=Constants.NMF_REGULARIZATION_RATIO
    )
    document_topic_matrix = \
        factorizer.fit_transform(document_term_matrix)
    topic_term_matrix = factorizer.components_
    # mmwrite(mmf_file, small_matrix)
    # mmwrite(newsgroups_mmf_file, X) 
Example #13
Source File: nmf_context_extractor.py    From yelp with GNU Lesser General Public License v2.1
def build_stable_topic_model(self):

        matrices = []
        for i in range(Constants.TOPIC_MODEL_PASSES):
            topic_term_matrix = self.build_single_topic_model().transpose()
            matrices.append(topic_term_matrix)

        stack_matrix = numpy.hstack(matrices)
        stack_matrix = normalize(stack_matrix, axis=0)
        stack_matrix = stack_matrix.transpose()

        print "Stack matrix M of size %s" % str(stack_matrix.shape)

        self.topic_model = decomposition.NMF(
            init="nndsvd", n_components=self.num_topics,
            max_iter=Constants.TOPIC_MODEL_ITERATIONS,
            alpha=Constants.NMF_REGULARIZATION,
            l1_ratio=Constants.NMF_REGULARIZATION_RATIO
        )

        self.document_topic_matrix = \
            self.topic_model.fit_transform(stack_matrix)
        self.topic_term_matrix = self.topic_model.components_

        row_sums = self.topic_term_matrix.sum(axis=1)
        self.topic_term_matrix /= row_sums[:, numpy.newaxis]

        print "Generated factor W of size %s and factor H of size %s" % (
            str(self.document_topic_matrix.shape),
            str(self.topic_term_matrix.shape)
        )

        # return model 
Example #14
Source File: nmf_context_extractor.py    From yelp with GNU Lesser General Public License v2.1
def build_single_topic_model(self):
        # print('%s: building NMF topic model' %
        #       time.strftime("%Y/%m/%d-%H:%M:%S"))

        topic_model = decomposition.NMF(
            init="nndsvd", n_components=self.num_topics,
            max_iter=Constants.TOPIC_MODEL_ITERATIONS,
            alpha=Constants.NMF_REGULARIZATION,
            l1_ratio=Constants.NMF_REGULARIZATION_RATIO
        )
        topic_model.fit_transform(self.document_term_matrix)
        topic_term_matrix = topic_model.components_

        return topic_term_matrix 
Example #15
Source File: nmf.py    From yelp with GNU Lesser General Public License v2.1
def rank_terms( self, topic_index, top = -1 ):
		"""
		Return the top ranked terms for the specified topic, generated during the last NMF run.
		"""
		if self.H is None:
			raise ValueError("No results for previous run available")
		# NB: reverse
		top_indices = np.argsort( self.H[topic_index,:] )[::-1]
		# truncate if necessary
		if top < 1 or top > len(top_indices):
			return top_indices
		return top_indices[0:top] 
Example #16
Source File: topic.py    From Python-DevOps with MIT License
def train_nmf(corpus, n_topics=10, max_df=0.95, min_df=2,
              cleaning=clearstring, stop_words='english'):
    if cleaning is not None:
        for i in range(len(corpus)):
            corpus[i] = cleaning(corpus[i])
    tfidf_vectorizer = TfidfVectorizer(max_df=max_df, min_df=min_df,
                                       stop_words=stop_words)
    tfidf = tfidf_vectorizer.fit_transform(corpus)
    tfidf_features = tfidf_vectorizer.get_feature_names()
    nmf = NMF(n_components=n_topics, random_state=1, alpha=.1,
              l1_ratio=.5, init='nndsvd').fit(tfidf)
    return TOPIC(tfidf_features, nmf) 
Example #17
Source File: decomposition.py    From hypers with BSD 3-Clause "New" or "Revised" License
def calculate(self, n_components: int = 4, **kwargs) -> Tuple[np.ndarray, np.ndarray]:
        if n_components is None:
            n_components = self.X.shape[-1]

        mdl = NMF(n_components=n_components, **kwargs)
        self.ims = mdl.fit_transform(self.X.collapse()).reshape(self.X.data.shape[:-1] + (n_components,))
        self.spcs = mdl.components_.transpose()

        return self.ims, self.spcs 
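
The pattern above collapses a hyperspectral cube to a (pixels, bands) matrix, factorizes it, and reshapes the mixing weights back into image planes. A self-contained sketch of the same idea in plain numpy (the hypers API itself is not used here):

import numpy as np
from sklearn.decomposition import NMF

cube = np.random.RandomState(0).rand(8, 8, 32)   # x, y, spectral bands
flat = cube.reshape(-1, 32)                      # collapse to (pixels, bands)
mdl = NMF(n_components=4, init='nndsvda', random_state=0)
ims = mdl.fit_transform(flat).reshape(8, 8, 4)   # one abundance map per component
spcs = mdl.components_.T                         # one spectrum per component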
Example #18
Source File: nmf.py    From topic-ensemble with Apache License 2.0
def apply( self, X, k = 2, init_W = None, init_H = None ):
		"""
		Apply NMF to the specified document-term matrix X.
		"""
		self.W = None
		self.H = None
		random_seed = np.random.randint( 1, 100000 )
		if not (init_W is None or init_H is None):
			model = decomposition.NMF( init="custom", n_components=k, max_iter=self.max_iters, random_state = random_seed )
			self.W = model.fit_transform( X, W=init_W, H=init_H )
		else:
			model = decomposition.NMF( init=self.init_strategy, n_components=k, max_iter=self.max_iters, random_state = random_seed )
			self.W = model.fit_transform( X )
		self.H = model.components_ 
Example #19
Source File: nmf.py    From topic-ensemble with Apache License 2.0
def rank_terms( self, topic_index, top = -1 ):
		"""
		Return the top ranked terms for the specified topic, generated during the last NMF run.
		"""
		if self.H is None:
			raise ValueError("No results for previous run available")
		# NB: reverse
		top_indices = np.argsort( self.H[topic_index,:] )[::-1]
		# truncate if necessary
		if top < 1 or top > len(top_indices):
			return top_indices
		return top_indices[0:top] 
Example #20
Source File: NMF.py    From mltk-algo-contrib with Apache License 2.0
def __init__(self, options):
        self.handle_options(options)
        out_params = convert_params(
            options.get('params', {}),
            floats=['beta_loss','tol','alpha','l1_ratio'],
            strs=['init','solver'],
            ints=['k','max_iter','random_state'],
            bools=['verbose', 'shuffle'],
            aliases={'k': 'n_components'}
        )

        self.estimator = _NMF(**out_params) 
Example #21
Source File: NMFRecommender.py    From RecSys2019_DeepLearning_Evaluation with GNU Affero General Public License v3.0
def fit(self, num_factors=100,
            l1_ratio = 0.5,
            solver = "multiplicative_update",
            init_type = "random",
            beta_loss = "frobenius",
            verbose = False,
            random_seed = None):


        assert l1_ratio>= 0 and l1_ratio<=1, "{}: l1_ratio must be between 0 and 1, provided value was {}".format(self.RECOMMENDER_NAME, l1_ratio)

        if solver not in self.SOLVER_VALUES:
            raise ValueError("Value for 'solver' not recognized. Acceptable values are {}, provided was '{}'".format(self.SOLVER_VALUES.keys(), solver))

        if init_type not in self.INIT_VALUES:
            raise ValueError("Value for 'init_type' not recognized. Acceptable values are {}, provided was '{}'".format(self.INIT_VALUES, init_type))

        if beta_loss not in self.BETA_LOSS_VALUES:
            raise ValueError("Value for 'beta_loss' not recognized. Acceptable values are {}, provided was '{}'".format(self.BETA_LOSS_VALUES, beta_loss))

        self._print("Computing NMF decomposition...")

        nmf_solver = NMF(n_components  = num_factors,
                         init = init_type,
                         solver = self.SOLVER_VALUES[solver],
                         beta_loss = beta_loss,
                         random_state = random_seed,
                         l1_ratio = l1_ratio,
                         shuffle = True,
                         verbose = verbose,
                         max_iter = 500)

        nmf_solver.fit(self.URM_train)

        self.ITEM_factors = nmf_solver.components_.copy().T
        self.USER_factors = nmf_solver.transform(self.URM_train)

        self._print("Computing NMF decomposition... Done!") 
Example #22
Source File: test_estimator_checks.py    From twitter-stock-recommendation with MIT License
def test_check_estimator_clones():
    # check that check_estimator doesn't modify the estimator it receives
    from sklearn.datasets import load_iris
    iris = load_iris()

    for Estimator in [GaussianMixture, LinearRegression,
                      RandomForestClassifier, NMF, SGDClassifier,
                      MiniBatchKMeans]:
        with ignore_warnings(category=FutureWarning):
            # when 'est = SGDClassifier()'
            est = Estimator()
        set_checking_parameters(est)
        set_random_state(est)
        # without fitting
        old_hash = joblib.hash(est)
        check_estimator(est)
        assert_equal(old_hash, joblib.hash(est))

        with ignore_warnings(category=FutureWarning):
            # when 'est = SGDClassifier()'
            est = Estimator()
        set_checking_parameters(est)
        set_random_state(est)
        # with fitting
        est.fit(iris.data + 10, iris.target)
        old_hash = joblib.hash(est)
        check_estimator(est)
        assert_equal(old_hash, joblib.hash(est)) 
Example #23
Source File: nmf.py    From topic-stability with Apache License 2.0
def apply( self, X, k = 2 ):
		"""
		Apply NMF to the specified document-term matrix X.
		"""
		from sklearn import decomposition
		self.W = None
		self.H = None
		model = decomposition.NMF(init=self.init_strategy, n_components=k, max_iter=self.max_iters)
		self.W = model.fit_transform(X)
		self.H = model.components_ 
Example #24
Source File: nmf.py    From topic-stability with Apache License 2.0
def rank_terms( self, topic_index, top = -1 ):
		"""
		Return the top ranked terms for the specified topic, generated during the last NMF run.
		"""
		if self.H is None:
			raise ValueError("No results for previous run available")
		# NB: reverse
		top_indices = np.argsort( self.H[topic_index,:] )[::-1]
		# truncate if necessary
		if top < 1 or top > len(top_indices):
			return top_indices
		return top_indices[0:top] 
Example #25
Source File: nmf.py    From topic-stability with Apache License 2.0
def rank_terms( self, topic_index, top = -1 ):
		"""
		Return the top ranked terms for the specified topic, generated during the last NMF run.
		"""
		if self.H is None:
			raise ValueError("No results for previous run available")
		h = np.array( self.H[topic_index,:] ).flatten()
		# NB: reverse ordering
		top_indices = np.argsort(h)[::-1]
		# truncate
		if top < 1 or top > len(top_indices):
			return top_indices
		return top_indices[0:top] 
Example #26
Source File: danmf.py    From DANMF with GNU General Public License v3.0
def sklearn_pretrain(self, i):
        """
        Pretraining a single layer of the model with sklearn.
        :param i: Layer index.
        """
        nmf_model = NMF(n_components=self.args.layers[i],
                        init="random",
                        random_state=self.args.seed,
                        max_iter=self.args.pre_iterations)

        U = nmf_model.fit_transform(self.Z)
        V = nmf_model.components_
        return U, V 
Example #27
Source File: SentencesForTopicModeling.py    From scattertext with Apache License 2.0
def get_topics_from_model(
			self,
			pipe=Pipeline([
				('tfidf', TfidfTransformer(sublinear_tf=True)),
				('nmf', (NMF(n_components=30, alpha=.1, l1_ratio=.5, random_state=0)))]),
			num_terms_per_topic=10):
		'''

		Parameters
		----------
		pipe : Pipeline
			For example, `Pipeline([
				('tfidf', TfidfTransformer(sublinear_tf=True)),
				('nmf', (NMF(n_components=30, alpha=.1, l1_ratio=.5, random_state=0)))])`
			The last transformer must populate a `components_` attribute when finished.
		num_terms_per_topic : int

		Returns
		-------
		dict: {term: [term1, ...], ...}
		'''
		pipe.fit_transform(self.sentX)

		topic_model = {}
		for topic_idx, topic in enumerate(pipe._final_estimator.components_):
			term_list = [self.termidxstore.getval(i)
			             for i
			             in topic.argsort()[:-num_terms_per_topic - 1:-1]
			             if topic[i] > 0]
			if len(term_list) > 0:
				topic_model['%s. %s' % (topic_idx, term_list[0])] = term_list
			else:
				Warning("Topic %s has no terms with scores > 0. Omitting." % (topic_idx))
		return topic_model 
Example #28
Source File: utils.py    From MNIST-baselines with MIT License
def skNMF(data, dim):
    model = NMF(n_components=dim)
    model.fit(data)
    return model.transform(data)
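
A hedged usage sketch of skNMF on made-up data; it assumes `from sklearn.decomposition import NMF`, as in the source file:

import numpy as np

data = np.random.RandomState(0).rand(100, 784)   # e.g. flattened 28x28 images
reduced = skNMF(data, 16)
print(reduced.shape)   # (100, 16)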

Example #29
Source File: danmf.py    From karateclub with GNU General Public License v3.0
def _sklearn_pretrain(self, i):
        """
        Pre-training a single layer of the model with sklearn.

        Arg types:
            * **i** *(int)* - The layer index.
        """
        nmf_model = NMF(n_components=self.layers[i],
                        init="random",
                        random_state=self.seed,
                        max_iter=self.pre_iterations)

        U = nmf_model.fit_transform(self._Z)
        V = nmf_model.components_
        return U, V 
Example #30
Source File: danmf.py    From karateclub with GNU General Public License v3.0
def _pre_training(self):
        """
        Pre-training each NMF layer.
        """
        self._U_s = []
        self._V_s = []
        for i in range(self._p):
            self._setup_z(i)
            U, V = self._sklearn_pretrain(i)
            self._U_s.append(U)
            self._V_s.append(V)