Python sklearn.utils.extmath.randomized_svd() Examples

The following are 30 code examples of sklearn.utils.extmath.randomized_svd(), collected from open-source projects. The project and source file for each example are noted above it. You may also want to check out the other functions and classes available in the sklearn.utils.extmath module.
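Before the project examples, here is a minimal, self-contained usage sketch; the matrix shape and parameter values below are made up for illustration:

import numpy as np
from sklearn.utils.extmath import randomized_svd

# a random 100 x 50 matrix; any 2-D array or scipy sparse matrix works
X = np.random.RandomState(0).rand(100, 50)

# approximate the top 5 singular triplets
U, s, Vt = randomized_svd(X, n_components=5, n_iter=5, random_state=0)

print(U.shape, s.shape, Vt.shape)  # (100, 5) (5,) (5, 50)
X_approx = U @ np.diag(s) @ Vt     # rank-5 reconstruction of X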
Example #1
Source File: ops.py    From pymssa with MIT License
import numpy as np
from sklearn.utils.extmath import randomized_svd


def decompose_trajectory_matrix(trajectory_matrix, K, svd_method='randomized'):
    # calculate the S matrix
    # https://arxiv.org/pdf/1309.5050.pdf
    S = np.dot(trajectory_matrix, trajectory_matrix.T)

    # perform SVD on S
    if svd_method == 'randomized':
        U, s, V = randomized_svd(S, K)
    elif svd_method == 'exact':
        U, s, V = np.linalg.svd(S)
    else:
        raise ValueError("svd_method must be 'randomized' or 'exact'")

    # the valid rank is only where the eigenvalues are > 0
    rank = np.sum(s > 0)

    # the singular values of the trajectory matrix are the square
    # roots of the eigenvalues of S
    s = np.sqrt(s)

    return U, s, V, rank 
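To see the eigenvalue/singular-value relationship the comments above rely on, here is a quick check; the 3 x 10 trajectory matrix is made up for illustration:

import numpy as np

T = np.random.RandomState(0).rand(3, 10)            # hypothetical trajectory matrix
U, s, V, rank = decompose_trajectory_matrix(T, K=3)

# the square roots of the eigenvalues of T T^T are the singular values of T
_, sv, _ = np.linalg.svd(T, full_matrices=False)
print(np.allclose(s[:rank], sv[:rank]))             # True, up to numerical error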
Example #2
Source File: factor_analyzer.py    From factor_analyzer with GNU General Public License v2.0
def _fit_principal(self, X):
        """
        Fit the factor analysis model using a principal
        factor analysis solution.

        Parameters
        ----------
        X : array-like
            The full data set.

        Returns
        -------
        loadings : numpy array
            The factor loadings matrix.
        """
        # standardize the data
        X = X.copy()
        X = (X - X.mean(0)) / X.std(0)

        # perform the randomized singular value decomposition
        U, S, V = randomized_svd(X, self.n_factors)
        corr_mtx = np.dot(X, V.T)
        loadings = np.array([[pearsonr(x, c)[0] for c in corr_mtx.T] for x in X.T])
        return loadings 
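The same principal-factor idea works outside the class; a standalone sketch, with random data and a factor count of 2 (both made up for illustration):

import numpy as np
from scipy.stats import pearsonr
from sklearn.utils.extmath import randomized_svd

rng = np.random.RandomState(0)
X = rng.rand(100, 6)
X = (X - X.mean(0)) / X.std(0)   # standardize the columns

U, S, V = randomized_svd(X, 2, random_state=0)
scores = np.dot(X, V.T)          # component scores
loadings = np.array([[pearsonr(x, c)[0] for c in scores.T] for x in X.T])
print(loadings.shape)            # (6, 2)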
Example #3
Source File: ksvd.py    From Lyssandra with BSD 3-Clause "New" or "Revised" License
import sys

import numpy as np
from sklearn.utils.extmath import randomized_svd


def ksvd(Y, D, X, n_cycles=1, verbose=True):
    n_atoms = D.shape[1]
    n_features, n_samples = Y.shape
    unused_atoms = []
    # residual (fast_dot was removed from recent sklearn; np.dot is equivalent)
    R = Y - np.dot(D, X)

    for c in range(n_cycles):
        for k in range(n_atoms):
            if verbose:
                sys.stdout.write("\r" + "k-svd..." + ":%3.2f%%" % ((k / float(n_atoms)) * 100))
                sys.stdout.flush()
            # find all the datapoints that use the kth atom
            omega_k = X[k, :] != 0
            if not np.any(omega_k):
                unused_atoms.append(k)
                continue
            # the residual due to all the other atoms but k
            Rk = R[:, omega_k] + np.outer(D[:, k], X[k, omega_k])
            U, S, V = randomized_svd(Rk, n_components=1, n_iter=10, flip_sign=False)
            D[:, k] = U[:, 0]
            X[k, omega_k] = V[0, :] * S[0]
            # update the residual
            R[:, omega_k] = Rk - np.outer(D[:, k], X[k, omega_k])
        print()
    return D, X, unused_atoms 
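A toy invocation of ksvd(); the shapes and sparsity level are made up, and in practice D should hold unit-norm atoms and X a sparse code:

import numpy as np

rng = np.random.RandomState(0)
Y = rng.rand(20, 100)                              # one signal per column
D = rng.rand(20, 8)
D /= np.linalg.norm(D, axis=0)                     # unit-norm atoms
X = rng.rand(8, 100) * (rng.rand(8, 100) < 0.3)    # sparse codes

D, X, unused_atoms = ksvd(Y, D, X, n_cycles=1, verbose=False)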
Example #4
Source File: synthetic_test.py    From socialsent with Apache License 2.0
def make_synthetic_data(ppmi, counts, word_subset, new_weight, num_synth=10, 
        old_pos=OLD_POS, new_pos=NEW_POS, old_neg=OLD_NEG, new_neg=NEW_NEG, dim=300, seed_offset=0):
    # print(new_weight)
    # ppmi = ppmi.get_subembed(word_subset, restrict_context=False)
    amel_vecs = []
    print("Sampling positive...")
    for i in range(num_synth):
        amel_vecs.append(_sample_vec2(new_pos, old_neg, counts, new_weight, seed=i+seed_offset))
    amel_mat = vstack(amel_vecs)
    pejor_vecs = []
    print("Sampling negative...")
    for i in range(num_synth):
        pejor_vecs.append(_sample_vec2(old_pos, new_neg, counts, 1-new_weight, seed=i+num_synth+seed_offset))
    pejor_mat = vstack(pejor_vecs)
    print("Making matrix...")
    # ppmi_mat = vstack([ppmi.m, amel_mat, pejor_mat])
    u = vstack([counts.m, amel_mat, pejor_mat])
    print("SVD on matrix...")
    # u, s, v = randomized_svd(ppmi_mat, n_components=dim, n_iter=2)
    new_vocab = ppmi.iw
    new_vocab.extend(['a-{0:d}'.format(i) for i in range(num_synth)])
    new_vocab.extend(['p-{0:d}'.format(i) for i in range(num_synth)])
    return Embedding(u, new_vocab) 
Example #5
Source File: soft_impute.py    From fancyimpute with Apache License 2.0
def _svd_step(self, X, shrinkage_value, max_rank=None):
        """
        Returns reconstructed X from low-rank thresholded SVD and
        the rank achieved.
        """
        if max_rank:
            # if we have a max rank then perform the faster randomized SVD
            (U, s, V) = randomized_svd(
                X,
                max_rank,
                n_iter=self.n_power_iterations)
        else:
            # perform a full-rank SVD using LAPACK (np.linalg.svd, not ARPACK)
            (U, s, V) = np.linalg.svd(
                X,
                full_matrices=False,
                compute_uv=True)
        s_thresh = np.maximum(s - shrinkage_value, 0)
        rank = (s_thresh > 0).sum()
        s_thresh = s_thresh[:rank]
        U_thresh = U[:, :rank]
        V_thresh = V[:rank, :]
        S_thresh = np.diag(s_thresh)
        X_reconstruction = np.dot(U_thresh, np.dot(S_thresh, V_thresh))
        return X_reconstruction, rank 
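The shrinkage step above is singular-value soft-thresholding; a standalone sketch, with an arbitrary shrinkage value of 0.5 and made-up data:

import numpy as np
from sklearn.utils.extmath import randomized_svd

rng = np.random.RandomState(0)
X = rng.rand(50, 40)

U, s, Vt = randomized_svd(X, n_components=10, n_iter=5, random_state=0)
s_thresh = np.maximum(s - 0.5, 0)   # shrink, then drop the zeroed values
rank = int((s_thresh > 0).sum())
X_reconstruction = U[:, :rank] @ np.diag(s_thresh[:rank]) @ Vt[:rank, :]
print(rank, X_reconstruction.shape)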
Example #6
Source File: soft_impute.py    From ME-Net with MIT License
def _svd_step(self, X, shrinkage_value, max_rank=None):
        """
        Returns reconstructed X from low-rank thresholded SVD and
        the rank achieved.
        """
        if max_rank:
            # if we have a max rank then perform the faster randomized SVD
            (U, s, V) = randomized_svd(
                X,
                max_rank,
                n_iter=self.n_power_iterations)
        else:
            # perform a full-rank SVD using LAPACK (np.linalg.svd, not ARPACK)
            (U, s, V) = np.linalg.svd(
                X,
                full_matrices=False,
                compute_uv=True)
        s_thresh = np.maximum(s - shrinkage_value, 0)
        rank = (s_thresh > 0).sum()
        s_thresh = s_thresh[:rank]
        U_thresh = U[:, :rank]
        V_thresh = V[:rank, :]
        S_thresh = np.diag(s_thresh)
        X_reconstruction = np.dot(U_thresh, np.dot(S_thresh, V_thresh))
        return X_reconstruction, rank 
Example #7
Source File: PureSVDRecommender.py    From RecSys2019_DeepLearning_Evaluation with GNU Affero General Public License v3.0
def fit(self, num_factors=100, topK = None, random_seed = None):

        self._print("Computing SVD decomposition...")

        U, Sigma, QT = randomized_svd(self.URM_train,
                                      n_components=num_factors,
                                      #n_iter=5,
                                      random_state = random_seed)

        if topK is None:
            topK = self.n_items

        W_sparse = compute_W_sparse_from_item_latent_factors(QT.T, topK=topK)

        self.W_sparse = sps.csr_matrix(W_sparse)

        self._print("Computing SVD decomposition... Done!") 
Example #8
Source File: svd.py    From prince with MIT License
def compute_svd(X, n_components, n_iter, random_state, engine):
    """Computes an SVD with k components."""

    # Determine what SVD engine to use
    if engine == 'auto':
        engine = 'sklearn'

    # Compute the SVD
    if engine == 'fbpca':
        if FBPCA_INSTALLED:
            U, s, V = fbpca.pca(X, k=n_components, n_iter=n_iter)
        else:
            raise ValueError('fbpca is not installed; please install it if you want to use it')
    elif engine == 'sklearn':
        U, s, V = extmath.randomized_svd(
            X,
            n_components=n_components,
            n_iter=n_iter,
            random_state=random_state
        )
    else:
        raise ValueError("engine has to be one of ('auto', 'fbpca', 'sklearn')")

    U, V = extmath.svd_flip(U, V)

    return U, s, V 
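Note the extmath.svd_flip call at the end: it resolves the sign ambiguity of each singular vector pair so that repeated runs agree. A minimal sketch of that step on its own (the data is made up):

import numpy as np
from sklearn.utils import extmath

X = np.random.RandomState(0).rand(20, 8)
U, s, V = extmath.randomized_svd(X, n_components=3, n_iter=4, random_state=0)

# flip signs so the largest-magnitude loadings in U are positive
U, V = extmath.svd_flip(U, V)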
Example #9
Source File: test_extmath.py    From twitter-stock-recommendation with MIT License
def test_randomized_svd_low_rank_with_noise():
    # Check that extmath.randomized_svd can handle noisy matrices
    n_samples = 100
    n_features = 500
    rank = 5
    k = 10

    # generate a matrix X with approximate effective rank `rank` and an
    # important noise component
    X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features,
                             effective_rank=rank, tail_strength=0.1,
                             random_state=0)
    assert_equal(X.shape, (n_samples, n_features))

    # compute the singular values of X using the slow exact method
    _, s, _ = linalg.svd(X, full_matrices=False)

    for normalizer in ['auto', 'none', 'LU', 'QR']:
        # compute the singular values of X using the fast approximate
        # method without the iterated power method
        _, sa, _ = randomized_svd(X, k, n_iter=0,
                                  power_iteration_normalizer=normalizer,
                                  random_state=0)

        # the approximation does not filter out the noise:
        assert_greater(np.abs(s[:k] - sa).max(), 0.01)

        # compute the singular values of X using the fast approximate
        # method with iterated power method
        _, sap, _ = randomized_svd(X, k,
                                   power_iteration_normalizer=normalizer,
                                   random_state=0)

        # the iterated power method helps get rid of the noise:
        assert_almost_equal(s[:k], sap, decimal=3) 
Example #10
Source File: PureSVDRecommender.py    From RecSys2019_DeepLearning_Evaluation with GNU Affero General Public License v3.0
def fit(self, num_factors=100, random_seed = None):

        self._print("Computing SVD decomposition...")

        U, Sigma, QT = randomized_svd(self.URM_train,
                                      n_components=num_factors,
                                      #n_iter=5,
                                      random_state = random_seed)

        U_s = U * sps.diags(Sigma)

        self.USER_factors = U_s
        self.ITEM_factors = QT.T

        self._print("Computing SVD decomposition... Done!") 
Example #11
Source File: test_extmath.py    From twitter-stock-recommendation with MIT License
def test_randomized_svd_infinite_rank():
    # Check that extmath.randomized_svd can handle noisy matrices
    n_samples = 100
    n_features = 500
    rank = 5
    k = 10

    # this time there is no low-rank component: the singular values decrease
    # slowly but steadily (tail_strength=1.0), so there is no effective rank cutoff
    X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features,
                             effective_rank=rank, tail_strength=1.0,
                             random_state=0)
    assert_equal(X.shape, (n_samples, n_features))

    # compute the singular values of X using the slow exact method
    _, s, _ = linalg.svd(X, full_matrices=False)
    for normalizer in ['auto', 'none', 'LU', 'QR']:
        # compute the singular values of X using the fast approximate method
        # without the iterated power method
        _, sa, _ = randomized_svd(X, k, n_iter=0,
                                  power_iteration_normalizer=normalizer)

        # the approximation does not filter out the noise:
        assert_greater(np.abs(s[:k] - sa).max(), 0.1)

        # compute the singular values of X using the fast approximate method
        # with iterated power method
        _, sap, _ = randomized_svd(X, k, n_iter=5,
                                   power_iteration_normalizer=normalizer)

        # the iterated power method is still managing to get most of the
        # structure at the requested rank
        assert_almost_equal(s[:k], sap, decimal=3) 
Example #12
Source File: makelowdim.py    From socialsent with Apache License 2.0
def run(in_file, out_path, dim=300, keep_words=None): 
        base_embed = Explicit.load(in_file, normalize=False)
        if keep_words is not None:
            base_embed = base_embed.get_subembed(keep_words)
        u, s, v = randomized_svd(base_embed.m, n_components=dim, n_iter=5)
        np.save(out_path + "-u.npy", u)
        np.save(out_path + "-v.npy", v)
        np.save(out_path + "-s.npy", s)
        util.write_pickle(base_embed.iw, out_path  + "-vocab.pkl") 
Example #13
Source File: test_extmath.py    From twitter-stock-recommendation with MIT License
def test_randomized_svd_transpose_consistency():
    # Check that transposing the design matrix has limited impact
    n_samples = 100
    n_features = 500
    rank = 4
    k = 10

    X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features,
                             effective_rank=rank, tail_strength=0.5,
                             random_state=0)
    assert_equal(X.shape, (n_samples, n_features))

    U1, s1, V1 = randomized_svd(X, k, n_iter=3, transpose=False,
                                random_state=0)
    U2, s2, V2 = randomized_svd(X, k, n_iter=3, transpose=True,
                                random_state=0)
    U3, s3, V3 = randomized_svd(X, k, n_iter=3, transpose='auto',
                                random_state=0)
    U4, s4, V4 = linalg.svd(X, full_matrices=False)

    assert_almost_equal(s1, s4[:k], decimal=3)
    assert_almost_equal(s2, s4[:k], decimal=3)
    assert_almost_equal(s3, s4[:k], decimal=3)

    assert_almost_equal(np.dot(U1, V1), np.dot(U4[:, :k], V4[:k, :]),
                        decimal=2)
    assert_almost_equal(np.dot(U2, V2), np.dot(U4[:, :k], V4[:k, :]),
                        decimal=2)

    # in this case 'auto' is equivalent to transpose=True
    assert_almost_equal(s2, s3) 
Example #14
Source File: test_extmath.py    From twitter-stock-recommendation with MIT License
def test_randomized_svd_power_iteration_normalizer():
    # randomized_svd with power_iteration_normalizer='none' diverges for
    # large number of power iterations on this dataset
    rng = np.random.RandomState(42)
    X = make_low_rank_matrix(100, 500, effective_rank=50, random_state=rng)
    X += 3 * rng.randint(0, 2, size=X.shape)
    n_components = 50

    # Check that it diverges with many (non-normalized) power iterations
    U, s, V = randomized_svd(X, n_components, n_iter=2,
                             power_iteration_normalizer='none')
    A = X - U.dot(np.diag(s).dot(V))
    error_2 = linalg.norm(A, ord='fro')
    U, s, V = randomized_svd(X, n_components, n_iter=20,
                             power_iteration_normalizer='none')
    A = X - U.dot(np.diag(s).dot(V))
    error_20 = linalg.norm(A, ord='fro')
    assert_greater(np.abs(error_2 - error_20), 100)

    for normalizer in ['LU', 'QR', 'auto']:
        U, s, V = randomized_svd(X, n_components, n_iter=2,
                                 power_iteration_normalizer=normalizer,
                                 random_state=0)
        A = X - U.dot(np.diag(s).dot(V))
        error_2 = linalg.norm(A, ord='fro')

        for i in [5, 10, 50]:
            U, s, V = randomized_svd(X, n_components, n_iter=i,
                                     power_iteration_normalizer=normalizer,
                                     random_state=0)
            A = X - U.dot(np.diag(s).dot(V))
            error = linalg.norm(A, ord='fro')
            assert_greater(15, np.abs(error_2 - error)) 
Example #15
Source File: prone.py    From nodevectors with MIT License
def tsvd_rand(matrix, n_components):
        """
        Sparse randomized tSVD for fast embedding
        """
        l = matrix.shape[0]
        # Is this csc conversion necessary?
        smat = sparse.csc_matrix(matrix)
        U, Sigma, VT = randomized_svd(smat, 
            n_components=n_components, 
            n_iter=5, random_state=None)
        U = U * np.sqrt(Sigma)
        U = preprocessing.normalize(U, "l2")
        return U 
Example #16
Source File: prone.py    From cogdl with MIT License
def _get_embedding_rand(self, matrix):
        # Sparse randomized tSVD for fast embedding
        t1 = time.time()
        l = matrix.shape[0]
        smat = sp.csc_matrix(matrix)  # convert to sparse CSC format
        print("svd sparse", smat.data.shape[0] * 1.0 / l ** 2)
        U, Sigma, VT = randomized_svd(
            smat, n_components=self.dimension, n_iter=5, random_state=None
        )
        U = U * np.sqrt(Sigma)
        U = preprocessing.normalize(U, "l2")
        print("sparsesvd time", time.time() - t1)
        return U 
Example #17
Source File: netsmf.py    From cogdl with MIT License
def _get_embedding_rand(self, matrix):
        # Sparse randomized tSVD for fast embedding
        t1 = time.time()
        l = matrix.shape[0]
        smat = sp.csc_matrix(matrix)
        print("svd sparse", smat.data.shape[0] * 1.0 / l ** 2)
        U, Sigma, VT = randomized_svd(
            smat, n_components=self.dimension, n_iter=5, random_state=None
        )
        U = U * np.sqrt(Sigma)
        U = preprocessing.normalize(U, "l2")
        print("sparsesvd time", time.time() - t1)
        return U 
Example #18
Source File: test_extmath.py    From twitter-stock-recommendation with MIT License
def test_randomized_svd_sign_flip():
    a = np.array([[2.0, 0.0], [0.0, 1.0]])
    u1, s1, v1 = randomized_svd(a, 2, flip_sign=True, random_state=41)
    for seed in range(10):
        u2, s2, v2 = randomized_svd(a, 2, flip_sign=True, random_state=seed)
        assert_almost_equal(u1, u2)
        assert_almost_equal(v1, v2)
        assert_almost_equal(np.dot(u2 * s2, v2), a)
        assert_almost_equal(np.dot(u2.T, u2), np.eye(2))
        assert_almost_equal(np.dot(v2.T, v2), np.eye(2)) 
Example #19
Source File: proNE.py    From ProNE with MIT License
def get_embedding_rand(self, matrix):
		# Sparse randomized tSVD for fast embedding
		t1 = time.time()
		l = matrix.shape[0]
		smat = scipy.sparse.csc_matrix(matrix)  # convert to sparse CSC format
		print('svd sparse', smat.data.shape[0] * 1.0 / l ** 2)
		U, Sigma, VT = randomized_svd(smat, n_components=self.dimension, n_iter=5, random_state=None)
		U = U * np.sqrt(Sigma)
		U = preprocessing.normalize(U, "l2")
		print('sparsesvd time', time.time() - t1)
		return U 
Example #20
Source File: model_fitter.py    From themarketingtechnologist with Apache License 2.0
def apply_uv_decomposition(self):
        U, Sigma, VT = randomized_svd(self.behaviour_matrix,
                                      n_components=15,
                                      n_iter=10,
                                      random_state=None)
        print(U.shape)
        print(VT.shape)
        self.X_hat = np.dot(U, VT)  # note: Sigma is omitted; the full reconstruction is np.dot(U * Sigma, VT)
Example #21
Source File: soft_impute.py    From fancyimpute with Apache License 2.0
def _max_singular_value(self, X_filled):
        # quick decomposition of X_filled into rank-1 SVD
        _, s, _ = randomized_svd(
            X_filled,
            1,
            n_iter=5)
        return s[0] 
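The largest singular value equals the spectral (2-)norm of the matrix, which is what this rank-1 decomposition estimates; a quick comparison on made-up data:

import numpy as np
from sklearn.utils.extmath import randomized_svd

X = np.random.RandomState(0).rand(60, 30)
_, s, _ = randomized_svd(X, 1, n_iter=5, random_state=0)
print(s[0], np.linalg.norm(X, 2))  # the estimate tracks the exact 2-norm closely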
Example #22
Source File: soft_impute.py    From ME-Net with MIT License
def _max_singular_value(self, X_filled):
        # quick decomposition of X_filled into rank-1 SVD
        _, s, _ = randomized_svd(
            X_filled,
            1,
            n_iter=5)
        return s[0] 
Example #23
Source File: prone.py    From CogDL-TensorFlow with MIT License
def _get_embedding_rand(self, matrix):
        # Sparse randomized tSVD for fast embedding
        t1 = time.time()
        l = matrix.shape[0]
        smat = sp.csc_matrix(matrix)  # convert to sparse CSC format
        print("svd sparse", smat.data.shape[0] * 1.0 / l ** 2)
        U, Sigma, VT = randomized_svd(
            smat, n_components=self.dimension, n_iter=5, random_state=None
        )
        U = U * np.sqrt(Sigma)
        U = preprocessing.normalize(U, "l2")
        print("sparsesvd time", time.time() - t1)
        return U 
Example #24
Source File: svt_solver.py    From matrix-completion with Eclipse Public License 1.0
from scipy.sparse.linalg import svds
from sklearn.utils.extmath import randomized_svd, svd_flip


def _my_svd(M, k, algorithm):
    if algorithm == 'randomized':
        (U, S, V) = randomized_svd(
            M, n_components=min(k, M.shape[1]-1), n_oversamples=20)
    elif algorithm == 'arpack':
        (U, S, V) = svds(M, k=min(k, min(M.shape)-1))
        # svds returns singular values in ascending order; reverse to descending
        S = S[::-1]
        U, V = svd_flip(U[:, ::-1], V[::-1])
    else:
        raise ValueError("unknown algorithm")
    return (U, S, V) 
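An illustrative usage sketch of the dispatcher above; the matrix is made up, and because n_oversamples covers the full column space here, the two algorithms should agree to numerical precision:

import numpy as np

rng = np.random.RandomState(0)
M = rng.rand(30, 20)

U1, S1, V1 = _my_svd(M, k=5, algorithm='randomized')
U2, S2, V2 = _my_svd(M, k=5, algorithm='arpack')
print(np.allclose(S1, S2))   # the top-5 spectra should match closely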
Example #25
Source File: test_extmath.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_randomized_svd_sign_flip_with_transpose():
    # Check if the randomized_svd sign flipping is always done based on u
    # irrespective of transpose.
    # See https://github.com/scikit-learn/scikit-learn/issues/5608
    # for more details.
    def max_loading_is_positive(u, v):
        """
        returns a bool tuple indicating whether the value maximising np.abs
        is positive in every column of u and in every row of v.
        """
        u_based = (np.abs(u).max(axis=0) == u.max(axis=0)).all()
        v_based = (np.abs(v).max(axis=1) == v.max(axis=1)).all()
        return u_based, v_based

    mat = np.arange(10 * 8).reshape(10, -1)

    # Without transpose
    u_flipped, _, v_flipped = randomized_svd(mat, 3, flip_sign=True)
    u_based, v_based = max_loading_is_positive(u_flipped, v_flipped)
    assert u_based
    assert not v_based

    # With transpose
    u_flipped_with_transpose, _, v_flipped_with_transpose = randomized_svd(
        mat, 3, flip_sign=True, transpose=True)
    u_based, v_based = max_loading_is_positive(
        u_flipped_with_transpose, v_flipped_with_transpose)
    assert u_based
    assert not v_based 
Example #26
Source File: test_extmath.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_randomized_svd_sparse_warnings():
    # randomized_svd throws a warning for lil and dok matrices
    rng = np.random.RandomState(42)
    X = make_low_rank_matrix(50, 20, effective_rank=10, random_state=rng)
    n_components = 5
    for cls in (sparse.lil_matrix, sparse.dok_matrix):
        X = cls(X)
        assert_warns_message(
            sparse.SparseEfficiencyWarning,
            "Calculating SVD of a {} is expensive. "
            "csr_matrix is more efficient.".format(cls.__name__),
            randomized_svd, X, n_components, n_iter=1,
            power_iteration_normalizer='none') 
Example #27
Source File: test_extmath.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_randomized_svd_power_iteration_normalizer():
    # randomized_svd with power_iteration_normalizer='none' diverges for
    # large number of power iterations on this dataset
    rng = np.random.RandomState(42)
    X = make_low_rank_matrix(100, 500, effective_rank=50, random_state=rng)
    X += 3 * rng.randint(0, 2, size=X.shape)
    n_components = 50

    # Check that it diverges with many (non-normalized) power iterations
    U, s, V = randomized_svd(X, n_components, n_iter=2,
                             power_iteration_normalizer='none')
    A = X - U.dot(np.diag(s).dot(V))
    error_2 = linalg.norm(A, ord='fro')
    U, s, V = randomized_svd(X, n_components, n_iter=20,
                             power_iteration_normalizer='none')
    A = X - U.dot(np.diag(s).dot(V))
    error_20 = linalg.norm(A, ord='fro')
    assert_greater(np.abs(error_2 - error_20), 100)

    for normalizer in ['LU', 'QR', 'auto']:
        U, s, V = randomized_svd(X, n_components, n_iter=2,
                                 power_iteration_normalizer=normalizer,
                                 random_state=0)
        A = X - U.dot(np.diag(s).dot(V))
        error_2 = linalg.norm(A, ord='fro')

        for i in [5, 10, 50]:
            U, s, V = randomized_svd(X, n_components, n_iter=i,
                                     power_iteration_normalizer=normalizer,
                                     random_state=0)
            A = X - U.dot(np.diag(s).dot(V))
            error = linalg.norm(A, ord='fro')
            assert_greater(15, np.abs(error_2 - error)) 
Example #28
Source File: test_extmath.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_randomized_svd_transpose_consistency():
    # Check that transposing the design matrix has limited impact
    n_samples = 100
    n_features = 500
    rank = 4
    k = 10

    X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features,
                             effective_rank=rank, tail_strength=0.5,
                             random_state=0)
    assert_equal(X.shape, (n_samples, n_features))

    U1, s1, V1 = randomized_svd(X, k, n_iter=3, transpose=False,
                                random_state=0)
    U2, s2, V2 = randomized_svd(X, k, n_iter=3, transpose=True,
                                random_state=0)
    U3, s3, V3 = randomized_svd(X, k, n_iter=3, transpose='auto',
                                random_state=0)
    U4, s4, V4 = linalg.svd(X, full_matrices=False)

    assert_almost_equal(s1, s4[:k], decimal=3)
    assert_almost_equal(s2, s4[:k], decimal=3)
    assert_almost_equal(s3, s4[:k], decimal=3)

    assert_almost_equal(np.dot(U1, V1), np.dot(U4[:, :k], V4[:k, :]),
                        decimal=2)
    assert_almost_equal(np.dot(U2, V2), np.dot(U4[:, :k], V4[:k, :]),
                        decimal=2)

    # in this case 'auto' is equivalent to transpose=True
    assert_almost_equal(s2, s3) 
Example #29
Source File: test_extmath.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_randomized_svd_infinite_rank():
    # Check that extmath.randomized_svd can handle noisy matrices
    n_samples = 100
    n_features = 500
    rank = 5
    k = 10

    # this time there is no low-rank component: the singular values decrease
    # slowly but steadily (tail_strength=1.0), so there is no effective rank cutoff
    X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features,
                             effective_rank=rank, tail_strength=1.0,
                             random_state=0)
    assert_equal(X.shape, (n_samples, n_features))

    # compute the singular values of X using the slow exact method
    _, s, _ = linalg.svd(X, full_matrices=False)
    for normalizer in ['auto', 'none', 'LU', 'QR']:
        # compute the singular values of X using the fast approximate method
        # without the iterated power method
        _, sa, _ = randomized_svd(X, k, n_iter=0,
                                  power_iteration_normalizer=normalizer)

        # the approximation does not filter out the noise:
        assert_greater(np.abs(s[:k] - sa).max(), 0.1)

        # compute the singular values of X using the fast approximate method
        # with iterated power method
        _, sap, _ = randomized_svd(X, k, n_iter=5,
                                   power_iteration_normalizer=normalizer)

        # the iterated power method is still managing to get most of the
        # structure at the requested rank
        assert_almost_equal(s[:k], sap, decimal=3) 
Example #30
Source File: test_extmath.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_randomized_svd_low_rank_with_noise():
    # Check that extmath.randomized_svd can handle noisy matrices
    n_samples = 100
    n_features = 500
    rank = 5
    k = 10

    # generate a matrix X with approximate effective rank `rank` and an
    # important noise component
    X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features,
                             effective_rank=rank, tail_strength=0.1,
                             random_state=0)
    assert_equal(X.shape, (n_samples, n_features))

    # compute the singular values of X using the slow exact method
    _, s, _ = linalg.svd(X, full_matrices=False)

    for normalizer in ['auto', 'none', 'LU', 'QR']:
        # compute the singular values of X using the fast approximate
        # method without the iterated power method
        _, sa, _ = randomized_svd(X, k, n_iter=0,
                                  power_iteration_normalizer=normalizer,
                                  random_state=0)

        # the approximation does not filter out the noise:
        assert_greater(np.abs(s[:k] - sa).max(), 0.01)

        # compute the singular values of X using the fast approximate
        # method with iterated power method
        _, sap, _ = randomized_svd(X, k,
                                   power_iteration_normalizer=normalizer,
                                   random_state=0)

        # the iterated power method helps get rid of the noise:
        assert_almost_equal(s[:k], sap, decimal=3)