Python sklearn.utils.extmath.randomized_svd() Examples

The following are 30 code examples of sklearn.utils.extmath.randomized_svd(), collected from open-source projects. The project and source file for each example are noted above it. You may also want to check out the other functions and classes available in the sklearn.utils.extmath module.
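Before the project examples, here is a minimal, self-contained usage sketch; the matrix shape and parameter values below are made up for illustration:

import numpy as np
from sklearn.utils.extmath import randomized_svd

# a random 100 x 50 matrix; any 2-D array or scipy sparse matrix works
X = np.random.RandomState(0).rand(100, 50)

# approximate the top 5 singular triplets
U, s, Vt = randomized_svd(X, n_components=5, n_iter=5, random_state=0)

print(U.shape, s.shape, Vt.shape)  # (100, 5) (5,) (5, 50)
X_approx = U @ np.diag(s) @ Vt     # rank-5 reconstruction of X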
Example #1
Source File: ops.py    From pymssa with MIT License
import numpy as np
from sklearn.utils.extmath import randomized_svd


def decompose_trajectory_matrix(trajectory_matrix, K, svd_method='randomized'):
    # calculate the S matrix
    # https://arxiv.org/pdf/1309.5050.pdf
    S = np.dot(trajectory_matrix, trajectory_matrix.T)

    # perform SVD on S
    if svd_method == 'randomized':
        U, s, V = randomized_svd(S, K)
    elif svd_method == 'exact':
        U, s, V = np.linalg.svd(S)
    else:
        raise ValueError("svd_method must be 'randomized' or 'exact'")

    # the valid rank is only where the eigenvalues are > 0
    rank = np.sum(s > 0)

    # the singular values of the trajectory matrix are the square
    # roots of the eigenvalues of S
    s = np.sqrt(s)

    return U, s, V, rank 
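To see the eigenvalue/singular-value relationship the comments above rely on, here is a quick check; the 3 x 10 trajectory matrix is made up for illustration:

import numpy as np

T = np.random.RandomState(0).rand(3, 10)            # hypothetical trajectory matrix
U, s, V, rank = decompose_trajectory_matrix(T, K=3)

# the square roots of the eigenvalues of T T^T are the singular values of T
_, sv, _ = np.linalg.svd(T, full_matrices=False)
print(np.allclose(s[:rank], sv[:rank]))             # True, up to numerical error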
Example #2
Source File: factor_analyzer.py    From factor_analyzer with GNU General Public License v2.0
def _fit_principal(self, X):
        """
        Fit the factor analysis model using a principal
        factor analysis solution.

        Parameters
        ----------
        X : array-like
            The full data set.

        Returns
        -------
        loadings : numpy array
            The factor loadings matrix.
        """
        # standardize the data
        X = X.copy()
        X = (X - X.mean(0)) / X.std(0)

        # perform the randomized singular value decomposition
        U, S, V = randomized_svd(X, self.n_factors)
        corr_mtx = np.dot(X, V.T)
        loadings = np.array([[pearsonr(x, c)[0] for c in corr_mtx.T] for x in X.T])
        return loadings 
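The same principal-factor idea works outside the class; a standalone sketch, with random data and a factor count of 2 (both made up for illustration):

import numpy as np
from scipy.stats import pearsonr
from sklearn.utils.extmath import randomized_svd

rng = np.random.RandomState(0)
X = rng.rand(100, 6)
X = (X - X.mean(0)) / X.std(0)   # standardize the columns

U, S, V = randomized_svd(X, 2, random_state=0)
scores = np.dot(X, V.T)          # component scores
loadings = np.array([[pearsonr(x, c)[0] for c in scores.T] for x in X.T])
print(loadings.shape)            # (6, 2)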
Example #3
Source File: ksvd.py    From Lyssandra with BSD 3-Clause "New" or "Revised" License
import sys

import numpy as np
from sklearn.utils.extmath import randomized_svd


def ksvd(Y, D, X, n_cycles=1, verbose=True):
    n_atoms = D.shape[1]
    n_features, n_samples = Y.shape
    unused_atoms = []
    # residual (fast_dot was removed from recent sklearn; np.dot is equivalent)
    R = Y - np.dot(D, X)

    for c in range(n_cycles):
        for k in range(n_atoms):
            if verbose:
                sys.stdout.write("\r" + "k-svd..." + ":%3.2f%%" % ((k / float(n_atoms)) * 100))
                sys.stdout.flush()
            # find all the datapoints that use the kth atom
            omega_k = X[k, :] != 0
            if not np.any(omega_k):
                unused_atoms.append(k)
                continue
            # the residual due to all the other atoms but k
            Rk = R[:, omega_k] + np.outer(D[:, k], X[k, omega_k])
            U, S, V = randomized_svd(Rk, n_components=1, n_iter=10, flip_sign=False)
            D[:, k] = U[:, 0]
            X[k, omega_k] = V[0, :] * S[0]
            # update the residual
            R[:, omega_k] = Rk - np.outer(D[:, k], X[k, omega_k])
        print()
    return D, X, unused_atoms 
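A toy invocation of ksvd(); the shapes and sparsity level are made up, and in practice D should hold unit-norm atoms and X a sparse code:

import numpy as np

rng = np.random.RandomState(0)
Y = rng.rand(20, 100)                              # one signal per column
D = rng.rand(20, 8)
D /= np.linalg.norm(D, axis=0)                     # unit-norm atoms
X = rng.rand(8, 100) * (rng.rand(8, 100) < 0.3)    # sparse codes

D, X, unused_atoms = ksvd(Y, D, X, n_cycles=1, verbose=False)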
Example #4
Source File: synthetic_test.py    From socialsent with Apache License 2.0
def make_synthetic_data(ppmi, counts, word_subset, new_weight, num_synth=10, 
        old_pos=OLD_POS, new_pos=NEW_POS, old_neg=OLD_NEG, new_neg=NEW_NEG, dim=300, seed_offset=0):
    # print(new_weight)
    # ppmi = ppmi.get_subembed(word_subset, restrict_context=False)
    amel_vecs = []
    print("Sampling positive...")
    for i in range(num_synth):
        amel_vecs.append(_sample_vec2(new_pos, old_neg, counts, new_weight, seed=i+seed_offset))
    amel_mat = vstack(amel_vecs)
    pejor_vecs = []
    print("Sampling negative...")
    for i in range(num_synth):
        pejor_vecs.append(_sample_vec2(old_pos, new_neg, counts, 1-new_weight, seed=i+num_synth+seed_offset))
    pejor_mat = vstack(pejor_vecs)
    print("Making matrix...")
    # ppmi_mat = vstack([ppmi.m, amel_mat, pejor_mat])
    u = vstack([counts.m, amel_mat, pejor_mat])
    print("SVD on matrix...")
    # u, s, v = randomized_svd(ppmi_mat, n_components=dim, n_iter=2)
    new_vocab = ppmi.iw
    new_vocab.extend(['a-{0:d}'.format(i) for i in range(num_synth)])
    new_vocab.extend(['p-{0:d}'.format(i) for i in range(num_synth)])
    return Embedding(u, new_vocab) 
Example #5
Source File: soft_impute.py    From fancyimpute with Apache License 2.0
def _svd_step(self, X, shrinkage_value, max_rank=None):
        """
        Returns reconstructed X from low-rank thresholded SVD and
        the rank achieved.
        """
        if max_rank:
            # if we have a max rank then perform the faster randomized SVD
            (U, s, V) = randomized_svd(
                X,
                max_rank,
                n_iter=self.n_power_iterations)
        else:
            # perform a full-rank SVD using LAPACK (np.linalg.svd, not ARPACK)
            (U, s, V) = np.linalg.svd(
                X,
                full_matrices=False,
                compute_uv=True)
        s_thresh = np.maximum(s - shrinkage_value, 0)
        rank = (s_thresh > 0).sum()
        s_thresh = s_thresh[:rank]
        U_thresh = U[:, :rank]
        V_thresh = V[:rank, :]
        S_thresh = np.diag(s_thresh)
        X_reconstruction = np.dot(U_thresh, np.dot(S_thresh, V_thresh))
        return X_reconstruction, rank 
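The shrinkage step above is singular-value soft-thresholding; a standalone sketch, with an arbitrary shrinkage value of 0.5 and made-up data:

import numpy as np
from sklearn.utils.extmath import randomized_svd

rng = np.random.RandomState(0)
X = rng.rand(50, 40)

U, s, Vt = randomized_svd(X, n_components=10, n_iter=5, random_state=0)
s_thresh = np.maximum(s - 0.5, 0)   # shrink, then drop the zeroed values
rank = int((s_thresh > 0).sum())
X_reconstruction = U[:, :rank] @ np.diag(s_thresh[:rank]) @ Vt[:rank, :]
print(rank, X_reconstruction.shape)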
Example #6
Source File: soft_impute.py    From ME-Net with MIT License
def _svd_step(self, X, shrinkage_value, max_rank=None):
        """
        Returns reconstructed X from low-rank thresholded SVD and
        the rank achieved.
        """
        if max_rank:
            # if we have a max rank then perform the faster randomized SVD
            (U, s, V) = randomized_svd(
                X,
                max_rank,
                n_iter=self.n_power_iterations)
        else:
            # perform a full-rank SVD using LAPACK (np.linalg.svd, not ARPACK)
            (U, s, V) = np.linalg.svd(
                X,
                full_matrices=False,
                compute_uv=True)
        s_thresh = np.maximum(s - shrinkage_value, 0)
        rank = (s_thresh > 0).sum()
        s_thresh = s_thresh[:rank]
        U_thresh = U[:, :rank]
        V_thresh = V[:rank, :]
        S_thresh = np.diag(s_thresh)
        X_reconstruction = np.dot(U_thresh, np.dot(S_thresh, V_thresh))
        return X_reconstruction, rank 
Example #7
Source File: PureSVDRecommender.py    From RecSys2019_DeepLearning_Evaluation with GNU Affero General Public License v3.0
def fit(self, num_factors=100, topK = None, random_seed = None):

        self._print("Computing SVD decomposition...")

        U, Sigma, QT = randomized_svd(self.URM_train,
                                      n_components=num_factors,
                                      #n_iter=5,
                                      random_state = random_seed)

        if topK is None:
            topK = self.n_items

        W_sparse = compute_W_sparse_from_item_latent_factors(QT.T, topK=topK)

        self.W_sparse = sps.csr_matrix(W_sparse)

        self._print("Computing SVD decomposition... Done!") 
Example #8
Source File: svd.py    From prince with MIT License
def compute_svd(X, n_components, n_iter, random_state, engine):
    """Computes an SVD with k components."""

    # Determine what SVD engine to use
    if engine == 'auto':
        engine = 'sklearn'

    # Compute the SVD
    if engine == 'fbpca':
        if FBPCA_INSTALLED:
            U, s, V = fbpca.pca(X, k=n_components, n_iter=n_iter)
        else:
            raise ValueError('fbpca is not installed; please install it if you want to use it')
    elif engine == 'sklearn':
        U, s, V = extmath.randomized_svd(
            X,
            n_components=n_components,
            n_iter=n_iter,
            random_state=random_state
        )
    else:
        raise ValueError("engine has to be one of ('auto', 'fbpca', 'sklearn')")

    U, V = extmath.svd_flip(U, V)

    return U, s, V 
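Note the extmath.svd_flip call at the end: it resolves the sign ambiguity of each singular vector pair so that repeated runs agree. A minimal sketch of that step on its own (the data is made up):

import numpy as np
from sklearn.utils import extmath

X = np.random.RandomState(0).rand(20, 8)
U, s, V = extmath.randomized_svd(X, n_components=3, n_iter=4, random_state=0)

# flip signs so the largest-magnitude loadings in U are positive
U, V = extmath.svd_flip(U, V)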
Example #9
Source File: test_extmath.py    From twitter-stock-recommendation with MIT License
def test_randomized_svd_low_rank_with_noise():
    # Check that extmath.randomized_svd can handle noisy matrices
    n_samples = 100
    n_features = 500
    rank = 5
    k = 10

    # generate a matrix X with approximate effective rank `rank` and an
    # important noise component
    X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features,
                             effective_rank=rank, tail_strength=0.1,
                             random_state=0)
    assert_equal(X.shape, (n_samples, n_features))

    # compute the singular values of X using the slow exact method
    _, s, _ = linalg.svd(X, full_matrices=False)

    for normalizer in ['auto', 'none', 'LU', 'QR']:
        # compute the singular values of X using the fast approximate
        # method without the iterated power method
        _, sa, _ = randomized_svd(X, k, n_iter=0,
                                  power_iteration_normalizer=normalizer,
                                  random_state=0)

        # the approximation does not filter out the noise:
        assert_greater(np.abs(s[:k] - sa).max(), 0.01)

        # compute the singular values of X using the fast approximate
        # method with iterated power method
        _, sap, _ = randomized_svd(X, k,
                                   power_iteration_normalizer=normalizer,
                                   random_state=0)

        # the iterated power method helps get rid of the noise:
        assert_almost_equal(s[:k], sap, decimal=3) 
Example #10
Source File: PureSVDRecommender.py    From RecSys2019_DeepLearning_Evaluation with GNU Affero General Public License v3.0
def fit(self, num_factors=100, random_seed = None):

        self._print("Computing SVD decomposition...")

        U, Sigma, QT = randomized_svd(self.URM_train,
                                      n_components=num_factors,
                                      #n_iter=5,
                                      random_state = random_seed)

        U_s = U * sps.diags(Sigma)

        self.USER_factors = U_s
        self.ITEM_factors = QT.T

        self._print("Computing SVD decomposition... Done!") 
Example #11
Source File: test_extmath.py    From twitter-stock-recommendation with MIT License
def test_randomized_svd_infinite_rank():
    # Check that extmath.randomized_svd can handle noisy matrices
    n_samples = 100
    n_features = 500
    rank = 5
    k = 10

    # this time there is no low-rank component: the singular values decrease
    # slowly but steadily (tail_strength=1.0), so there is no effective rank cutoff
    X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features,
                             effective_rank=rank, tail_strength=1.0,
                             random_state=0)
    assert_equal(X.shape, (n_samples, n_features))

    # compute the singular values of X using the slow exact method
    _, s, _ = linalg.svd(X, full_matrices=False)
    for normalizer in ['auto', 'none', 'LU', 'QR']:
        # compute the singular values of X using the fast approximate method
        # without the iterated power method
        _, sa, _ = randomized_svd(X, k, n_iter=0,
                                  power_iteration_normalizer=normalizer)

        # the approximation does not filter out the noise:
        assert_greater(np.abs(s[:k] - sa).max(), 0.1)

        # compute the singular values of X using the fast approximate method
        # with iterated power method
        _, sap, _ = randomized_svd(X, k, n_iter=5,
                                   power_iteration_normalizer=normalizer)

        # the iterated power method is still managing to get most of the
        # structure at the requested rank
        assert_almost_equal(s[:k], sap, decimal=3) 
Example #12
Source File: makelowdim.py    From socialsent with Apache License 2.0
def run(in_file, out_path, dim=300, keep_words=None): 
        base_embed = Explicit.load(in_file, normalize=False)
        if keep_words is not None:
            base_embed = base_embed.get_subembed(keep_words)
        u, s, v = randomized_svd(base_embed.m, n_components=dim, n_iter=5)
        np.save(out_path + "-u.npy", u)
        np.save(out_path + "-v.npy", v)
        np.save(out_path + "-s.npy", s)
        util.write_pickle(base_embed.iw, out_path  + "-vocab.pkl") 
Example #13
Source File: test_extmath.py    From twitter-stock-recommendation with MIT License
def test_randomized_svd_transpose_consistency():
    # Check that transposing the design matrix has limited impact
    n_samples = 100
    n_features = 500
    rank = 4
    k = 10

    X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features,
                             effective_rank=rank, tail_strength=0.5,
                             random_state=0)
    assert_equal(X.shape, (n_samples, n_features))

    U1, s1, V1 = randomized_svd(X, k, n_iter=3, transpose=False,
                                random_state=0)
    U2, s2, V2 = randomized_svd(X, k, n_iter=3, transpose=True,
                                random_state=0)
    U3, s3, V3 = randomized_svd(X, k, n_iter=3, transpose='auto',
                                random_state=0)
    U4, s4, V4 = linalg.svd(X, full_matrices=False)

    assert_almost_equal(s1, s4[:k], decimal=3)
    assert_almost_equal(s2, s4[:k], decimal=3)
    assert_almost_equal(s3, s4[:k], decimal=3)

    assert_almost_equal(np.dot(U1, V1), np.dot(U4[:, :k], V4[:k, :]),
                        decimal=2)
    assert_almost_equal(np.dot(U2, V2), np.dot(U4[:, :k], V4[:k, :]),
                        decimal=2)

    # in this case 'auto' is equivalent to transpose=True
    assert_almost_equal(s2, s3) 
Example #14
Source File: test_extmath.py    From twitter-stock-recommendation with MIT License
def test_randomized_svd_power_iteration_normalizer():
    # randomized_svd with power_iteration_normalizer='none' diverges for
    # large number of power iterations on this dataset
    rng = np.random.RandomState(42)
    X = make_low_rank_matrix(100, 500, effective_rank=50, random_state=rng)
    X += 3 * rng.randint(0, 2, size=X.shape)
    n_components = 50

    # Check that it diverges with many (non-normalized) power iterations
    U, s, V = randomized_svd(X, n_components, n_iter=2,
                             power_iteration_normalizer='none')
    A = X - U.dot(np.diag(s).dot(V))
    error_2 = linalg.norm(A, ord='fro')
    U, s, V = randomized_svd(X, n_components, n_iter=20,
                             power_iteration_normalizer='none')
    A = X - U.dot(np.diag(s).dot(V))
    error_20 = linalg.norm(A, ord='fro')
    assert_greater(np.abs(error_2 - error_20), 100)

    for normalizer in ['LU', 'QR', 'auto']:
        U, s, V = randomized_svd(X, n_components, n_iter=2,
                                 power_iteration_normalizer=normalizer,
                                 random_state=0)
        A = X - U.dot(np.diag(s).dot(V))
        error_2 = linalg.norm(A, ord='fro')

        for i in [5, 10, 50]:
            U, s, V = randomized_svd(X, n_components, n_iter=i,
                                     power_iteration_normalizer=normalizer,
                                     random_state=0)
            A = X - U.dot(np.diag(s).dot(V))
            error = linalg.norm(A, ord='fro')
            assert_greater(15, np.abs(error_2 - error)) 
Example #15
Source File: prone.py    From nodevectors with MIT License
def tsvd_rand(matrix, n_components):
        """
        Sparse randomized tSVD for fast embedding
        """
        l = matrix.shape[0]
        # Is this csc conversion necessary?
        smat = sparse.csc_matrix(matrix)
        U, Sigma, VT = randomized_svd(smat, 
            n_components=n_components, 
            n_iter=5, random_state=None)
        U = U * np.sqrt(Sigma)
        U = preprocessing.normalize(U, "l2")
        return U 
Example #16
Source File: prone.py    From cogdl with MIT License
def _get_embedding_rand(self, matrix):
        # Sparse randomized tSVD for fast embedding
        t1 = time.time()
        l = matrix.shape[0]
        smat = sp.csc_matrix(matrix)  # convert to sparse CSC format
        print("svd sparse", smat.data.shape[0] * 1.0 / l ** 2)
        U, Sigma, VT = randomized_svd(
            smat, n_components=self.dimension, n_iter=5, random_state=None
        )
        U = U * np.sqrt(Sigma)
        U = preprocessing.normalize(U, "l2")
        print("sparsesvd time", time.time() - t1)
        return U 
Example #17
Source File: netsmf.py    From cogdl with MIT License
def _get_embedding_rand(self, matrix):
        # Sparse randomized tSVD for fast embedding
        t1 = time.time()
        l = matrix.shape[0]
        smat = sp.csc_matrix(matrix)
        print("svd sparse", smat.data.shape[0] * 1.0 / l ** 2)
        U, Sigma, VT = randomized_svd(
            smat, n_components=self.dimension, n_iter=5, random_state=None
        )
        U = U * np.sqrt(Sigma)
        U = preprocessing.normalize(U, "l2")
        print("sparsesvd time", time.time() - t1)
        return U 
Example #18
Source File: test_extmath.py    From twitter-stock-recommendation with MIT License
def test_randomized_svd_sign_flip():
    a = np.array([[2.0, 0.0], [0.0, 1.0]])
    u1, s1, v1 = randomized_svd(a, 2, flip_sign=True, random_state=41)
    for seed in range(10):
        u2, s2, v2 = randomized_svd(a, 2, flip_sign=True, random_state=seed)
        assert_almost_equal(u1, u2)
        assert_almost_equal(v1, v2)
        assert_almost_equal(np.dot(u2 * s2, v2), a)
        assert_almost_equal(np.dot(u2.T, u2), np.eye(2))
        assert_almost_equal(np.dot(v2.T, v2), np.eye(2)) 
Example #19
Source File: proNE.py    From ProNE with MIT License
def get_embedding_rand(self, matrix):
		# Sparse randomized tSVD for fast embedding
		t1 = time.time()
		l = matrix.shape[0]
		smat = scipy.sparse.csc_matrix(matrix)  # convert to sparse CSC format
		print('svd sparse', smat.data.shape[0] * 1.0 / l ** 2)
		U, Sigma, VT = randomized_svd(smat, n_components=self.dimension, n_iter=5, random_state=None)
		U = U * np.sqrt(Sigma)
		U = preprocessing.normalize(U, "l2")
		print('sparsesvd time', time.time() - t1)
		return U 
Example #20
Source File: model_fitter.py    From themarketingtechnologist with Apache License 2.0
def apply_uv_decomposition(self):
        U, Sigma, VT = randomized_svd(self.behaviour_matrix,
                                      n_components=15,
                                      n_iter=10,
                                      random_state=None)
        print(U.shape)
        print(VT.shape)
        self.X_hat = np.dot(U, VT)  # note: Sigma is omitted; the full reconstruction is np.dot(U * Sigma, VT)
Example #21
Source File: soft_impute.py    From fancyimpute with Apache License 2.0
def _max_singular_value(self, X_filled):
        # quick decomposition of X_filled into rank-1 SVD
        _, s, _ = randomized_svd(
            X_filled,
            1,
            n_iter=5)
        return s[0] 
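The largest singular value equals the spectral (2-)norm of the matrix, which is what this rank-1 decomposition estimates; a quick comparison on made-up data:

import numpy as np
from sklearn.utils.extmath import randomized_svd

X = np.random.RandomState(0).rand(60, 30)
_, s, _ = randomized_svd(X, 1, n_iter=5, random_state=0)
print(s[0], np.linalg.norm(X, 2))  # the estimate tracks the exact 2-norm closely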
Example #22
Source File: soft_impute.py    From ME-Net with MIT License
def _max_singular_value(self, X_filled):
        # quick decomposition of X_filled into rank-1 SVD
        _, s, _ = randomized_svd(
            X_filled,
            1,
            n_iter=5)
        return s[0] 
Example #23
Source File: prone.py    From CogDL-TensorFlow with MIT License
def _get_embedding_rand(self, matrix):
        # Sparse randomized tSVD for fast embedding
        t1 = time.time()
        l = matrix.shape[0]
        smat = sp.csc_matrix(matrix)  # convert to sparse CSC format
        print("svd sparse", smat.data.shape[0] * 1.0 / l ** 2)
        U, Sigma, VT = randomized_svd(
            smat, n_components=self.dimension, n_iter=5, random_state=None
        )
        U = U * np.sqrt(Sigma)
        U = preprocessing.normalize(U, "l2")
        print("sparsesvd time", time.time() - t1)
        return U 
Example #24
Source File: svt_solver.py    From matrix-completion with Eclipse Public License 1.0
from scipy.sparse.linalg import svds
from sklearn.utils.extmath import randomized_svd, svd_flip


def _my_svd(M, k, algorithm):
    if algorithm == 'randomized':
        (U, S, V) = randomized_svd(
            M, n_components=min(k, M.shape[1]-1), n_oversamples=20)
    elif algorithm == 'arpack':
        (U, S, V) = svds(M, k=min(k, min(M.shape)-1))
        # svds returns singular values in ascending order; reverse to descending
        S = S[::-1]
        U, V = svd_flip(U[:, ::-1], V[::-1])
    else:
        raise ValueError("unknown algorithm")
    return (U, S, V) 
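An illustrative usage sketch of the dispatcher above; the matrix is made up, and because n_oversamples covers the full column space here, the two algorithms should agree to numerical precision:

import numpy as np

rng = np.random.RandomState(0)
M = rng.rand(30, 20)

U1, S1, V1 = _my_svd(M, k=5, algorithm='randomized')
U2, S2, V2 = _my_svd(M, k=5, algorithm='arpack')
print(np.allclose(S1, S2))   # the top-5 spectra should match closely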
Example #25
Source File: test_extmath.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_randomized_svd_sign_flip_with_transpose():
    # Check if the randomized_svd sign flipping is always done based on u
    # irrespective of transpose.
    # See https://github.com/scikit-learn/scikit-learn/issues/5608
    # for more details.
    def max_loading_is_positive(u, v):
        """
        returns a bool tuple indicating whether the value maximising np.abs
        is positive in every column of u and in every row of v.
        """
        u_based = (np.abs(u).max(axis=0) == u.max(axis=0)).all()
        v_based = (np.abs(v).max(axis=1) == v.max(axis=1)).all()
        return u_based, v_based

    mat = np.arange(10 * 8).reshape(10, -1)

    # Without transpose
    u_flipped, _, v_flipped = randomized_svd(mat, 3, flip_sign=True)
    u_based, v_based = max_loading_is_positive(u_flipped, v_flipped)
    assert u_based
    assert not v_based

    # With transpose
    u_flipped_with_transpose, _, v_flipped_with_transpose = randomized_svd(
        mat, 3, flip_sign=True, transpose=True)
    u_based, v_based = max_loading_is_positive(
        u_flipped_with_transpose, v_flipped_with_transpose)
    assert u_based
    assert not v_based 
Example #26
Source File: test_extmath.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_randomized_svd_sparse_warnings():
    # randomized_svd throws a warning for lil and dok matrices
    rng = np.random.RandomState(42)
    X = make_low_rank_matrix(50, 20, effective_rank=10, random_state=rng)
    n_components = 5
    for cls in (sparse.lil_matrix, sparse.dok_matrix):
        X = cls(X)
        assert_warns_message(
            sparse.SparseEfficiencyWarning,
            "Calculating SVD of a {} is expensive. "
            "csr_matrix is more efficient.".format(cls.__name__),
            randomized_svd, X, n_components, n_iter=1,
            power_iteration_normalizer='none') 
Example #27
Source File: test_extmath.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_randomized_svd_power_iteration_normalizer():
    # randomized_svd with power_iteration_normalizer='none' diverges for
    # large number of power iterations on this dataset
    rng = np.random.RandomState(42)
    X = make_low_rank_matrix(100, 500, effective_rank=50, random_state=rng)
    X += 3 * rng.randint(0, 2, size=X.shape)
    n_components = 50

    # Check that it diverges with many (non-normalized) power iterations
    U, s, V = randomized_svd(X, n_components, n_iter=2,
                             power_iteration_normalizer='none')
    A = X - U.dot(np.diag(s).dot(V))
    error_2 = linalg.norm(A, ord='fro')
    U, s, V = randomized_svd(X, n_components, n_iter=20,
                             power_iteration_normalizer='none')
    A = X - U.dot(np.diag(s).dot(V))
    error_20 = linalg.norm(A, ord='fro')
    assert_greater(np.abs(error_2 - error_20), 100)

    for normalizer in ['LU', 'QR', 'auto']:
        U, s, V = randomized_svd(X, n_components, n_iter=2,
                                 power_iteration_normalizer=normalizer,
                                 random_state=0)
        A = X - U.dot(np.diag(s).dot(V))
        error_2 = linalg.norm(A, ord='fro')

        for i in [5, 10, 50]:
            U, s, V = randomized_svd(X, n_components, n_iter=i,
                                     power_iteration_normalizer=normalizer,
                                     random_state=0)
            A = X - U.dot(np.diag(s).dot(V))
            error = linalg.norm(A, ord='fro')
            assert_greater(15, np.abs(error_2 - error)) 
Example #28
Source File: test_extmath.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_randomized_svd_transpose_consistency():
    # Check that transposing the design matrix has limited impact
    n_samples = 100
    n_features = 500
    rank = 4
    k = 10

    X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features,
                             effective_rank=rank, tail_strength=0.5,
                             random_state=0)
    assert_equal(X.shape, (n_samples, n_features))

    U1, s1, V1 = randomized_svd(X, k, n_iter=3, transpose=False,
                                random_state=0)
    U2, s2, V2 = randomized_svd(X, k, n_iter=3, transpose=True,
                                random_state=0)
    U3, s3, V3 = randomized_svd(X, k, n_iter=3, transpose='auto',
                                random_state=0)
    U4, s4, V4 = linalg.svd(X, full_matrices=False)

    assert_almost_equal(s1, s4[:k], decimal=3)
    assert_almost_equal(s2, s4[:k], decimal=3)
    assert_almost_equal(s3, s4[:k], decimal=3)

    assert_almost_equal(np.dot(U1, V1), np.dot(U4[:, :k], V4[:k, :]),
                        decimal=2)
    assert_almost_equal(np.dot(U2, V2), np.dot(U4[:, :k], V4[:k, :]),
                        decimal=2)

    # in this case 'auto' is equivalent to transpose=True
    assert_almost_equal(s2, s3) 
Example #29
Source File: test_extmath.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_randomized_svd_infinite_rank():
    # Check that extmath.randomized_svd can handle noisy matrices
    n_samples = 100
    n_features = 500
    rank = 5
    k = 10

    # this time there is no low-rank component: the singular values decrease
    # slowly but steadily (tail_strength=1.0), so there is no effective rank cutoff
    X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features,
                             effective_rank=rank, tail_strength=1.0,
                             random_state=0)
    assert_equal(X.shape, (n_samples, n_features))

    # compute the singular values of X using the slow exact method
    _, s, _ = linalg.svd(X, full_matrices=False)
    for normalizer in ['auto', 'none', 'LU', 'QR']:
        # compute the singular values of X using the fast approximate method
        # without the iterated power method
        _, sa, _ = randomized_svd(X, k, n_iter=0,
                                  power_iteration_normalizer=normalizer)

        # the approximation does not filter out the noise:
        assert_greater(np.abs(s[:k] - sa).max(), 0.1)

        # compute the singular values of X using the fast approximate method
        # with iterated power method
        _, sap, _ = randomized_svd(X, k, n_iter=5,
                                   power_iteration_normalizer=normalizer)

        # the iterated power method is still managing to get most of the
        # structure at the requested rank
        assert_almost_equal(s[:k], sap, decimal=3) 
Example #30
Source File: test_extmath.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_randomized_svd_low_rank_with_noise():
    # Check that extmath.randomized_svd can handle noisy matrices
    n_samples = 100
    n_features = 500
    rank = 5
    k = 10

    # generate a matrix X with approximate effective rank `rank` and an
    # important noise component
    X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features,
                             effective_rank=rank, tail_strength=0.1,
                             random_state=0)
    assert_equal(X.shape, (n_samples, n_features))

    # compute the singular values of X using the slow exact method
    _, s, _ = linalg.svd(X, full_matrices=False)

    for normalizer in ['auto', 'none', 'LU', 'QR']:
        # compute the singular values of X using the fast approximate
        # method without the iterated power method
        _, sa, _ = randomized_svd(X, k, n_iter=0,
                                  power_iteration_normalizer=normalizer,
                                  random_state=0)

        # the approximation does not filter out the noise:
        assert_greater(np.abs(s[:k] - sa).max(), 0.01)

        # compute the singular values of X using the fast approximate
        # method with iterated power method
        _, sap, _ = randomized_svd(X, k,
                                   power_iteration_normalizer=normalizer,
                                   random_state=0)

        # the iterated power method helps get rid of the noise:
        assert_almost_equal(s[:k], sap, decimal=3)