Python sklearn.decomposition.PCA Examples

The following are 30 code examples of sklearn.decomposition.PCA(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.decomposition, or try the search function.
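Before diving into the project examples, here is a minimal, self-contained sketch of the typical PCA workflow (fit, inspect explained variance, transform); the data and shapes below are illustrative assumptions, not taken from any of the projects:

import numpy as np
from sklearn.decomposition import PCA

# 100 samples with 10 features each (illustrative data)
X = np.random.RandomState(0).randn(100, 10)

pca = PCA(n_components=2)
X_2d = pca.fit_transform(X)           # reduced matrix, shape (100, 2)
print(pca.explained_variance_ratio_)  # fraction of variance per component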
Example #1
Source File: embedding.py    From DeepDIVA with GNU Lesser General Public License v3.0
def pca(features, n_components=2):
    """
    Returns the embedded points for PCA.

    Parameters
    ----------
    features : numpy.ndarray
        Contains the input feature vectors.
    n_components : int
        Number of components to transform the features into.

    Returns
    -------
    embedding : numpy.ndarray
        x, y (and z) points that the feature vectors have been transformed into.
    """
    embedding = PCA(n_components=n_components).fit_transform(features)
    return embedding
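A hypothetical call of the helper above (the feature matrix is made up for illustration):

import numpy as np

features = np.random.rand(500, 128)  # 500 feature vectors of dimension 128
points_2d = pca(features)            # shape (500, 2), ready for 2-D plotting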

Example #2
Source File: CategoryProjector.py    From scattertext with Apache License 2.0
def __init__(self,
                 weighter=LengthNormalizer(),
                 normalizer=StandardScaler(),
                 selector=AssociationCompactor(1000, RankDifference),
                 projector=PCA(2)):
        '''
        :param weighter: instance of an sklearn class with fit_transform to weight the term X category corpus.
        :param normalizer: instance of an sklearn class with fit_transform to normalize the term X category corpus.
        :param selector: instance of a compactor class; if None, no compaction will be done.
        :param projector: instance of an sklearn class with fit_transform.
        '''
        self.weighter_ = weighter
        self.normalizer_ = normalizer
        self.selector_ = selector
        self.projector_ = projector 
Example #3
Source File: dataset.py    From neural-combinatorial-optimization-rl-tensorflow with MIT License
def gen_instance(self, max_length, dimension, test_mode=True, seed=0):
        if seed != 0:
            np.random.seed(seed)

        # Randomly generate (max_length) cities with (dimension) coordinates in [0, 100)
        seq = np.random.randint(100, size=(max_length, dimension))

        # Principal Component Analysis to center & rotate coordinates
        pca = PCA(n_components=dimension)
        sequence = pca.fit_transform(seq)

        # Scale to [0, 1)
        input_ = sequence / 100

        if test_mode:
            return input_, seq
        else:
            return input_

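A minimal standalone sketch of the same center-and-rotate idea, on illustrative data; note that PCA output is always mean-centered:

import numpy as np
from sklearn.decomposition import PCA

cities = np.random.RandomState(1).randint(100, size=(20, 2))
rotated = PCA(n_components=2).fit_transform(cities)
print(rotated.mean(axis=0))  # approximately [0, 0]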
Example #4
Source File: CategoryProjector.py    From scattertext with Apache License 2.0
def __init__(self, doc2vec_builder=None, projector=PCA(2)):
        '''

        :param doc2vec_builder: Doc2VecBuilder, optional
            If None, a default model will be used
        :param projector: object
            Has fit_transform method
        '''
        if doc2vec_builder is None:
            try:
                import gensim
            except ImportError:
                raise Exception("Please install gensim before using Doc2VecCategoryProjector.")
            self.doc2vec_builder = Doc2VecBuilder(
                gensim.models.Doc2Vec(vector_size=100, window=5, min_count=5, workers=6, alpha=0.025,
                                      min_alpha=0.025, epochs=50)
            )
        else:
            assert isinstance(doc2vec_builder, Doc2VecBuilder)
            self.doc2vec_builder = doc2vec_builder
        self.projector = projector 
Example #5
Source File: embedding.py    From BrainSpace with BSD 3-Clause "New" or "Revised" License
def fit(self, x):
        """ Compute PCA.

        Parameters
        ----------
        x : ndarray, shape(n_samples, n_feat)
            Input matrix.

        Returns
        -------
        self : object
            Returns self.

        """

        pca = PCA(n_components=self.n_components,
                  random_state=self.random_state)
        self.maps_ = pca.fit_transform(x)
        self.lambdas_ = pca.explained_variance_

        return self 
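Outside the class, the same fit reduces to a few lines; the data and component count below are assumptions for illustration:

import numpy as np
from sklearn.decomposition import PCA

x = np.random.RandomState(0).randn(200, 50)  # shape (n_samples, n_feat)
pca = PCA(n_components=3, random_state=0)
maps = pca.fit_transform(x)        # embedded coordinates, shape (200, 3)
lambdas = pca.explained_variance_  # one eigenvalue per retained component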
Example #6
Source File: cluster_features.py    From bert-extractive-summarizer with MIT License
def __init__(
        self,
        features: ndarray,
        algorithm: str = 'kmeans',
        pca_k: int = None,
        random_state: int = 12345
    ):
        """
        :param features: the embedding matrix created by bert parent
        :param algorithm: Which clustering algorithm to use
        :param pca_k: If you want the features to be ran through pca, this is the components number
        :param random_state: Random state
        """

        if pca_k:
            self.features = PCA(n_components=pca_k).fit_transform(features)
        else:
            self.features = features

        self.algorithm = algorithm
        self.pca_k = pca_k
        self.random_state = random_state 
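A minimal sketch of the PCA-then-cluster pattern this constructor enables; the data, pca_k value, and cluster count are illustrative assumptions:

import numpy as np
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

embeddings = np.random.RandomState(0).randn(300, 768)  # e.g. BERT-sized vectors
reduced = PCA(n_components=50).fit_transform(embeddings)
labels = KMeans(n_clusters=5, random_state=12345).fit_predict(reduced)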
Example #7
Source File: multipca.py    From PynPoint with GNU General Public License v3.0
def create_writer(self,
                      image_out_port: None) -> PcaTaskWriter:
        """
        Method to create an instance of PcaTaskWriter.

        Parameters
        ----------
        image_out_port : None
            Output port, not used.

        Returns
        -------
        pynpoint.util.multipca.PcaTaskWriter
            PCA task writer.
        """

        return PcaTaskWriter(self.m_result_queue,
                             self.m_mean_out_port,
                             self.m_median_out_port,
                             self.m_weighted_out_port,
                             self.m_clip_out_port,
                             self.m_data_mutex,
                             self.m_requirements) 
Example #8
Source File: multipca.py    From PynPoint with GNU General Public License v3.0
def init_creator(self,
                     image_in_port: None) -> PcaTaskCreator:
        """
        Method to create an instance of PcaTaskCreator.

        Parameters
        ----------
        image_in_port : None
            Input port, not used.

        Returns
        -------
        pynpoint.util.multipca.PcaTaskCreator
            PCA task creator.
        """

        return PcaTaskCreator(self.m_tasks_queue,
                              self.m_num_proc,
                              self.m_pca_numbers) 
Example #9
Source File: post_proc.py    From HorizonNet with MIT License
def get_rot_rad(init_coorx, coory, z=50, coorW=1024, coorH=512, floorW=1024, floorH=512, tol=5):
    gpid = get_gpid(init_coorx, coorW)
    coor = np.hstack([np.arange(coorW)[:, None], coory[:, None]])
    xy = np_coor2xy(coor, z, coorW, coorH, floorW, floorH)
    xy_cor = []

    rot_rad_suggestions = []
    for j in range(len(init_coorx)):
        pca = PCA(n_components=1)
        pca.fit(xy[gpid == j])
        rot_rad_suggestions.append(_get_rot_rad(*pca.components_[0]))
    rot_rad_suggestions = np.sort(rot_rad_suggestions + [1e9])

    rot_rad = np.mean(rot_rad_suggestions[:-1])
    best_rot_rad_sz = -1
    last_j = 0
    for j in range(1, len(rot_rad_suggestions)):
        if rot_rad_suggestions[j] - rot_rad_suggestions[j-1] > tol:
            last_j = j
        elif j - last_j > best_rot_rad_sz:
            rot_rad = rot_rad_suggestions[last_j:j+1].mean()
            best_rot_rad_sz = j - last_j

    dx = int(round(rot_rad * 1024 / 360))
    return dx, rot_rad 
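The core trick above is that the first principal component of a set of 2-D points gives their dominant direction; a minimal sketch with illustrative data:

import numpy as np
from sklearn.decomposition import PCA

pts = np.random.RandomState(0).randn(100, 2) @ np.array([[3.0, 1.0], [1.0, 1.0]])
pca = PCA(n_components=1).fit(pts)
vx, vy = pca.components_[0]
angle_deg = np.degrees(np.arctan2(vy, vx))  # orientation of the point cloud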
Example #10
Source File: DimensionReduction.py    From FAE with GNU General Public License v3.0
def Transform(self, data_container, store_folder='', store_key=''):
        data = data_container.GetArray()
        if data.shape[1] != self.GetModel().components_.shape[1]:
            print('Data cannot be transformed by the existing PCA model')
        sub_data = self.GetModel().transform(data)

        sub_feature_name = ['PCA_feature_' + str(index) for index in
                            range(1, super(DimensionReductionByPCA, self).GetRemainedNumber() + 1)]

        new_data_container = deepcopy(data_container)
        new_data_container.SetArray(sub_data)
        new_data_container.SetFeatureName(sub_feature_name)
        new_data_container.UpdateFrameByData()

        if store_folder:
            self.SaveDataContainer(data_container, store_folder, store_key)

        return new_data_container 
Example #11
Source File: edgeConstruction.py    From DCC with MIT License
def parse_args():
    """ Parse input arguments """
    parser = argparse.ArgumentParser(description='Feature extraction for RCC algorithm')

    parser.add_argument('--dataset', default=None, type=str,
                        help='The entered dataset file must be in the Data folder')
    parser.add_argument('--prep', dest='prep', default='none', type=str,
                        help='preprocessing of data: scale,minmax,normalization,none')
    parser.add_argument('--algo', dest='algo', default='mknn', type=str,
                        help='Algorithm to use: knn,mknn')
    parser.add_argument('--k', dest='k', default=10, type=int,
                        help='Number of nearest neighbors to consider')
    parser.add_argument('--pca', dest='pca', default=None, type=int,
                        help='Dimension of PCA processing before kNN graph construction')
    parser.add_argument('--samples', dest='nsamples', default=0, type=int,
                        help='total samples to consider')
    parser.add_argument('--format', choices=['mat', 'pkl', 'h5'], default='mat', help='Dataset format')

    args = parser.parse_args()
    return args 
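A hypothetical invocation of this script (the dataset file name is an assumption):

python edgeConstruction.py --dataset mnist.mat --prep minmax --algo mknn --k 10 --pca 50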
Example #12
Source File: data_utils.py    From CalibrationNN with GNU General Public License v3.0
def pca(self, **kwargs):
        if 'n_components' in kwargs:
            nComp = kwargs['n_components']
        else:
            nComp = 0.995

        if 'dates' in kwargs:
            mat = self.to_matrix(kwargs['dates'])
        else:
            mat = self.to_matrix()
        scaler = StandardScaler()
        pca = PCA(n_components=nComp)
        self._pipeline = Pipeline([('scaler', scaler), ('pca', pca)])
        self._pipeline.fit(mat)
        
        if 'file' in kwargs:
            tofile(kwargs['file'], self._pipeline)
        
        return self._pipeline 
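A minimal sketch of the same scaler-plus-PCA pipeline with illustrative data; note that a float n_components (here 0.995) tells PCA to keep however many components explain that fraction of the variance:

import numpy as np
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

mat = np.random.RandomState(0).randn(100, 20)
pipeline = Pipeline([('scaler', StandardScaler()),
                     ('pca', PCA(n_components=0.995))])
reduced = pipeline.fit_transform(mat)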
Example #13
Source File: example7.py    From bert-as-service with MIT License
def vis(embed, vis_alg='PCA', pool_alg='REDUCE_MEAN'):
    plt.close()
    fig = plt.figure()
    plt.rcParams['figure.figsize'] = [21, 7]
    for idx, ebd in enumerate(embed):
        ax = plt.subplot(2, 6, idx + 1)
        vis_x = ebd[:, 0]
        vis_y = ebd[:, 1]
        plt.scatter(vis_x, vis_y, c=subset_label, cmap=ListedColormap(["blue", "green", "yellow", "red"]), marker='.',
                    alpha=0.7, s=2)
        ax.set_title('pool_layer=-%d' % (idx + 1))
    plt.tight_layout()
    plt.subplots_adjust(bottom=0.1, right=0.95, top=0.9)
    cax = plt.axes([0.96, 0.1, 0.01, 0.3])
    cbar = plt.colorbar(cax=cax, ticks=range(num_label))
    cbar.ax.get_yaxis().set_ticks([])
    for j, lab in enumerate(['ent.', 'bus.', 'sci.', 'heal.']):
        cbar.ax.text(.5, (2 * j + 1) / 8.0, lab, ha='center', va='center', rotation=270)
    fig.suptitle('%s visualization of BERT layers using "bert-as-service" (-pool_strategy=%s)' % (vis_alg, pool_alg),
                 fontsize=14)
    plt.show() 
Example #14
Source File: fisher_iris_visualization.py    From blender-scripting with MIT License
def PCA(data, num_components=None):
    # mean center the data
    data -= data.mean(axis=0)
    # calculate the covariance matrix
    R = np.cov(data, rowvar=False)
    # calculate eigenvectors & eigenvalues of the covariance matrix
    # use 'eigh' rather than 'eig' since R is symmetric,
    # the performance gain is substantial
    V, E = np.linalg.eigh(R)
    # sort eigenvalue in decreasing order
    idx = np.argsort(V)[::-1]
    E = E[:,idx]
    # sort eigenvectors according to same index
    V = V[idx]
    # select the first num_components eigenvectors (num_components is the
    # desired dimension of the rescaled data array)
    E = E[:, :num_components]
    # carry out the transformation on the data using eigenvectors
    # and return the re-scaled data, eigenvalues, and eigenvectors
    return np.dot(E.T, data.T).T, V, E 
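A quick sanity check of this hand-rolled PCA against sklearn's, on illustrative data (components can differ in sign, which is arbitrary in PCA, hence the absolute values; note the copy, since the function above centers its input in place):

import numpy as np
from sklearn.decomposition import PCA as SkPCA

data = np.random.RandomState(0).randn(50, 4)
ours, V, E = PCA(data.copy(), num_components=2)
theirs = SkPCA(n_components=2).fit_transform(data)
assert np.allclose(np.abs(ours), np.abs(theirs), atol=1e-6)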
Example #15
Source File: neuagent.py    From dl4ir-webnav with BSD 3-Clause "New" or "Revised" License
def load_wemb(params, vocab):
    wemb = pkl.load(open(prm.wordemb_path, 'rb'))
    # next(iter(...)) works on both Python 2 and 3; dict.values()[0] is Python 2 only
    dim_emb_orig = next(iter(wemb.values())).shape[0]

    W = 0.01 * np.random.randn(prm.n_words, dim_emb_orig).astype(config.floatX)
    for word, pos in vocab.items():
        if word in wemb:
            W[pos, :] = wemb[word]

    if prm.dim_emb < dim_emb_orig:
        pca = PCA(n_components=prm.dim_emb, copy=False, whiten=True)
        W = pca.fit_transform(W)

    params['W'] = W

    return params
Example #16
Source File: DimensionReduction.py    From FAE with GNU General Public License v3.0
def GetDescription(self):
        text = "Since the dimension of feature space was high, we applied principle component analysis (PCA) on the feature matrix. " \
               "The feature vector of the transformed feature matrix was independent to each other. "
        return text 
Example #17
Source File: test_models.py    From revrand with Apache License 2.0
def test_pipeline_slm(make_gaus_data):

    X, y, Xs, ys = make_gaus_data

    slm = StandardLinearModel(LinearBasis(onescol=True))
    estimators = [('PCA', PCA()),
                  ('SLM', slm)]
    pipe = Pipeline(estimators)

    pipe.fit(X, y)
    Ey = pipe.predict(Xs)
    assert smse(ys, Ey) < 0.1 
Example #18
Source File: regression_multicollinearity.py    From practicalDataAnalysisCookbook with GNU General Public License v2.0
def reduce_PCA(x, n):
    '''
        Reduce the dimensions using Principal Component
        Analysis 
    '''
    # create the PCA object
    pca = dc.PCA(n_components=n, whiten=True)

    # learn the principal components from all the features
    return pca.fit(x)

Example #19
Source File: reduce_randomizedPCA.py    From practicalDataAnalysisCookbook with GNU General Public License v2.0
def reduce_PCA(x):
    '''
        Reduce the dimensions using Principal Component
        Analysis 
    '''
    # create the PCA object
    pca = dc.PCA(n_components=2, whiten=True)

    # learn the principal components from all the features
    return pca.fit(x) 
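Both reduce_PCA variants return the fitted object rather than transformed data, so a caller would proceed along these lines (illustrative data, with dc aliasing sklearn.decomposition as in the source files):

import numpy as np

x = np.random.RandomState(0).randn(80, 6)
pca = reduce_PCA(x)
print(pca.explained_variance_ratio_.sum())  # variance kept by the 2 components
reduced = pca.transform(x)                  # project onto those components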
Example #20
Source File: test_transformers.py    From gordo with GNU Affero General Public License v3.0
def _validate_transformer(self, transformer):
        """
        Inserts a transformer into the middle of a pipeline and runs it
        """
        pipe = Pipeline([("pca1", PCA()), ("custom", transformer), ("pca2", PCA())])
        X = np.random.random(size=100).reshape(10, 10)
        pipe.fit_transform(X) 
Example #21
Source File: DimensionReduction.py    From FAE with GNU General Public License v3.0
def SetRemainedNumber(self, number):
        super(DimensionReductionByPCA, self).SetRemainedNumber(number)
        super(DimensionReductionByPCA, self).SetModel(
            PCA(n_components=super(DimensionReductionByPCA, self).GetRemainedNumber()))
Example #22
Source File: multipca.py    From PynPoint with GNU General Public License v3.0
def create_processors(self) -> List[PcaTaskProcessor]:
        """
        Method to create a list of instances of PcaTaskProcessor.

        Returns
        -------
        list(pynpoint.util.multipca.PcaTaskProcessor, )
            PCA task processors.
        """

        processors = []

        for _ in range(self.m_num_proc):

            processors.append(PcaTaskProcessor(self.m_tasks_queue,
                                               self.m_result_queue,
                                               self.m_star_reshape,
                                               self.m_angles,
                                               self.m_scales,
                                               self.m_pca_model,
                                               self.m_im_shape,
                                               self.m_indices,
                                               self.m_requirements,
                                               self.m_processing_type))

        return processors 
Example #23
Source File: test_decompose.py    From skutil with BSD 3-Clause "New" or "Revised" License
def test_selective_pca():
    original = X
    cols = [original.columns[0]]  # Only perform on first...
    compare_cols = np.array(original[['sepal width (cm)', 'petal length (cm)',
                                      'petal width (cm)']].as_matrix())  # should be the same as the trans cols
    # note: DataFrame.as_matrix() was removed in pandas 1.0; use .to_numpy() there

    transformer = SelectivePCA(cols=cols, n_components=0.85).fit(original)
    transformed = transformer.transform(original)

    untouched_cols = np.array(transformed[['sepal width (cm)', 'petal length (cm)', 'petal width (cm)']].as_matrix())
    assert_array_almost_equal(compare_cols, untouched_cols)
    assert 'PC1' in transformed.columns
    assert transformed.shape[1] == 4
    assert isinstance(transformer.get_decomposition(), PCA)
    assert SelectivePCA().get_decomposition() is None

    # test the selective mixin
    assert isinstance(transformer.cols, list)

    # what if we want to weight it?
    pca_df = SelectivePCA(weight=True, n_components=0.99, as_df=False).fit_transform(original)
    pca_arr = SelectivePCA(weight=True, n_components=0.99, as_df=False).fit_transform(iris.data)
    assert_array_equal(pca_df, pca_arr)

    # hack to assert they are not equal if weighted
    pca_arr = SelectivePCA(weight=False, n_components=0.99, as_df=False).fit_transform(iris.data)
    assert_fails(assert_array_equal, AssertionError, pca_df, pca_arr) 
Example #24
Source File: decompose.py    From skutil with BSD 3-Clause "New" or "Revised" License
def score(self, X, y=None):
        """Return the average log-likelihood of all samples.
        This calls sklearn.decomposition.PCA's score method
        on the specified columns [1].

        Parameters
        ----------

        X: Pandas ``DataFrame``, shape=(n_samples, n_features)
            The data to score.

        y: None
            Passthrough for pipeline/gridsearch


        Returns
        -------

        ll: float
            Average log-likelihood of the samples under the fit
            PCA model (`self.pca_`)


        References
        ----------

        .. [1] Bishop, C.  "Pattern Recognition and Machine Learning"
               12.2.1 p. 574 http://www.miketipping.com/papers/met-mppca.pdf
        """
        check_is_fitted(self, 'pca_')
        X, _ = validate_is_pd(X, self.cols)
        cols = X.columns if not self.cols else self.cols

        ll = self.pca_.score(X[cols].as_matrix(), _as_numpy(y))
        return ll 
Example #25
Source File: decompose.py    From skutil with BSD 3-Clause "New" or "Revised" License
def get_decomposition(self):
        """Overridden from the :class:``skutil.decomposition.decompose._BaseSelectiveDecomposer`` class,
        this method returns the internal decomposition class: 
        ``sklearn.decomposition.PCA``

        Returns
        -------
        self.pca_ : ``sklearn.decomposition.PCA``
            The fit internal decomposition class
        """
        return self.pca_ if hasattr(self, 'pca_') else None 
Example #26
Source File: decompose.py    From skutil with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y=None):
        """Fit the transformer.

        Parameters
        ----------

        X : Pandas ``DataFrame``, shape=(n_samples, n_features)
            The Pandas frame to fit. The frame will only
            be fit on the prescribed ``cols`` (see ``__init__``) or
            all of them if ``cols`` is None. Furthermore, ``X`` will
            not be altered in the process of the fit.

        y : None
            Passthrough for ``sklearn.pipeline.Pipeline``. Even
            if explicitly set, will not change behavior of ``fit``.

        Returns
        -------

        self
        """
        # check on state of X and cols
        X, self.cols = validate_is_pd(X, self.cols)
        cols = _cols_if_none(X, self.cols)

        # fails thru if names don't exist:
        # note: .as_matrix() was removed in pandas 1.0; use .to_numpy() there
        self.pca_ = PCA(
            n_components=self.n_components,
            whiten=self.whiten).fit(X[cols].as_matrix())

        return self 
Example #27
Source File: utils.py    From MNIST-baselines with MIT License
def skPCA(data, dim):
    model = PCA(n_components=dim)
    model.fit(data)
    return model.transform(data) 
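Usage sketch with illustrative data; fitting and then transforming the same matrix, as skPCA does, is equivalent to PCA(n_components=dim).fit_transform(data):

import numpy as np

data = np.random.RandomState(0).rand(100, 784)  # e.g. flattened 28x28 images
low_dim = skPCA(data, dim=50)                   # shape (100, 50)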
Example #28
Source File: OptimalProjection.py    From scattertext with Apache License 2.0
def get_optimal_category_projection_by_rank(
        corpus,
        n_dims=2,
        n_steps=20,
        projector=lambda rank, n_dims: CategoryProjector(AssociationCompactorByRank(rank),
                                                         projector=PCA(n_dims)),
        verbose=False
):
    try:
        from astropy.stats import RipleysKEstimator
    except ImportError:
        raise Exception("Please install astropy")

    ripley = RipleysKEstimator(area=1., x_max=1., y_max=1., x_min=0., y_min=0.)
    min_dev = None
    best_rank = None
    best_x = None
    best_y = None
    best_projector = None
    for rank in np.linspace(1, TermCategoryRanker().get_max_rank(corpus), n_steps):

        r = np.linspace(0, np.sqrt(2), 100)
        category_projector = projector(rank, n_dims)
        category_projection = category_projector.project(corpus)
        for dim_1 in range(0, n_dims):
            for dim_2 in range(dim_1 + 1, n_dims):
                proj = category_projection.projection[:, [dim_1, dim_2]]
                scaled_proj = np.array([stretch_0_to_1(proj.T[0]), stretch_0_to_1(proj.T[1])]).T
                dev = np.sum(np.abs(ripley(scaled_proj, r, mode='ripley') - ripley.poisson(r)))
                if min_dev is None or dev < min_dev:
                    min_dev = dev
                    best_rank = rank
                    best_projector = category_projector
                    best_x, best_y = (dim_1, dim_2)
                if verbose:
                    print('rank', rank, 'dims', dim_1, dim_2, 'K', dev)
                    print('     best rank', best_rank, 'dims', best_x, best_y, 'K', min_dev)
    if verbose:
        print(best_rank, best_x, best_y)
    return best_projector.project(corpus, best_x, best_y) 
Example #29
Source File: util.py    From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License
def kmean_pca_batch(data, batch, k=10):
    data = np.asarray(data, dtype=np.float32)
    batch = np.asarray(batch, dtype=np.float32)
    a = np.zeros(batch.shape[0])
    for i in np.arange(batch.shape[0]):
        tmp = np.concatenate((data, [batch[i]]))
        tmp_pca = PCA(n_components=2).fit_transform(tmp)
        a[i] = mle_single(tmp_pca[:-1], tmp_pca[-1], k=k)
    return a 
Example #30
Source File: pca.py    From classification-of-encrypted-traffic with MIT License
def runpca(X, num_comp=None):
    pca = PCA(n_components=num_comp, svd_solver='full')
    pca.fit(X)
    # print(pca.n_components_)
    # print(pca.explained_variance_ratio_)
    # print(sum(pca.explained_variance_ratio_))
    return pca
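Because svd_solver='full' is used, num_comp may also be a float in (0, 1), in which case PCA keeps however many components explain that fraction of the variance; an illustrative usage:

import numpy as np

X = np.random.RandomState(0).randn(200, 30)
pca = runpca(X, num_comp=0.95)
print(pca.n_components_)  # number of components actually retained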