Python sklearn.decomposition.PCA Examples

The following are 30 code examples of sklearn.decomposition.PCA(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.decomposition, or try the search function.
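Before diving into the project examples, here is a minimal, self-contained sketch of the typical PCA workflow (fit, inspect explained variance, transform); the data and shapes below are illustrative assumptions, not taken from any of the projects:

import numpy as np
from sklearn.decomposition import PCA

# 100 samples with 10 features each (illustrative data)
X = np.random.RandomState(0).randn(100, 10)

pca = PCA(n_components=2)
X_2d = pca.fit_transform(X)           # reduced matrix, shape (100, 2)
print(pca.explained_variance_ratio_)  # fraction of variance per component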
Example #1
Source File: embedding.py    From DeepDIVA with GNU Lesser General Public License v3.0
def pca(features, n_components=2):
    """
    Returns the embedded points for PCA.

    Parameters
    ----------
    features : numpy.ndarray
        Contains the input feature vectors.
    n_components : int
        Number of components to transform the features into.

    Returns
    -------
    embedding : numpy.ndarray
        x, y (and z) points that the feature vectors have been transformed into.
    """
    embedding = PCA(n_components=n_components).fit_transform(features)
    return embedding
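A hypothetical call of the helper above (the feature matrix is made up for illustration):

import numpy as np

features = np.random.rand(500, 128)  # 500 feature vectors of dimension 128
points_2d = pca(features)            # shape (500, 2), ready for 2-D plotting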

Example #2
Source File: CategoryProjector.py    From scattertext with Apache License 2.0
def __init__(self,
                 weighter=LengthNormalizer(),
                 normalizer=StandardScaler(),
                 selector=AssociationCompactor(1000, RankDifference),
                 projector=PCA(2)):
        '''
        :param weighter: instance of an sklearn class with fit_transform to weight the term X category corpus.
        :param normalizer: instance of an sklearn class with fit_transform to normalize the term X category corpus.
        :param selector: instance of a compactor class; if None, no compaction will be done.
        :param projector: instance of an sklearn class with fit_transform.
        '''
        self.weighter_ = weighter
        self.normalizer_ = normalizer
        self.selector_ = selector
        self.projector_ = projector 
Example #3
Source File: dataset.py    From neural-combinatorial-optimization-rl-tensorflow with MIT License
def gen_instance(self, max_length, dimension, test_mode=True, seed=0):
        if seed != 0:
            np.random.seed(seed)

        # Randomly generate (max_length) cities with (dimension) coordinates in [0, 100)
        seq = np.random.randint(100, size=(max_length, dimension))

        # Principal Component Analysis to center & rotate coordinates
        pca = PCA(n_components=dimension)
        sequence = pca.fit_transform(seq)

        # Scale to [0, 1)
        input_ = sequence / 100

        if test_mode:
            return input_, seq
        else:
            return input_

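A minimal standalone sketch of the same center-and-rotate idea, on illustrative data; note that PCA output is always mean-centered:

import numpy as np
from sklearn.decomposition import PCA

cities = np.random.RandomState(1).randint(100, size=(20, 2))
rotated = PCA(n_components=2).fit_transform(cities)
print(rotated.mean(axis=0))  # approximately [0, 0]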
Example #4
Source File: CategoryProjector.py    From scattertext with Apache License 2.0
def __init__(self, doc2vec_builder=None, projector=PCA(2)):
        '''

        :param doc2vec_builder: Doc2VecBuilder, optional
            If None, a default model will be used
        :param projector: object
            Has fit_transform method
        '''
        if doc2vec_builder is None:
            try:
                import gensim
            except ImportError:
                raise Exception("Please install gensim before using Doc2VecCategoryProjector.")
            self.doc2vec_builder = Doc2VecBuilder(
                gensim.models.Doc2Vec(vector_size=100, window=5, min_count=5, workers=6, alpha=0.025,
                                      min_alpha=0.025, epochs=50)
            )
        else:
            assert isinstance(doc2vec_builder, Doc2VecBuilder)
            self.doc2vec_builder = doc2vec_builder
        self.projector = projector 
Example #5
Source File: embedding.py    From BrainSpace with BSD 3-Clause "New" or "Revised" License
def fit(self, x):
        """ Compute PCA.

        Parameters
        ----------
        x : ndarray, shape(n_samples, n_feat)
            Input matrix.

        Returns
        -------
        self : object
            Returns self.

        """

        pca = PCA(n_components=self.n_components,
                  random_state=self.random_state)
        self.maps_ = pca.fit_transform(x)
        self.lambdas_ = pca.explained_variance_

        return self 
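Outside the class, the same fit reduces to a few lines; the data and component count below are assumptions for illustration:

import numpy as np
from sklearn.decomposition import PCA

x = np.random.RandomState(0).randn(200, 50)  # shape (n_samples, n_feat)
pca = PCA(n_components=3, random_state=0)
maps = pca.fit_transform(x)        # embedded coordinates, shape (200, 3)
lambdas = pca.explained_variance_  # one eigenvalue per retained component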
Example #6
Source File: cluster_features.py    From bert-extractive-summarizer with MIT License
def __init__(
        self,
        features: ndarray,
        algorithm: str = 'kmeans',
        pca_k: int = None,
        random_state: int = 12345
    ):
        """
        :param features: the embedding matrix created by bert parent
        :param algorithm: Which clustering algorithm to use
        :param pca_k: If you want the features to be ran through pca, this is the components number
        :param random_state: Random state
        """

        if pca_k:
            self.features = PCA(n_components=pca_k).fit_transform(features)
        else:
            self.features = features

        self.algorithm = algorithm
        self.pca_k = pca_k
        self.random_state = random_state 
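A minimal sketch of the PCA-then-cluster pattern this constructor enables; the data, pca_k value, and cluster count are illustrative assumptions:

import numpy as np
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

embeddings = np.random.RandomState(0).randn(300, 768)  # e.g. BERT-sized vectors
reduced = PCA(n_components=50).fit_transform(embeddings)
labels = KMeans(n_clusters=5, random_state=12345).fit_predict(reduced)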
Example #7
Source File: multipca.py    From PynPoint with GNU General Public License v3.0
def create_writer(self,
                      image_out_port: None) -> PcaTaskWriter:
        """
        Method to create an instance of PcaTaskWriter.

        Parameters
        ----------
        image_out_port : None
            Output port, not used.

        Returns
        -------
        pynpoint.util.multipca.PcaTaskWriter
            PCA task writer.
        """

        return PcaTaskWriter(self.m_result_queue,
                             self.m_mean_out_port,
                             self.m_median_out_port,
                             self.m_weighted_out_port,
                             self.m_clip_out_port,
                             self.m_data_mutex,
                             self.m_requirements) 
Example #8
Source File: multipca.py    From PynPoint with GNU General Public License v3.0
def init_creator(self,
                     image_in_port: None) -> PcaTaskCreator:
        """
        Method to create an instance of PcaTaskCreator.

        Parameters
        ----------
        image_in_port : None
            Input port, not used.

        Returns
        -------
        pynpoint.util.multipca.PcaTaskCreator
            PCA task creator.
        """

        return PcaTaskCreator(self.m_tasks_queue,
                              self.m_num_proc,
                              self.m_pca_numbers) 
Example #9
Source File: post_proc.py    From HorizonNet with MIT License
def get_rot_rad(init_coorx, coory, z=50, coorW=1024, coorH=512, floorW=1024, floorH=512, tol=5):
    gpid = get_gpid(init_coorx, coorW)
    coor = np.hstack([np.arange(coorW)[:, None], coory[:, None]])
    xy = np_coor2xy(coor, z, coorW, coorH, floorW, floorH)
    xy_cor = []

    rot_rad_suggestions = []
    for j in range(len(init_coorx)):
        pca = PCA(n_components=1)
        pca.fit(xy[gpid == j])
        rot_rad_suggestions.append(_get_rot_rad(*pca.components_[0]))
    rot_rad_suggestions = np.sort(rot_rad_suggestions + [1e9])

    rot_rad = np.mean(rot_rad_suggestions[:-1])
    best_rot_rad_sz = -1
    last_j = 0
    for j in range(1, len(rot_rad_suggestions)):
        if rot_rad_suggestions[j] - rot_rad_suggestions[j-1] > tol:
            last_j = j
        elif j - last_j > best_rot_rad_sz:
            rot_rad = rot_rad_suggestions[last_j:j+1].mean()
            best_rot_rad_sz = j - last_j

    dx = int(round(rot_rad * 1024 / 360))
    return dx, rot_rad 
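The core trick above is that the first principal component of a set of 2-D points gives their dominant direction; a minimal sketch with illustrative data:

import numpy as np
from sklearn.decomposition import PCA

pts = np.random.RandomState(0).randn(100, 2) @ np.array([[3.0, 1.0], [1.0, 1.0]])
pca = PCA(n_components=1).fit(pts)
vx, vy = pca.components_[0]
angle_deg = np.degrees(np.arctan2(vy, vx))  # orientation of the point cloud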
Example #10
Source File: DimensionReduction.py    From FAE with GNU General Public License v3.0
def Transform(self, data_container, store_folder='', store_key=''):
        data = data_container.GetArray()
        if data.shape[1] != self.GetModel().components_.shape[1]:
            print('Data cannot be transformed by the existing PCA model')
        sub_data = self.GetModel().transform(data)

        sub_feature_name = ['PCA_feature_' + str(index) for index in
                            range(1, super(DimensionReductionByPCA, self).GetRemainedNumber() + 1)]

        new_data_container = deepcopy(data_container)
        new_data_container.SetArray(sub_data)
        new_data_container.SetFeatureName(sub_feature_name)
        new_data_container.UpdateFrameByData()

        if store_folder:
            self.SaveDataContainer(data_container, store_folder, store_key)

        return new_data_container 
Example #11
Source File: edgeConstruction.py    From DCC with MIT License
def parse_args():
    """ Parse input arguments """
    parser = argparse.ArgumentParser(description='Feature extraction for RCC algorithm')

    parser.add_argument('--dataset', default=None, type=str,
                        help='The entered dataset file must be in the Data folder')
    parser.add_argument('--prep', dest='prep', default='none', type=str,
                        help='preprocessing of data: scale,minmax,normalization,none')
    parser.add_argument('--algo', dest='algo', default='mknn', type=str,
                        help='Algorithm to use: knn,mknn')
    parser.add_argument('--k', dest='k', default=10, type=int,
                        help='Number of nearest neighbors to consider')
    parser.add_argument('--pca', dest='pca', default=None, type=int,
                        help='Dimension of PCA processing before kNN graph construction')
    parser.add_argument('--samples', dest='nsamples', default=0, type=int,
                        help='total samples to consider')
    parser.add_argument('--format', choices=['mat', 'pkl', 'h5'], default='mat', help='Dataset format')

    args = parser.parse_args()
    return args 
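A hypothetical invocation of this script (the dataset file name is an assumption):

python edgeConstruction.py --dataset mnist.mat --prep minmax --algo mknn --k 10 --pca 50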
Example #12
Source File: data_utils.py    From CalibrationNN with GNU General Public License v3.0
def pca(self, **kwargs):
        if 'n_components' in kwargs:
            nComp = kwargs['n_components']
        else:
            nComp = 0.995

        if 'dates' in kwargs:
            mat = self.to_matrix(kwargs['dates'])
        else:
            mat = self.to_matrix()
        scaler = StandardScaler()
        pca = PCA(n_components=nComp)
        self._pipeline = Pipeline([('scaler', scaler), ('pca', pca)])
        self._pipeline.fit(mat)
        
        if 'file' in kwargs:
            tofile(kwargs['file'], self._pipeline)
        
        return self._pipeline 
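A minimal sketch of the same scaler-plus-PCA pipeline with illustrative data; note that a float n_components (here 0.995) tells PCA to keep however many components explain that fraction of the variance:

import numpy as np
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

mat = np.random.RandomState(0).randn(100, 20)
pipeline = Pipeline([('scaler', StandardScaler()),
                     ('pca', PCA(n_components=0.995))])
reduced = pipeline.fit_transform(mat)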
Example #13
Source File: example7.py    From bert-as-service with MIT License
def vis(embed, vis_alg='PCA', pool_alg='REDUCE_MEAN'):
    plt.close()
    fig = plt.figure()
    plt.rcParams['figure.figsize'] = [21, 7]
    for idx, ebd in enumerate(embed):
        ax = plt.subplot(2, 6, idx + 1)
        vis_x = ebd[:, 0]
        vis_y = ebd[:, 1]
        plt.scatter(vis_x, vis_y, c=subset_label, cmap=ListedColormap(["blue", "green", "yellow", "red"]), marker='.',
                    alpha=0.7, s=2)
        ax.set_title('pool_layer=-%d' % (idx + 1))
    plt.tight_layout()
    plt.subplots_adjust(bottom=0.1, right=0.95, top=0.9)
    cax = plt.axes([0.96, 0.1, 0.01, 0.3])
    cbar = plt.colorbar(cax=cax, ticks=range(num_label))
    cbar.ax.get_yaxis().set_ticks([])
    for j, lab in enumerate(['ent.', 'bus.', 'sci.', 'heal.']):
        cbar.ax.text(.5, (2 * j + 1) / 8.0, lab, ha='center', va='center', rotation=270)
    fig.suptitle('%s visualization of BERT layers using "bert-as-service" (-pool_strategy=%s)' % (vis_alg, pool_alg),
                 fontsize=14)
    plt.show() 
Example #14
Source File: fisher_iris_visualization.py    From blender-scripting with MIT License
def PCA(data, num_components=None):
    # mean center the data
    data -= data.mean(axis=0)
    # calculate the covariance matrix
    R = np.cov(data, rowvar=False)
    # calculate eigenvectors & eigenvalues of the covariance matrix
    # use 'eigh' rather than 'eig' since R is symmetric,
    # the performance gain is substantial
    V, E = np.linalg.eigh(R)
    # sort eigenvalue in decreasing order
    idx = np.argsort(V)[::-1]
    E = E[:,idx]
    # sort eigenvectors according to same index
    V = V[idx]
    # select the first num_components eigenvectors (num_components is the
    # desired dimension of the rescaled data array)
    E = E[:, :num_components]
    # carry out the transformation on the data using eigenvectors
    # and return the re-scaled data, eigenvalues, and eigenvectors
    return np.dot(E.T, data.T).T, V, E 
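A quick sanity check of this hand-rolled PCA against sklearn's, on illustrative data (components can differ in sign, which is arbitrary in PCA, hence the absolute values; note the copy, since the function above centers its input in place):

import numpy as np
from sklearn.decomposition import PCA as SkPCA

data = np.random.RandomState(0).randn(50, 4)
ours, V, E = PCA(data.copy(), num_components=2)
theirs = SkPCA(n_components=2).fit_transform(data)
assert np.allclose(np.abs(ours), np.abs(theirs), atol=1e-6)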
Example #15
Source File: neuagent.py    From dl4ir-webnav with BSD 3-Clause "New" or "Revised" License
def load_wemb(params, vocab):
    wemb = pkl.load(open(prm.wordemb_path, 'rb'))
    # next(iter(...)) works on both Python 2 and 3; dict.values()[0] is Python 2 only
    dim_emb_orig = next(iter(wemb.values())).shape[0]

    W = 0.01 * np.random.randn(prm.n_words, dim_emb_orig).astype(config.floatX)
    for word, pos in vocab.items():
        if word in wemb:
            W[pos, :] = wemb[word]

    if prm.dim_emb < dim_emb_orig:
        pca = PCA(n_components=prm.dim_emb, copy=False, whiten=True)
        W = pca.fit_transform(W)

    params['W'] = W

    return params
Example #16
Source File: DimensionReduction.py    From FAE with GNU General Public License v3.0
def GetDescription(self):
        text = "Since the dimension of feature space was high, we applied principle component analysis (PCA) on the feature matrix. " \
               "The feature vector of the transformed feature matrix was independent to each other. "
        return text 
Example #17
Source File: test_models.py    From revrand with Apache License 2.0
def test_pipeline_slm(make_gaus_data):

    X, y, Xs, ys = make_gaus_data

    slm = StandardLinearModel(LinearBasis(onescol=True))
    estimators = [('PCA', PCA()),
                  ('SLM', slm)]
    pipe = Pipeline(estimators)

    pipe.fit(X, y)
    Ey = pipe.predict(Xs)
    assert smse(ys, Ey) < 0.1 
Example #18
Source File: regression_multicollinearity.py    From practicalDataAnalysisCookbook with GNU General Public License v2.0
def reduce_PCA(x, n):
    '''
        Reduce the dimensions using Principal Component
        Analysis 
    '''
    # create the PCA object
    pca = dc.PCA(n_components=n, whiten=True)

    # learn the principal components from all the features
    return pca.fit(x)

Example #19
Source File: reduce_randomizedPCA.py    From practicalDataAnalysisCookbook with GNU General Public License v2.0
def reduce_PCA(x):
    '''
        Reduce the dimensions using Principal Component
        Analysis 
    '''
    # create the PCA object
    pca = dc.PCA(n_components=2, whiten=True)

    # learn the principal components from all the features
    return pca.fit(x) 
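Both reduce_PCA variants return the fitted object rather than transformed data, so a caller would proceed along these lines (illustrative data, with dc aliasing sklearn.decomposition as in the source files):

import numpy as np

x = np.random.RandomState(0).randn(80, 6)
pca = reduce_PCA(x)
print(pca.explained_variance_ratio_.sum())  # variance kept by the 2 components
reduced = pca.transform(x)                  # project onto those components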
Example #20
Source File: test_transformers.py    From gordo with GNU Affero General Public License v3.0
def _validate_transformer(self, transformer):
        """
        Inserts a transformer into the middle of a pipeline and runs it
        """
        pipe = Pipeline([("pca1", PCA()), ("custom", transformer), ("pca2", PCA())])
        X = np.random.random(size=100).reshape(10, 10)
        pipe.fit_transform(X) 
Example #21
Source File: DimensionReduction.py    From FAE with GNU General Public License v3.0
def SetRemainedNumber(self, number):
        super(DimensionReductionByPCA, self).SetRemainedNumber(number)
        super(DimensionReductionByPCA, self).SetModel(
            PCA(n_components=super(DimensionReductionByPCA, self).GetRemainedNumber()))
Example #22
Source File: multipca.py    From PynPoint with GNU General Public License v3.0
def create_processors(self) -> List[PcaTaskProcessor]:
        """
        Method to create a list of instances of PcaTaskProcessor.

        Returns
        -------
        list(pynpoint.util.multipca.PcaTaskProcessor, )
            PCA task processors.
        """

        processors = []

        for _ in range(self.m_num_proc):

            processors.append(PcaTaskProcessor(self.m_tasks_queue,
                                               self.m_result_queue,
                                               self.m_star_reshape,
                                               self.m_angles,
                                               self.m_scales,
                                               self.m_pca_model,
                                               self.m_im_shape,
                                               self.m_indices,
                                               self.m_requirements,
                                               self.m_processing_type))

        return processors 
Example #23
Source File: test_decompose.py    From skutil with BSD 3-Clause "New" or "Revised" License
def test_selective_pca():
    original = X
    cols = [original.columns[0]]  # Only perform on first...
    compare_cols = np.array(original[['sepal width (cm)', 'petal length (cm)',
                                      'petal width (cm)']].as_matrix())  # should be the same as the trans cols
    # note: DataFrame.as_matrix() was removed in pandas 1.0; use .to_numpy() there

    transformer = SelectivePCA(cols=cols, n_components=0.85).fit(original)
    transformed = transformer.transform(original)

    untouched_cols = np.array(transformed[['sepal width (cm)', 'petal length (cm)', 'petal width (cm)']].as_matrix())
    assert_array_almost_equal(compare_cols, untouched_cols)
    assert 'PC1' in transformed.columns
    assert transformed.shape[1] == 4
    assert isinstance(transformer.get_decomposition(), PCA)
    assert SelectivePCA().get_decomposition() is None

    # test the selective mixin
    assert isinstance(transformer.cols, list)

    # what if we want to weight it?
    pca_df = SelectivePCA(weight=True, n_components=0.99, as_df=False).fit_transform(original)
    pca_arr = SelectivePCA(weight=True, n_components=0.99, as_df=False).fit_transform(iris.data)
    assert_array_equal(pca_df, pca_arr)

    # hack to assert they are not equal if weighted
    pca_arr = SelectivePCA(weight=False, n_components=0.99, as_df=False).fit_transform(iris.data)
    assert_fails(assert_array_equal, AssertionError, pca_df, pca_arr) 
Example #24
Source File: decompose.py    From skutil with BSD 3-Clause "New" or "Revised" License
def score(self, X, y=None):
        """Return the average log-likelihood of all samples.
        This calls sklearn.decomposition.PCA's score method
        on the specified columns [1].

        Parameters
        ----------

        X: Pandas ``DataFrame``, shape=(n_samples, n_features)
            The data to score.

        y: None
            Passthrough for pipeline/gridsearch


        Returns
        -------

        ll: float
            Average log-likelihood of the samples under the fit
            PCA model (`self.pca_`)


        References
        ----------

        .. [1] Bishop, C.  "Pattern Recognition and Machine Learning"
               12.2.1 p. 574 http://www.miketipping.com/papers/met-mppca.pdf
        """
        check_is_fitted(self, 'pca_')
        X, _ = validate_is_pd(X, self.cols)
        cols = X.columns if not self.cols else self.cols

        ll = self.pca_.score(X[cols].as_matrix(), _as_numpy(y))
        return ll 
Example #25
Source File: decompose.py    From skutil with BSD 3-Clause "New" or "Revised" License
def get_decomposition(self):
        """Overridden from the :class:``skutil.decomposition.decompose._BaseSelectiveDecomposer`` class,
        this method returns the internal decomposition class: 
        ``sklearn.decomposition.PCA``

        Returns
        -------
        self.pca_ : ``sklearn.decomposition.PCA``
            The fit internal decomposition class
        """
        return self.pca_ if hasattr(self, 'pca_') else None 
Example #26
Source File: decompose.py    From skutil with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y=None):
        """Fit the transformer.

        Parameters
        ----------

        X : Pandas ``DataFrame``, shape=(n_samples, n_features)
            The Pandas frame to fit. The frame will only
            be fit on the prescribed ``cols`` (see ``__init__``) or
            all of them if ``cols`` is None. Furthermore, ``X`` will
            not be altered in the process of the fit.

        y : None
            Passthrough for ``sklearn.pipeline.Pipeline``. Even
            if explicitly set, will not change behavior of ``fit``.

        Returns
        -------

        self
        """
        # check on state of X and cols
        X, self.cols = validate_is_pd(X, self.cols)
        cols = _cols_if_none(X, self.cols)

        # fails thru if names don't exist:
        # note: .as_matrix() was removed in pandas 1.0; use .to_numpy() there
        self.pca_ = PCA(
            n_components=self.n_components,
            whiten=self.whiten).fit(X[cols].as_matrix())

        return self 
Example #27
Source File: utils.py    From MNIST-baselines with MIT License
def skPCA(data, dim):
    model = PCA(n_components=dim)
    model.fit(data)
    return model.transform(data) 
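Usage sketch with illustrative data; fitting and then transforming the same matrix, as skPCA does, is equivalent to PCA(n_components=dim).fit_transform(data):

import numpy as np

data = np.random.RandomState(0).rand(100, 784)  # e.g. flattened 28x28 images
low_dim = skPCA(data, dim=50)                   # shape (100, 50)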
Example #28
Source File: OptimalProjection.py    From scattertext with Apache License 2.0
def get_optimal_category_projection_by_rank(
        corpus,
        n_dims=2,
        n_steps=20,
        projector=lambda rank, n_dims: CategoryProjector(AssociationCompactorByRank(rank),
                                                         projector=PCA(n_dims)),
        verbose=False
):
    try:
        from astropy.stats import RipleysKEstimator
    except ImportError:
        raise Exception("Please install astropy")

    ripley = RipleysKEstimator(area=1., x_max=1., y_max=1., x_min=0., y_min=0.)
    min_dev = None
    best_rank = None
    best_x = None
    best_y = None
    best_projector = None
    for rank in np.linspace(1, TermCategoryRanker().get_max_rank(corpus), n_steps):

        r = np.linspace(0, np.sqrt(2), 100)
        category_projector = projector(rank, n_dims)
        category_projection = category_projector.project(corpus)
        for dim_1 in range(0, n_dims):
            for dim_2 in range(dim_1 + 1, n_dims):
                proj = category_projection.projection[:, [dim_1, dim_2]]
                scaled_proj = np.array([stretch_0_to_1(proj.T[0]), stretch_0_to_1(proj.T[1])]).T
                dev = np.sum(np.abs(ripley(scaled_proj, r, mode='ripley') - ripley.poisson(r)))
                if min_dev is None or dev < min_dev:
                    min_dev = dev
                    best_rank = rank
                    best_projector = category_projector
                    best_x, best_y = (dim_1, dim_2)
                if verbose:
                    print('rank', rank, 'dims', dim_1, dim_2, 'K', dev)
                    print('     best rank', best_rank, 'dims', best_x, best_y, 'K', min_dev)
    if verbose:
        print(best_rank, best_x, best_y)
    return best_projector.project(corpus, best_x, best_y) 
Example #29
Source File: util.py    From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License
def kmean_pca_batch(data, batch, k=10):
    data = np.asarray(data, dtype=np.float32)
    batch = np.asarray(batch, dtype=np.float32)
    a = np.zeros(batch.shape[0])
    for i in np.arange(batch.shape[0]):
        tmp = np.concatenate((data, [batch[i]]))
        tmp_pca = PCA(n_components=2).fit_transform(tmp)
        a[i] = mle_single(tmp_pca[:-1], tmp_pca[-1], k=k)
    return a 
Example #30
Source File: pca.py    From classification-of-encrypted-traffic with MIT License
def runpca(X, num_comp=None):
    pca = PCA(n_components=num_comp, svd_solver='full')
    pca.fit(X)
    # print(pca.n_components_)
    # print(pca.explained_variance_ratio_)
    # print(sum(pca.explained_variance_ratio_))
    return pca
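Because svd_solver='full' is used, num_comp may also be a float in (0, 1), in which case PCA keeps however many components explain that fraction of the variance; an illustrative usage:

import numpy as np

X = np.random.RandomState(0).randn(200, 30)
pca = runpca(X, num_comp=0.95)
print(pca.n_components_)  # number of components actually retained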