Python sklearn.decomposition.PCA Examples

The following are 30 code examples showing how to use sklearn.decomposition.PCA(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module sklearn.decomposition, or try the search function.

Example 1
Project: neural-combinatorial-optimization-rl-tensorflow   Author: MichelDeudon   File: dataset.py    License: MIT License 6 votes vote down vote up
def gen_instance(self, max_length, dimension, test_mode=True, seed=0):
    """Generate one random instance and its PCA-projected coordinates.

    :param max_length: number of points to draw.
    :param dimension: number of integer coordinates per point; also used as
        the number of PCA components.
    :param test_mode: when equal to ``True``, the raw integer points are
        returned alongside the projected ones.
    :param seed: a non-zero value reseeds NumPy's global RNG before drawing;
        ``0`` leaves the RNG state untouched.
    :return: ``(input_, seq)`` in test mode, otherwise ``input_`` only.
    """
    if seed != 0:
        np.random.seed(seed)

    # Integer coordinates drawn by np.random.randint(100, ...), i.e. 0..99.
    seq = np.random.randint(100, size=(max_length, dimension))

    # PCA is used to center and rotate the point cloud.
    projector = PCA(n_components=dimension)
    sequence = projector.fit_transform(seq)

    # Divide by the coordinate range. NOTE(review): the projected values are
    # centered, so the result is presumably not confined to [0, 1) - confirm.
    input_ = sequence / 100

    return (input_, seq) if test_mode == True else input_

    # Generate random batch for training procedure 
Example 2
Project: transferlearning   Author: jindongwang   File: intra_alignment.py    License: MIT License 6 votes vote down vote up
def getGFKDim(Xs, Xt):
    """Search for a subspace dimension from source, target and joint PCA bases.

    For d = 1 .. round(n_features / 2), the first d principal axes of ``Xs``
    and of ``Xt`` are each compared (via ``getAngle``) with the first d axes
    of the stacked data. As soon as any entry of row 1 of the averaged
    result rounds to 100 after scaling by 100, the index of the first such
    entry is returned.

    :param Xs: source samples, one row per sample.
    :param Xt: target samples, one row per sample.
    :return: index of the first matching entry, or ``None`` when no
        dimension in the searched range matches.
    """
    # Principal axes as columns, fitted in the order: source, target, joint.
    src_axes, tgt_axes, joint_axes = (
        PCA().fit(samples).components_.T
        for samples in (Xs, Xt, np.vstack((Xs, Xt)))
    )

    max_dim = round(Xs.shape[1] * 0.5)

    for d in range(1, max_dim + 1):
        joint = joint_axes[:, :d]
        angle_src = getAngle(src_axes[:, :d], joint, d)
        angle_tgt = getAngle(tgt_axes[:, :d], joint, d)
        mean_angle = (angle_src + angle_tgt) * 0.5
        hits = [round(mean_angle[1, col] * 100) == 100 for col in range(d)]
        if True in hits:
            return hits.index(True)

    # No dimension matched: callers receive None.
    return None
Example 3
Project: HorizonNet   Author: sunset1995   File: post_proc.py    License: MIT License 6 votes vote down vote up
def get_rot_rad(init_coorx, coory, z=50, coorW=1024, coorH=512, floorW=1024, floorH=512, tol=5):
    """Estimate a rotation value from per-group principal directions.

    Every column 0..coorW-1 is paired with its ``coory`` value and mapped
    through ``np_coor2xy``. For each group id produced by ``get_gpid`` a
    one-component PCA is fitted and its axis is converted by
    ``_get_rot_rad`` into one suggestion. The result is the mean of the
    longest run of sorted suggestions whose consecutive gaps do not exceed
    ``tol``; if no such run exists, the mean of all suggestions is kept.

    Returns ``(dx, rot_rad)`` where ``dx = int(round(rot_rad * 1024 / 360))``.
    NOTE(review): the 1024 is hard-coded rather than taken from ``coorW`` -
    confirm that is intended.
    """
    group_ids = get_gpid(init_coorx, coorW)
    columns = np.arange(coorW)[:, None]
    coor = np.hstack([columns, coory[:, None]])
    floor_xy = np_coor2xy(coor, z, coorW, coorH, floorW, floorH)

    suggestions = []
    for group in range(len(init_coorx)):
        axis_fit = PCA(n_components=1)
        axis_fit.fit(floor_xy[group_ids == group])
        suggestions.append(_get_rot_rad(*axis_fit.components_[0]))
    # The huge sentinel sorts last and always closes the final run.
    suggestions = np.sort(suggestions + [1e9])

    # Fallback: mean of all real suggestions (sentinel excluded).
    rot_rad = np.mean(suggestions[:-1])
    longest_run = -1
    run_start = 0
    for pos in range(1, len(suggestions)):
        gap = suggestions[pos] - suggestions[pos - 1]
        if gap > tol:
            # Gap too large: a new run starts here.
            run_start = pos
        elif pos - run_start > longest_run:
            rot_rad = suggestions[run_start:pos + 1].mean()
            longest_run = pos - run_start

    dx = int(round(rot_rad * 1024 / 360))
    return dx, rot_rad
Example 4
Project: CalibrationNN   Author: Andres-Hernandez   File: data_utils.py    License: GNU General Public License v3.0 6 votes vote down vote up
def pca(self, **kwargs):
    """Fit a StandardScaler + PCA pipeline on this object's matrix.

    Recognised keyword arguments:
      n_components -- forwarded to PCA; defaults to 0.995.
      dates        -- if present, passed to ``self.to_matrix``.
      file         -- if present, the fitted pipeline is handed to ``tofile``
                      together with this value.

    The fitted pipeline is stored on ``self._pipeline`` and returned.
    """
    n_comp = kwargs.get('n_components', 0.995)
    mat = self.to_matrix(kwargs['dates']) if 'dates' in kwargs else self.to_matrix()

    steps = [('scaler', StandardScaler()), ('pca', PCA(n_components=n_comp))]
    self._pipeline = Pipeline(steps)
    self._pipeline.fit(mat)

    if 'file' in kwargs:
        tofile(kwargs['file'], self._pipeline)

    return self._pipeline
Example 5
Project: scattertext   Author: JasonKessler   File: CategoryProjector.py    License: Apache License 2.0 6 votes vote down vote up
def __init__(self,
             weighter=LengthNormalizer(),
             normalizer=StandardScaler(),
             selector=AssociationCompactor(1000, RankDifference),
             projector=PCA(2)):
    '''
    Store the four pipeline stages on the instance.

    :param weighter: instance of an sklearn class with fit_transform to weight X category corpus.
    :param normalizer: instance of an sklearn class with fit_transform to normalize term X category corpus.
    :param selector: instance of a compactor class, if None, no compaction will be done.
    :param projector: instance of an sklearn class with fit_transform
    '''
    # NOTE(review): the default stage objects are created once, when the
    # function is defined, so instances built without arguments share them.
    # Confirm that sharing is acceptable for callers.
    (self.weighter_,
     self.normalizer_,
     self.selector_,
     self.projector_) = (weighter, normalizer, selector, projector)
Example 6
Project: scattertext   Author: JasonKessler   File: CategoryProjector.py    License: Apache License 2.0 6 votes vote down vote up
def __init__(self, doc2vec_builder=None, projector=PCA(2)):
    '''
    Set up the Doc2Vec builder and the projector.

    :param doc2vec_builder: Doc2VecBuilder, optional
        If None, a default model will be used
    :param projector: object
        Has fit_transform method
    :raises Exception: if no builder is given and gensim cannot be imported
    '''
    # NOTE(review): the default projector is created once at definition
    # time and is therefore shared by every instance built without one.
    if doc2vec_builder is None:
        try:
            import gensim
        except ImportError:
            # Only a failed import gets the install hint; any other error
            # now propagates instead of being masked by a bare except.
            raise Exception("Please install gensim before using Doc2VecCategoryProjector/")
        self.doc2vec_builder = Doc2VecBuilder(
            gensim.models.Doc2Vec(vector_size=100, window=5, min_count=5, workers=6, alpha=0.025,
                                  min_alpha=0.025, epochs=50)
        )
    else:
        # isinstance also accepts subclasses of Doc2VecBuilder.
        assert isinstance(doc2vec_builder, Doc2VecBuilder)
        self.doc2vec_builder = doc2vec_builder
    self.projector = projector
Example 7
Project: DCC   Author: shahsohil   File: edgeConstruction.py    License: MIT License 6 votes vote down vote up
def parse_args():
    """ Build the command-line parser and parse the process arguments """
    parser = argparse.ArgumentParser(description='Feature extraction for RCC algorithm')

    # (flag, add_argument keyword arguments), registered in this order.
    option_table = (
        ('--dataset', dict(default=None, type=str,
                           help='The entered dataset file must be in the Data folder')),
        ('--prep', dict(dest='prep', default='none', type=str,
                        help='preprocessing of data: scale,minmax,normalization,none')),
        ('--algo', dict(dest='algo', default='mknn', type=str,
                        help='Algorithm to use: knn,mknn')),
        ('--k', dict(dest='k', default=10, type=int,
                     help='Number of nearest neighbor to consider')),
        ('--pca', dict(dest='pca', default=None, type=int,
                       help='Dimension of PCA processing before kNN graph construction')),
        ('--samples', dict(dest='nsamples', default=0, type=int,
                           help='total samples to consider')),
        ('--format', dict(choices=['mat', 'pkl', 'h5'], default='mat', help='Dataset format')),
    )
    for flag, options in option_table:
        parser.add_argument(flag, **options)

    return parser.parse_args()
Example 8
Project: DeepDIVA   Author: DIVA-DIA   File: embedding.py    License: GNU Lesser General Public License v3.0 6 votes vote down vote up
def pca(features, n_components=2):
    """
    Project the feature vectors with PCA and return the embedded points.

    Parameters
    ----------
    features: numpy.ndarray
        the input feature vectors.
    n_components: int
        number of components the features are reduced to

    Returns
    -------
    embedding: numpy.ndarray
        the transformed points, one row per input feature vector
    """
    reducer = PCA(n_components=n_components)
    return reducer.fit_transform(features)

######################################################################################################################## 
Example 9
Project: PynPoint   Author: PynPoint   File: multipca.py    License: GNU General Public License v3.0 6 votes vote down vote up
def create_writer(self,
                  image_out_port: None) -> PcaTaskWriter:
    """
    Build the PcaTaskWriter for this processor.

    Parameters
    ----------
    image_out_port : None
        Output port, not used.

    Returns
    -------
    pynpoint.util.multipca.PcaTaskWriter
        Writer constructed from the result queue, the four output ports,
        the data mutex and the requirements stored on this instance.
    """

    writer_args = (
        self.m_result_queue,
        self.m_mean_out_port,
        self.m_median_out_port,
        self.m_weighted_out_port,
        self.m_clip_out_port,
        self.m_data_mutex,
        self.m_requirements,
    )
    return PcaTaskWriter(*writer_args)
Example 10
Project: PynPoint   Author: PynPoint   File: multipca.py    License: GNU General Public License v3.0 6 votes vote down vote up
def init_creator(self,
                 image_in_port: None) -> PcaTaskCreator:
    """
    Build the PcaTaskCreator for this processor.

    Parameters
    ----------
    image_in_port : None
        Input port, not used.

    Returns
    -------
    pynpoint.util.multipca.PcaTaskCreator
        Creator constructed from the task queue, the process count and the
        PCA numbers stored on this instance.
    """

    creator_args = (self.m_tasks_queue, self.m_num_proc, self.m_pca_numbers)
    return PcaTaskCreator(*creator_args)
Example 11
Project: bert-as-service   Author: hanxiao   File: example7.py    License: MIT License 6 votes vote down vote up
def vis(embed, vis_alg='PCA', pool_alg='REDUCE_MEAN'):
    """Scatter-plot each 2-D embedding in ``embed`` on a 2x6 subplot grid.

    Columns 0 and 1 of every entry are used as x and y. Point colours come
    from the module-level ``subset_label``, and the colour bar ticks from the
    module-level ``num_label``. ``vis_alg`` and ``pool_alg`` only appear in
    the figure title.

    :param embed: iterable of arrays indexable as ``ebd[:, 0]`` / ``ebd[:, 1]``.
    :param vis_alg: name of the projection algorithm shown in the title.
    :param pool_alg: name of the pooling strategy shown in the title.
    """
    plt.close()
    figsize = [21, 7]
    # rcParams only applies to figures created afterwards. The original set
    # it after plt.figure(), so the figure being drawn kept the old size.
    # Set it first (still persists for later figures) and pass it explicitly.
    plt.rcParams['figure.figsize'] = figsize
    fig = plt.figure(figsize=figsize)
    for idx, ebd in enumerate(embed):
        ax = plt.subplot(2, 6, idx + 1)
        vis_x = ebd[:, 0]
        vis_y = ebd[:, 1]
        plt.scatter(vis_x, vis_y, c=subset_label, cmap=ListedColormap(["blue", "green", "yellow", "red"]), marker='.',
                    alpha=0.7, s=2)
        ax.set_title('pool_layer=-%d' % (idx + 1))
    plt.tight_layout()
    plt.subplots_adjust(bottom=0.1, right=0.95, top=0.9)
    # Dedicated axes to the right of the grid for the colour bar.
    cax = plt.axes([0.96, 0.1, 0.01, 0.3])
    cbar = plt.colorbar(cax=cax, ticks=range(num_label))
    cbar.ax.get_yaxis().set_ticks([])
    # Replace the numeric ticks with four class labels at (2j + 1) / 8.
    for j, lab in enumerate(['ent.', 'bus.', 'sci.', 'heal.']):
        cbar.ax.text(.5, (2 * j + 1) / 8.0, lab, ha='center', va='center', rotation=270)
    fig.suptitle('%s visualization of BERT layers using "bert-as-service" (-pool_strategy=%s)' % (vis_alg, pool_alg),
                 fontsize=14)
    plt.show()
Example 12
Project: dl4ir-webnav   Author: nyu-dl   File: neuagent.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def load_wemb(params, vocab):
    """Build the word-embedding matrix and store it in ``params['W']``.

    Rows default to small random values (0.01 * standard normal). Rows whose
    word is found in the pickled embedding dictionary at
    ``prm.wordemb_path`` are overwritten with the stored vector. When
    ``prm.dim_emb`` is smaller than the stored vector size, the matrix is
    reduced with a whitened PCA.

    :param params: dict that receives the matrix under the key ``'W'``.
    :param vocab: mapping from word to row position in the matrix.
    :return: the same ``params`` dict.
    """
    # NOTE(review): unpickling can execute arbitrary code; only load
    # embedding files from a trusted source.
    with open(prm.wordemb_path, 'rb') as wemb_file:  # closed deterministically
        wemb = pkl.load(wemb_file)
    # dict.values() is not indexable on Python 3; take the first vector
    # through an iterator, which works on both Python 2 and 3.
    dim_emb_orig = next(iter(wemb.values())).shape[0]

    W = 0.01 * np.random.randn(prm.n_words, dim_emb_orig).astype(config.floatX)
    for word, pos in vocab.items():
        if word in wemb:
            W[pos, :] = wemb[word]

    if prm.dim_emb < dim_emb_orig:
        pca = PCA(n_components=prm.dim_emb, copy=False, whiten=True)
        W = pca.fit_transform(W)

    params['W'] = W

    return params
Example 13
Project: blender-scripting   Author: njanakiev   File: fisher_iris_visualization.py    License: MIT License 6 votes vote down vote up
def PCA(data, num_components=None):
    """Project ``data`` onto its leading principal components.

    :param data: 2-D array-like, one row per observation and one column per
        variable. It is not modified.
    :param num_components: number of leading components to keep; ``None``
        keeps all of them.
    :return: tuple ``(projected, eigenvalues, eigenvectors)`` where the
        eigenvalues are sorted in decreasing order (all of them are
        returned) and the eigenvectors are the kept components as columns.
    """
    # Center into a new array. The in-place `data -= mean` used before
    # modified the caller's array and raised for integer input.
    data = np.asarray(data)
    centered = data - data.mean(axis=0)
    # covariance matrix of the variables (columns)
    R = np.cov(centered, rowvar=False)
    # 'eigh' rather than 'eig' since R is symmetric
    V, E = np.linalg.eigh(R)
    # order eigenvalues, and eigenvectors with them, by decreasing eigenvalue
    idx = np.argsort(V)[::-1]
    E = E[:, idx]
    V = V[idx]
    # keep only the requested number of leading eigenvectors
    E = E[:, :num_components]
    # project the centered data onto the kept eigenvectors
    return np.dot(centered, E), V, E
Example 14
Project: FAE   Author: salan668   File: DimensionReduction.py    License: GNU General Public License v3.0 6 votes vote down vote up
def Transform(self, data_container, store_folder='', store_key=''):
    """Apply the stored model to a data container and return a new container.

    The input container is deep-copied; the copy receives the transformed
    array and feature names 'PCA_feature_1' .. 'PCA_feature_N', where N is
    the parent class's GetRemainedNumber().

    :param data_container: container providing GetArray().
    :param store_folder: when non-empty, SaveDataContainer is called.
    :param store_key: forwarded to SaveDataContainer.
    :return: the new, transformed data container.
    """
    array = data_container.GetArray()
    # A column-count mismatch is only reported; transform is still attempted.
    if array.shape[1] != self.GetModel().components_.shape[1]:
        print('Data can not be transformed by existed PCA')
    transformed = self.GetModel().transform(array)

    remained = super(DimensionReductionByPCA, self).GetRemainedNumber()
    feature_names = ['PCA_feature_' + str(number) for number in range(1, remained + 1)]

    result_container = deepcopy(data_container)
    result_container.SetArray(transformed)
    result_container.SetFeatureName(feature_names)
    result_container.UpdateFrameByData()

    if store_folder:
        # NOTE(review): this saves the original container, not the
        # transformed one - confirm that is intended.
        self.SaveDataContainer(data_container, store_folder, store_key)

    return result_container
Example 15
Project: bert-extractive-summarizer   Author: dmmiller612   File: cluster_features.py    License: MIT License 6 votes vote down vote up
def __init__(
    self,
    features: ndarray,
    algorithm: str = 'kmeans',
    pca_k: int = None,
    random_state: int = 12345
):
    """
    :param features: the embedding matrix to cluster
    :param algorithm: name of the clustering algorithm to use
    :param pca_k: when truthy, the features are first reduced by PCA to this many components
    :param random_state: random state stored on the instance
    """

    # Only run PCA when a component count was supplied (None / 0 skip it).
    self.features = PCA(n_components=pca_k).fit_transform(features) if pca_k else features

    self.algorithm, self.pca_k, self.random_state = algorithm, pca_k, random_state
Example 16
Project: BrainSpace   Author: MICA-MNI   File: embedding.py    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def fit(self, x):
    """ Fit PCA on the input and store the projection and variances.

    Parameters
    ----------
    x : ndarray, shape(n_samples, n_feat)
        Input matrix.

    Returns
    -------
    self : object
        Returns self, with ``maps_`` set to the transformed input and
        ``lambdas_`` set to the explained variance of each component.

    """

    decomposition = PCA(n_components=self.n_components,
                        random_state=self.random_state)
    projected = decomposition.fit_transform(x)

    self.maps_ = projected
    self.lambdas_ = decomposition.explained_variance_
    return self
Example 17
Project: neural-fingerprinting   Author: StephanZheng   File: util.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def kmean_pca_batch(data, batch, k=10):
    """Score each batch sample after a joint 2-component PCA with ``data``.

    Each sample of ``batch`` is appended to ``data``, the stack is projected
    to two dimensions, and ``mle_single`` is evaluated on the projected
    reference rows versus the projected sample.

    :param data: reference samples (converted to float32).
    :param batch: samples to score (converted to float32).
    :param k: forwarded to ``mle_single``.
    :return: 1-D array with one value per batch sample.
    """
    data = np.asarray(data, dtype=np.float32)
    batch = np.asarray(batch, dtype=np.float32)
    scores = np.zeros(batch.shape[0])
    for row, sample in enumerate(batch):
        stacked = np.concatenate((data, [sample]))
        projected = PCA(n_components=2).fit_transform(stacked)
        # The last projected row is the sample; the others are references.
        scores[row] = mle_single(projected[:-1], projected[-1], k=k)
    return scores
Example 18
Project: transferlearning   Author: jindongwang   File: intra_alignment.py    License: MIT License 5 votes vote down vote up
def PCA_map(Xs, Xt):
    """Project source and target data with one PCA fitted on both.

    The number of kept columns comes from ``getGFKDim(Xs, Xt)``.

    :param Xs: source samples, one row per sample.
    :param Xt: target samples, one row per sample.
    :return: ``(Xs_new, Xt_new)``, the projected source and target rows.
    """
    dim = getGFKDim(Xs, Xt)
    n_source = Xs.shape[0]
    stacked = np.vstack((Xs, Xt))
    projected = PCA().fit_transform(stacked)[:, :dim]
    # Source rows come first in the stack, target rows follow.
    return projected[:n_source, :], projected[n_source:, :]
Example 19
Project: PHATE   Author: KrishnaswamyLab   File: mds.py    License: GNU General Public License v2.0 5 votes vote down vote up
def classic(D, n_components=2, random_state=None):
    """Classic MDS via PCA with the randomized SVD solver

    The distances are squared, the column means and then the row means are
    subtracted, and the result is reduced with PCA.

    Parameters
    ----------
    D : array-like, shape=[n_samples, n_samples]
        pairwise distances

    n_components : int, optional (default: 2)
        number of dimensions in which to embed `D`

    random_state : int, RandomState or None, optional (default: None)
        random state passed to PCA

    Returns
    -------
    Y : array-like, embedded data [n_sample, ndim]
    """
    _logger.debug(
        "Performing classic MDS on {} of shape {}...".format(type(D).__name__, D.shape)
    )
    squared = D ** 2
    col_centered = squared - squared.mean(axis=0)[None, :]
    centered = col_centered - col_centered.mean(axis=1)[:, None]
    reducer = PCA(
        n_components=n_components, svd_solver="randomized", random_state=random_state
    )
    return reducer.fit_transform(centered)
Example 20
Project: Generative-Latent-Optimization-Tensorflow   Author: clvrai   File: download.py    License: MIT License 5 votes vote down vote up
def pca_feature(X, d):
    """Flatten each sample of ``X`` and reduce it to ``d`` PCA components.

    ``X`` is divided by 255 and every sample (first axis) is flattened to a
    single row before PCA is fitted.

    :param X: array whose first axis indexes samples.
    :param d: number of PCA components.
    :return: the transformed array, one row per sample.
    """
    scaled = X / 255.
    from sklearn.decomposition import PCA
    n_samples = scaled.shape[0]
    flat = np.reshape(scaled, (n_samples, np.prod(scaled.shape[1:])))
    return PCA(n_components=d).fit_transform(flat)
Example 21
Project: radiometric_normalization   Author: planetlabs   File: pca_filter.py    License: Apache License 2.0 5 votes vote down vote up
def pca_fit_and_filter_pixel_list(candidate_data, reference_data, parameters):
    ''' Fits a PCA on the candidate/reference data of a single band and
    then filters the same data with that fit.

    :param list candidate_data: A list of valid candidate data
    :param list reference_data: A list of coincident valid reference data
    :param pca_options parameters: Method specific parameters. Currently:
        threshold (float): Representing the width of the PCA filter

    :returns: The result of _pca_filter_single_band for these inputs
    '''
    model = _pca_fit_single_band(candidate_data, reference_data)
    filter_width = parameters.threshold
    return _pca_filter_single_band(
        model, candidate_data, reference_data, filter_width)
Example 22
Project: radiometric_normalization   Author: planetlabs   File: pca_filter.py    License: Apache License 2.0 5 votes vote down vote up
def _pca_fit_single_band(cand_valid, ref_valid):
    ''' Fits a two-component scikit-learn PCA on the array built from the
    candidate and reference values and returns the fitted object
    '''
    points = _numpy_array_from_2arrays(cand_valid, ref_valid)

    # fit() returns the estimator itself, so the fitted PCA is returned
    return PCA(n_components=2).fit(points)
Example 23
Project: radiometric_normalization   Author: planetlabs   File: pca_filter.py    License: Apache License 2.0 5 votes vote down vote up
def _pca_filter_single_band(pca, cand_valid, ref_valid, threshold):
    ''' Transforms the data with the fitted PCA and flags the values that
    lie within [-threshold, threshold]
    '''
    major_values = _pca_transform_get_only_major_values(
        pca, cand_valid, ref_valid)

    # Symmetric band around zero
    lower_bound = threshold * -1
    above_lower = major_values >= lower_bound
    below_upper = major_values <= threshold

    return numpy.logical_and(above_lower, below_upper)
Example 24
Project: deeplearn_hsi   Author: hantek   File: hsi_utils.py    License: BSD 2-Clause "Simplified" License 5 votes vote down vote up
def PCA_tramsform_img(img=None, n_principle=3):
    """
    Transform an HSI with PCA along its last (spectral) dimension. PCA is
    fitted on all pixels at once, turning an image of size
    length * width * dim into one of size length * width * n_principle.

    Parameters:
    img:                initial, unregularized HSI.
    n_principle:        target number of principal components.

    Return:
    reg_img:            regularized, transformed image.
    energy_dist:        explained variance ratio of each kept component.
    residual:           1 minus the sum of the kept ratios.

    WARNING: THE RELATIVE ENERGY BETWEEN PRINCIPAL COMPONENTS IS CHANGED BY
    THIS IMPLEMENTATION. YOU MAY NEED TO ADD PENALTY MULTIPLIERS IN THE
    HIGHER NETWORKS TO COMPENSATE FOR IT.
    """
    length, width, dim = img.shape[0], img.shape[1], img.shape[2]
    # One row per pixel; the spectral dimension is left untouched.
    pixels = numpy.asarray(img.reshape(length*width, dim),
                           dtype=theano.config.floatX)
    reducer = PCA(n_components=n_principle)
    projected = reducer.fit_transform(pixels)

    # Regularization step, delegated to scale_to_unit_interval.
    scaled = scale_to_unit_interval(ndar=projected, eps=1e-8)
    reg_img = numpy.asarray(scaled.reshape(length, width, n_principle),
                            dtype=theano.config.floatX)
    energy_dist = reducer.explained_variance_ratio_
    kept_energy = numpy.sum(energy_dist[0: n_principle])
    return reg_img, energy_dist, 1 - kept_energy
Example 25
Project: hierarchical_loc   Author: ethz-asl   File: evaluation.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def retrieval(ref_descriptors, query_descriptors, max_num_nn, pca_dim=0):
    """Return, for each query descriptor, the indices of its nearest references.

    When ``pca_dim`` is non-zero, both sets are normalized, projected with a
    PCA fitted on the reference set only, and normalized again before the
    k-d tree search.

    :param ref_descriptors: reference descriptors, one row each.
    :param query_descriptors: query descriptors, one row each.
    :param max_num_nn: number of neighbours requested per query.
    :param pca_dim: number of PCA components; ``0`` disables the projection.
    :return: neighbour indices as returned by ``cKDTree.query``.
    """
    if pca_dim != 0:
        reducer = PCA(n_components=pca_dim)
        ref_projected = reducer.fit_transform(normalize(ref_descriptors))
        ref_descriptors = normalize(ref_projected)
        query_projected = reducer.transform(normalize(query_descriptors))
        query_descriptors = normalize(query_projected)

    _, indices = cKDTree(ref_descriptors).query(query_descriptors, k=max_num_nn)
    return indices
Example 26
Project: 2D-Motion-Retargeting   Author: ChrisWu1997   File: cluster.py    License: MIT License 5 votes vote down vote up
def tsne_on_pca(arr, is_PCA=True):
    """
    embed data in 2-D with t-SNE, optionally after a 50-component PCA
    :param arr: (nr_examples, nr_features)
    :param is_PCA: when truthy, reduce to 50 PCA components before t-SNE
    :return: the 2-component t-SNE embedding
    """
    reduced = PCA(n_components=50).fit_transform(arr) if is_PCA else arr
    return TSNE(n_components=2).fit_transform(reduced)
Example 27
Project: xcessiv   Author: reiinakano   File: test_functions.py    License: Apache License 2.0 5 votes vote down vote up
def test_non_serializable_parameters(self):
    """Hyperparameters reported for a PCA + random-forest pipeline must be valid JSON."""
    steps = [('pca', PCA()), ('rf', RandomForestClassifier())]
    metrics = dict(Accuracy=self.source)
    _, hyperparameters = functions.verify_estimator_class(
        Pipeline(steps),
        'predict_proba',
        metrics,
        self.dataset_properties
    )
    assert functions.is_valid_json(hyperparameters)
Example 28
Project: blackbox-attacks   Author: sunblaze-ucb   File: cifar10_query_based.py    License: MIT License 5 votes vote down vote up
def pca_components(X, dim):
    """Return the leading normalized principal axes of ``X`` as columns.

    ``X`` is reshaped to ``(len(X), dim)`` and a ``dim``-component PCA is
    fitted. The number of columns kept is read from the module-level
    ``args.num_comp``.
    """
    flat = X.reshape((len(X), dim))
    fitted = PCA(n_components=dim)
    fitted.fit(flat)

    # One principal axis per column, each passed through normalize.
    axes = normalize(fitted.components_.T, axis=0)
    return axes[:, :args.num_comp]
Example 29
Project: blackbox-attacks   Author: sunblaze-ucb   File: query_based_attack.py    License: MIT License 5 votes vote down vote up
def pca_components(X, dim):
    """Fit PCA on the flattened samples and return its leading axes.

    The samples are reshaped to ``(len(X), dim)``. The principal axes are
    transposed into columns, passed through ``normalize`` along axis 0, and
    truncated to the first ``args.num_comp`` columns (``args`` is a
    module-level object).
    """
    samples = X.reshape((len(X), dim))
    model = PCA(n_components=dim)
    model.fit(samples)

    column_axes = model.components_.T
    unit_axes = normalize(column_axes, axis=0)
    return unit_axes[:, :args.num_comp]
Example 30
Project: ibllib   Author: int-brain-lab   File: cca.py    License: MIT License 5 votes vote down vote up
def _pca(data, n_pcs):
    """Fit an ``n_pcs``-component PCA on ``data`` and return ``data`` transformed by it."""
    from sklearn.decomposition import PCA
    # fit() returns the estimator, so transform can be called on the result.
    model = PCA(n_components=n_pcs).fit(data)
    return model.transform(data)