Python sklearn.decomposition.PCA Examples
The following are 30 code examples of sklearn.decomposition.PCA(). You can vote up the ones you like or vote down the ones you don't like, and you can go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the sklearn.decomposition module, or try the search function.
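Before the project examples, here is a minimal, self-contained sketch of the typical sklearn.decomposition.PCA workflow. The synthetic data and the choice of two components are illustrative assumptions, not taken from any of the projects below.

import numpy as np
from sklearn.decomposition import PCA

# synthetic data: 200 samples with 10 correlated features
rng = np.random.RandomState(0)
X = rng.randn(200, 10) @ rng.randn(10, 10)

# fit PCA and project onto the first two principal components
pca = PCA(n_components=2)
X_reduced = pca.fit_transform(X)

print(X_reduced.shape)                 # (200, 2)
print(pca.explained_variance_ratio_)   # fraction of variance captured by each component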
Example #1
Source File: embedding.py From DeepDIVA with GNU Lesser General Public License v3.0 | 6 votes |
def pca(features, n_components=2):
    """
    Returns the embedded points for PCA.

    Parameters
    ----------
    features : numpy.ndarray
        contains the input feature vectors.
    n_components : int
        number of components to transform the features into

    Returns
    -------
    embedding : numpy.ndarray
        x, y (z) points that the feature vectors have been transformed into
    """
    embedding = PCA(n_components=n_components).fit_transform(features)
    return embedding
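A possible way to call this helper, assuming numpy is imported as np alongside the function above; the feature matrix here is random and purely illustrative.

features = np.random.rand(500, 128)        # e.g. 500 feature vectors of dimension 128
points_2d = pca(features)                  # default n_components=2
points_3d = pca(features, n_components=3)
print(points_2d.shape, points_3d.shape)    # (500, 2) (500, 3)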
Example #2
Source File: CategoryProjector.py From scattertext with Apache License 2.0 | 6 votes |
def __init__(self,
             weighter=LengthNormalizer(),
             normalizer=StandardScaler(),
             selector=AssociationCompactor(1000, RankDifference),
             projector=PCA(2)):
    '''
    :param weighter: instance of an sklearn class with fit_transform to weight X category corpus.
    :param normalizer: instance of an sklearn class with fit_transform to normalize term X category corpus.
    :param selector: instance of a compactor class; if None, no compaction will be done.
    :param projector: instance of an sklearn class with fit_transform
    '''
    self.weighter_ = weighter
    self.normalizer_ = normalizer
    self.selector_ = selector
    self.projector_ = projector
Example #3
Source File: dataset.py From neural-combinatorial-optimization-rl-tensorflow with MIT License | 6 votes |
def gen_instance(self, max_length, dimension, test_mode=True, seed=0):
    if seed != 0:
        np.random.seed(seed)

    # Randomly generate (max_length) cities with (dimension) coordinates in [0, 100]
    seq = np.random.randint(100, size=(max_length, dimension))

    # Principal Component Analysis to center & rotate coordinates
    pca = PCA(n_components=dimension)
    sequence = pca.fit_transform(seq)

    # Scale to [0, 1[
    input_ = sequence / 100

    if test_mode == True:
        return input_, seq
    else:
        return input_

# Generate random batch for training procedure
Example #4
Source File: CategoryProjector.py From scattertext with Apache License 2.0 | 6 votes |
def __init__(self, doc2vec_builder=None, projector=PCA(2)):
    '''
    :param doc2vec_builder: Doc2VecBuilder, optional
        If None, a default model will be used
    :param projector: object
        Has fit_transform method
    '''
    if doc2vec_builder is None:
        try:
            import gensim
        except:
            raise Exception("Please install gensim before using Doc2VecCategoryProjector.")
        self.doc2vec_builder = Doc2VecBuilder(
            gensim.models.Doc2Vec(vector_size=100, window=5, min_count=5, workers=6,
                                  alpha=0.025, min_alpha=0.025, epochs=50)
        )
    else:
        assert type(doc2vec_builder) == Doc2VecBuilder
        self.doc2vec_builder = doc2vec_builder
    self.projector = projector
Example #5
Source File: embedding.py From BrainSpace with BSD 3-Clause "New" or "Revised" License | 6 votes |
def fit(self, x):
    """ Compute PCA.

    Parameters
    ----------
    x : ndarray, shape = (n_samples, n_feat)
        Input matrix.

    Returns
    -------
    self : object
        Returns self.
    """
    pca = PCA(n_components=self.n_components, random_state=self.random_state)

    self.maps_ = pca.fit_transform(x)
    self.lambdas_ = pca.explained_variance_

    return self
Example #6
Source File: cluster_features.py From bert-extractive-summarizer with MIT License | 6 votes |
def __init__(
    self,
    features: ndarray,
    algorithm: str = 'kmeans',
    pca_k: int = None,
    random_state: int = 12345
):
    """
    :param features: the embedding matrix created by bert parent
    :param algorithm: which clustering algorithm to use
    :param pca_k: if you want the features to be run through PCA, this is the number of components
    :param random_state: random state
    """
    if pca_k:
        self.features = PCA(n_components=pca_k).fit_transform(features)
    else:
        self.features = features

    self.algorithm = algorithm
    self.pca_k = pca_k
    self.random_state = random_state
Example #7
Source File: multipca.py From PynPoint with GNU General Public License v3.0 | 6 votes |
def create_writer(self, image_out_port: None) -> PcaTaskWriter:
    """
    Method to create an instance of PcaTaskWriter.

    Parameters
    ----------
    image_out_port : None
        Output port, not used.

    Returns
    -------
    pynpoint.util.multipca.PcaTaskWriter
        PCA task writer.
    """
    return PcaTaskWriter(self.m_result_queue,
                         self.m_mean_out_port,
                         self.m_median_out_port,
                         self.m_weighted_out_port,
                         self.m_clip_out_port,
                         self.m_data_mutex,
                         self.m_requirements)
Example #8
Source File: multipca.py From PynPoint with GNU General Public License v3.0 | 6 votes |
def init_creator(self, image_in_port: None) -> PcaTaskCreator:
    """
    Method to create an instance of PcaTaskCreator.

    Parameters
    ----------
    image_in_port : None
        Input port, not used.

    Returns
    -------
    pynpoint.util.multipca.PcaTaskCreator
        PCA task creator.
    """
    return PcaTaskCreator(self.m_tasks_queue,
                          self.m_num_proc,
                          self.m_pca_numbers)
Example #9
Source File: post_proc.py From HorizonNet with MIT License | 6 votes |
def get_rot_rad(init_coorx, coory, z=50, coorW=1024, coorH=512,
                floorW=1024, floorH=512, tol=5):
    gpid = get_gpid(init_coorx, coorW)
    coor = np.hstack([np.arange(coorW)[:, None], coory[:, None]])
    xy = np_coor2xy(coor, z, coorW, coorH, floorW, floorH)
    xy_cor = []

    rot_rad_suggestions = []
    for j in range(len(init_coorx)):
        pca = PCA(n_components=1)
        pca.fit(xy[gpid == j])
        rot_rad_suggestions.append(_get_rot_rad(*pca.components_[0]))
    rot_rad_suggestions = np.sort(rot_rad_suggestions + [1e9])

    rot_rad = np.mean(rot_rad_suggestions[:-1])
    best_rot_rad_sz = -1
    last_j = 0
    for j in range(1, len(rot_rad_suggestions)):
        if rot_rad_suggestions[j] - rot_rad_suggestions[j - 1] > tol:
            last_j = j
        elif j - last_j > best_rot_rad_sz:
            rot_rad = rot_rad_suggestions[last_j:j + 1].mean()
            best_rot_rad_sz = j - last_j

    dx = int(round(rot_rad * 1024 / 360))
    return dx, rot_rad
Example #10
Source File: DimensionReduction.py From FAE with GNU General Public License v3.0 | 6 votes |
def Transform(self, data_container, store_folder='', store_key=''):
    data = data_container.GetArray()
    if data.shape[1] != self.GetModel().components_.shape[1]:
        print('Data can not be transformed by existed PCA')

    sub_data = self.GetModel().transform(data)
    sub_feature_name = ['PCA_feature_' + str(index) for index in
                        range(1, super(DimensionReductionByPCA, self).GetRemainedNumber() + 1)]

    new_data_container = deepcopy(data_container)
    new_data_container.SetArray(sub_data)
    new_data_container.SetFeatureName(sub_feature_name)
    new_data_container.UpdateFrameByData()
    if store_folder:
        self.SaveDataContainer(data_container, store_folder, store_key)

    return new_data_container
Example #11
Source File: edgeConstruction.py From DCC with MIT License | 6 votes |
def parse_args():
    """ Parse input arguments """
    parser = argparse.ArgumentParser(description='Feature extraction for RCC algorithm')

    parser.add_argument('--dataset', default=None, type=str,
                        help='The entered dataset file must be in the Data folder')
    parser.add_argument('--prep', dest='prep', default='none', type=str,
                        help='preprocessing of data: scale,minmax,normalization,none')
    parser.add_argument('--algo', dest='algo', default='mknn', type=str,
                        help='Algorithm to use: knn,mknn')
    parser.add_argument('--k', dest='k', default=10, type=int,
                        help='Number of nearest neighbor to consider')
    parser.add_argument('--pca', dest='pca', default=None, type=int,
                        help='Dimension of PCA processing before kNN graph construction')
    parser.add_argument('--samples', dest='nsamples', default=0, type=int,
                        help='total samples to consider')
    parser.add_argument('--format', choices=['mat', 'pkl', 'h5'], default='mat',
                        help='Dataset format')

    args = parser.parse_args()
    return args
Example #12
Source File: data_utils.py From CalibrationNN with GNU General Public License v3.0 | 6 votes |
def pca(self, **kwargs):
    if 'n_components' in kwargs:
        nComp = kwargs['n_components']
    else:
        nComp = 0.995

    if 'dates' in kwargs:
        mat = self.to_matrix(kwargs['dates'])
    else:
        mat = self.to_matrix()

    scaler = StandardScaler()
    pca = PCA(n_components=nComp)
    self._pipeline = Pipeline([('scaler', scaler), ('pca', pca)])
    self._pipeline.fit(mat)

    if 'file' in kwargs:
        tofile(kwargs['file'], self._pipeline)

    return self._pipeline
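Note that the default nComp = 0.995 is a variance fraction, not a component count: when 0 < n_components < 1, scikit-learn keeps the smallest number of components whose cumulative explained variance reaches that fraction. A rough standalone sketch of the same scaler-plus-PCA pipeline, using random data and illustrative names:

import numpy as np
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

mat = np.random.rand(100, 20)
pipeline = Pipeline([('scaler', StandardScaler()),
                     ('pca', PCA(n_components=0.995))])
pipeline.fit(mat)
print(pipeline.named_steps['pca'].n_components_)  # number of components kept to reach 99.5% variance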
Example #13
Source File: example7.py From bert-as-service with MIT License | 6 votes |
def vis(embed, vis_alg='PCA', pool_alg='REDUCE_MEAN'):
    plt.close()
    fig = plt.figure()
    plt.rcParams['figure.figsize'] = [21, 7]
    for idx, ebd in enumerate(embed):
        ax = plt.subplot(2, 6, idx + 1)
        vis_x = ebd[:, 0]
        vis_y = ebd[:, 1]
        plt.scatter(vis_x, vis_y, c=subset_label,
                    cmap=ListedColormap(["blue", "green", "yellow", "red"]),
                    marker='.', alpha=0.7, s=2)
        ax.set_title('pool_layer=-%d' % (idx + 1))
    plt.tight_layout()
    plt.subplots_adjust(bottom=0.1, right=0.95, top=0.9)
    cax = plt.axes([0.96, 0.1, 0.01, 0.3])
    cbar = plt.colorbar(cax=cax, ticks=range(num_label))
    cbar.ax.get_yaxis().set_ticks([])
    for j, lab in enumerate(['ent.', 'bus.', 'sci.', 'heal.']):
        cbar.ax.text(.5, (2 * j + 1) / 8.0, lab, ha='center', va='center', rotation=270)
    fig.suptitle('%s visualization of BERT layers using "bert-as-service" (-pool_strategy=%s)' %
                 (vis_alg, pool_alg), fontsize=14)
    plt.show()
Example #14
Source File: fisher_iris_visualization.py From blender-scripting with MIT License | 6 votes |
def PCA(data, num_components=None):
    # mean center the data
    data -= data.mean(axis=0)
    # calculate the covariance matrix
    R = np.cov(data, rowvar=False)
    # calculate eigenvectors & eigenvalues of the covariance matrix
    # use 'eigh' rather than 'eig' since R is symmetric,
    # the performance gain is substantial
    V, E = np.linalg.eigh(R)
    # sort eigenvalues in decreasing order
    idx = np.argsort(V)[::-1]
    E = E[:, idx]
    # sort eigenvectors according to same index
    V = V[idx]
    # select the first n eigenvectors (n is desired dimension
    # of rescaled data array, or dims_rescaled_data)
    E = E[:, :num_components]
    # carry out the transformation on the data using eigenvectors
    # and return the re-scaled data, eigenvalues, and eigenvectors
    return np.dot(E.T, data.T).T, V, E
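Unlike the other examples, this function implements PCA directly via an eigendecomposition of the covariance matrix rather than through scikit-learn. A small sanity-check sketch, assuming the PCA function above is defined in the same module; the data is random and illustrative, and per-component sign flips are expected because eigenvector signs are arbitrary:

import numpy as np
from sklearn.decomposition import PCA as SkPCA

data = np.random.rand(100, 5)
scores, eigvals, eigvecs = PCA(data.copy(), num_components=2)  # the function above (mutates its input, hence copy)
sk_scores = SkPCA(n_components=2).fit_transform(data)

# the projected columns should agree up to sign
print(np.allclose(np.abs(scores), np.abs(sk_scores)))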
Example #15
Source File: neuagent.py From dl4ir-webnav with BSD 3-Clause "New" or "Revised" License | 6 votes |
def load_wemb(params, vocab):
    wemb = pkl.load(open(prm.wordemb_path, 'rb'))
    dim_emb_orig = wemb.values()[0].shape[0]

    W = 0.01 * np.random.randn(prm.n_words, dim_emb_orig).astype(config.floatX)
    for word, pos in vocab.items():
        if word in wemb:
            W[pos, :] = wemb[word]

    if prm.dim_emb < dim_emb_orig:
        pca = PCA(n_components=prm.dim_emb, copy=False, whiten=True)
        W = pca.fit_transform(W)

    params['W'] = W
    return params
Example #16
Source File: DimensionReduction.py From FAE with GNU General Public License v3.0 | 5 votes |
def GetDescription(self):
    text = "Since the dimension of the feature space was high, we applied principal component analysis (PCA) " \
           "on the feature matrix. The feature vectors of the transformed feature matrix were independent " \
           "of each other. "
    return text
Example #17
Source File: test_models.py From revrand with Apache License 2.0 | 5 votes |
def test_pipeline_slm(make_gaus_data):
    X, y, Xs, ys = make_gaus_data

    slm = StandardLinearModel(LinearBasis(onescol=True))
    estimators = [('PCA', PCA()), ('SLM', slm)]
    pipe = Pipeline(estimators)

    pipe.fit(X, y)
    Ey = pipe.predict(Xs)

    assert smse(ys, Ey) < 0.1
Example #18
Source File: regression_multicollinearity.py From practicalDataAnalysisCookbook with GNU General Public License v2.0 | 5 votes |
def reduce_PCA(x, n):
    '''
        Reduce the dimensions using Principal Component Analysis
    '''
    # create the PCA object
    pca = dc.PCA(n_components=n, whiten=True)

    # learn the principal components from all the features
    return pca.fit(x)

# the file name of the dataset
Example #19
Source File: reduce_randomizedPCA.py From practicalDataAnalysisCookbook with GNU General Public License v2.0 | 5 votes |
def reduce_PCA(x):
    '''
        Reduce the dimensions using Principal Component Analysis
    '''
    # create the PCA object
    pca = dc.PCA(n_components=2, whiten=True)

    # learn the principal components from all the features
    return pca.fit(x)
Example #20
Source File: test_transformers.py From gordo with GNU Affero General Public License v3.0 | 5 votes |
def _validate_transformer(self, transformer):
    """
    Inserts a transformer into the middle of a pipeline and runs it
    """
    pipe = Pipeline([("pca1", PCA()), ("custom", transformer), ("pca2", PCA())])
    X = np.random.random(size=100).reshape(10, 10)
    pipe.fit_transform(X)
Example #21
Source File: DimensionReduction.py From FAE with GNU General Public License v3.0 | 5 votes |
def SetRemainedNumber(self, number):
    super(DimensionReductionByPCA, self).SetRemainedNumber(number)
    super(DimensionReductionByPCA, self).SetModel(
        PCA(n_components=super(DimensionReductionByPCA, self).GetRemainedNumber()))
Example #22
Source File: multipca.py From PynPoint with GNU General Public License v3.0 | 5 votes |
def create_processors(self) -> List[PcaTaskProcessor]:
    """
    Method to create a list of instances of PcaTaskProcessor.

    Returns
    -------
    list(pynpoint.util.multipca.PcaTaskProcessor, )
        PCA task processors.
    """
    processors = []

    for _ in range(self.m_num_proc):
        processors.append(PcaTaskProcessor(self.m_tasks_queue,
                                           self.m_result_queue,
                                           self.m_star_reshape,
                                           self.m_angles,
                                           self.m_scales,
                                           self.m_pca_model,
                                           self.m_im_shape,
                                           self.m_indices,
                                           self.m_requirements,
                                           self.m_processing_type))

    return processors
Example #23
Source File: test_decompose.py From skutil with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_selective_pca():
    original = X
    cols = [original.columns[0]]  # only perform on first...
    compare_cols = np.array(
        original[['sepal width (cm)', 'petal length (cm)',
                  'petal width (cm)']].as_matrix())  # should be the same as the trans cols

    transformer = SelectivePCA(cols=cols, n_components=0.85).fit(original)
    transformed = transformer.transform(original)

    untouched_cols = np.array(
        transformed[['sepal width (cm)', 'petal length (cm)', 'petal width (cm)']].as_matrix())
    assert_array_almost_equal(compare_cols, untouched_cols)
    assert 'PC1' in transformed.columns
    assert transformed.shape[1] == 4
    assert isinstance(transformer.get_decomposition(), PCA)
    assert SelectivePCA().get_decomposition() is None

    # test the selective mixin
    assert isinstance(transformer.cols, list)

    # what if we want to weight it?
    pca_df = SelectivePCA(weight=True, n_components=0.99, as_df=False).fit_transform(original)
    pca_arr = SelectivePCA(weight=True, n_components=0.99, as_df=False).fit_transform(iris.data)
    assert_array_equal(pca_df, pca_arr)

    # hack to assert they are not equal if weighted
    pca_arr = SelectivePCA(weight=False, n_components=0.99, as_df=False).fit_transform(iris.data)
    assert_fails(assert_array_equal, AssertionError, pca_df, pca_arr)
Example #24
Source File: decompose.py From skutil with BSD 3-Clause "New" or "Revised" License | 5 votes |
def score(self, X, y=None):
    """Return the average log-likelihood of all samples.
    This calls sklearn.decomposition.PCA's score method
    on the specified columns [1].

    Parameters
    ----------
    X : Pandas ``DataFrame``, shape=(n_samples, n_features)
        The data to score.

    y : None
        Passthrough for pipeline/gridsearch

    Returns
    -------
    ll : float
        Average log-likelihood of the samples under the fit
        PCA model (`self.pca_`)

    References
    ----------
    .. [1] Bishop, C. "Pattern Recognition and Machine Learning"
           12.2.1 p. 574
           http://www.miketipping.com/papers/met-mppca.pdf
    """
    check_is_fitted(self, 'pca_')
    X, _ = validate_is_pd(X, self.cols)
    cols = X.columns if not self.cols else self.cols

    ll = self.pca_.score(X[cols].as_matrix(), _as_numpy(y))
    return ll
Example #25
Source File: decompose.py From skutil with BSD 3-Clause "New" or "Revised" License | 5 votes |
def get_decomposition(self):
    """Overridden from the :class:``skutil.decomposition.decompose._BaseSelectiveDecomposer`` class,
    this method returns the internal decomposition class:
    ``sklearn.decomposition.PCA``

    Returns
    -------
    self.pca_ : ``sklearn.decomposition.PCA``
        The fit internal decomposition class
    """
    return self.pca_ if hasattr(self, 'pca_') else None
Example #26
Source File: decompose.py From skutil with BSD 3-Clause "New" or "Revised" License | 5 votes |
def fit(self, X, y=None):
    """Fit the transformer.

    Parameters
    ----------
    X : Pandas ``DataFrame``, shape=(n_samples, n_features)
        The Pandas frame to fit. The frame will only
        be fit on the prescribed ``cols`` (see ``__init__``) or
        all of them if ``cols`` is None. Furthermore, ``X`` will
        not be altered in the process of the fit.

    y : None
        Passthrough for ``sklearn.pipeline.Pipeline``. Even
        if explicitly set, will not change behavior of ``fit``.

    Returns
    -------
    self
    """
    # check on state of X and cols
    X, self.cols = validate_is_pd(X, self.cols)
    cols = _cols_if_none(X, self.cols)

    # fails thru if names don't exist:
    self.pca_ = PCA(
        n_components=self.n_components,
        whiten=self.whiten).fit(X[cols].as_matrix())

    return self
Example #27
Source File: utils.py From MNIST-baselines with MIT License | 5 votes |
def skPCA(data, dim):
    model = PCA(n_components=dim)
    model.fit(data)
    return model.transform(data)
Example #28
Source File: OptimalProjection.py From scattertext with Apache License 2.0 | 5 votes |
def get_optimal_category_projection_by_rank(
        corpus,
        n_dims=2,
        n_steps=20,
        projector=lambda rank, n_dims: CategoryProjector(AssociationCompactorByRank(rank),
                                                         projector=PCA(n_dims)),
        verbose=False):
    try:
        from astropy.stats import RipleysKEstimator
    except:
        raise Exception("Please install astropy")

    ripley = RipleysKEstimator(area=1., x_max=1., y_max=1., x_min=0., y_min=0.)
    min_dev = None
    best_rank = None
    best_x = None
    best_y = None
    best_projector = None
    for rank in np.linspace(1, TermCategoryRanker().get_max_rank(corpus), n_steps):
        r = np.linspace(0, np.sqrt(2), 100)
        category_projector = projector(rank, n_dims)
        category_projection = category_projector.project(corpus)
        for dim_1 in range(0, n_dims):
            for dim_2 in range(dim_1 + 1, n_dims):
                proj = category_projection.projection[:, [dim_1, dim_2]]
                scaled_proj = np.array([stretch_0_to_1(proj.T[0]), stretch_0_to_1(proj.T[1])]).T
                dev = np.sum(np.abs(ripley(scaled_proj, r, mode='ripley') - ripley.poisson(r)))
                if min_dev is None or dev < min_dev:
                    min_dev = dev
                    best_rank = rank
                    best_projector = category_projector
                    best_x, best_y = (dim_1, dim_2)
                if verbose:
                    print('rank', rank, 'dims', dim_1, dim_2, 'K', dev)
                    print(' best rank', best_rank, 'dims', best_x, best_y, 'K', min_dev)
    if verbose:
        print(best_rank, best_x, best_y)
    return best_projector.project(corpus, best_x, best_y)
Example #29
Source File: util.py From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License | 5 votes |
def kmean_pca_batch(data, batch, k=10):
    data = np.asarray(data, dtype=np.float32)
    batch = np.asarray(batch, dtype=np.float32)

    a = np.zeros(batch.shape[0])
    for i in np.arange(batch.shape[0]):
        tmp = np.concatenate((data, [batch[i]]))
        tmp_pca = PCA(n_components=2).fit_transform(tmp)
        a[i] = mle_single(tmp_pca[:-1], tmp_pca[-1], k=k)
    return a
Example #30
Source File: pca.py From classification-of-encrypted-traffic with MIT License | 5 votes |
def runpca(X, num_comp=None):
    pca = PCA(n_components=num_comp, svd_solver='full')
    pca.fit(X)
    # print(pca.n_components_)
    # print(pca.explained_variance_ratio_)
    # print(sum(pca.explained_variance_ratio_))
    return pca