Python sklearn.decomposition Examples

The following are 13 code examples of the sklearn.decomposition module. You can go to the original project or source file by following the links above each example, or check out all available functions and classes of the sklearn module.
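As a quick orientation before the examples below, here is a minimal, self-contained sketch of the module's most common entry point, PCA. The data and parameter choices are illustrative assumptions, not taken from any project on this page:

import numpy as np
import sklearn.decomposition

# Illustrative data: 100 samples with 20 features (invented).
rng = np.random.default_rng(0)
X = rng.normal(size=(100, 20))

# Project onto the top 5 principal components.
pca = sklearn.decomposition.PCA(n_components=5)
X_reduced = pca.fit_transform(X)
print(X_reduced.shape)                      # (100, 5)
print(pca.explained_variance_ratio_.sum())  # fraction of variance kept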
Example #1
Source File: decomposition.py    From tridesclous with MIT License
def __init__(self, catalogueconstructor=None, selection=None, n_components=5, **params):
        # Requires module-level: import numpy as np; import sklearn.decomposition
        cc = catalogueconstructor

        self.n_components = n_components

        self.waveforms = cc.get_some_waveforms()

        if selection is None:
            waveforms = self.waveforms
        else:
            peaks_index, = np.nonzero(selection)
            waveforms = cc.get_some_waveforms(peaks_index=peaks_index)

        # Flatten (n_spikes, n_samples, n_channels) to (n_spikes, n_samples * n_channels).
        flatten_waveforms = waveforms.reshape(waveforms.shape[0], -1)
        # Alternative in the original source: sklearn.decomposition.IncrementalPCA
        self.pca = sklearn.decomposition.TruncatedSVD(n_components=n_components, **params)
        self.pca.fit(flatten_waveforms)

        # In GlobalPCA, every feature depends on all channels.
        self.channel_to_features = np.ones((cc.nb_channel, self.n_components), dtype='bool')
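For readers without a tridesclous CatalogueConstructor at hand, a minimal sketch of the same global-SVD pattern on synthetic waveforms (shapes and values are assumptions):

import numpy as np
import sklearn.decomposition

# Synthetic waveforms: 200 spikes, 40 time samples, 4 channels (assumed shapes).
rng = np.random.default_rng(0)
waveforms = rng.normal(size=(200, 40, 4))

# Flatten each spike to a single vector, then fit one truncated SVD globally.
flat = waveforms.reshape(waveforms.shape[0], -1)   # (200, 160)
svd = sklearn.decomposition.TruncatedSVD(n_components=5)
features = svd.fit_transform(flat)                 # (200, 5)
print(features.shape)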
Example #2
Source File: decomposition.py    From tridesclous with MIT License
def transform(self, waveforms):
        # Keep only the time sample at the detected peak:
        # (n_spikes, n_samples, n_channels) -> (n_spikes, n_channels)
        features = waveforms[:, self.ind_peak, :].copy()
        return features
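A minimal sketch of what this peak-sample extraction does, on synthetic data (shapes and the peak position are assumptions):

import numpy as np

# 10 spikes, 40 time samples, 4 channels; the peak is assumed at sample 15.
waveforms = np.random.default_rng(0).normal(size=(10, 40, 4))
ind_peak = 15

features = waveforms[:, ind_peak, :].copy()  # one amplitude per channel
print(features.shape)                        # (10, 4)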




Example #3
Source File: test_monkeypatch.py    From daal4py with Apache License 2.0
def test_monkey_patching(self):
        # Requires module-level: import daal4py.sklearn
        _tokens = daal4py.sklearn.sklearn_patch_names()
        self.assertTrue(isinstance(_tokens, list) and len(_tokens) > 0)
        # Unpatch and then re-patch every accelerated estimator.
        for t in _tokens:
            daal4py.sklearn.unpatch_sklearn(t)
        for t in _tokens:
            daal4py.sklearn.patch_sklearn(t)

        # After patching, these sklearn classes should resolve to daal4py implementations.
        import sklearn
        for a in [(sklearn.decomposition, 'PCA'),
                  (sklearn.linear_model, 'Ridge'),
                  (sklearn.linear_model, 'LinearRegression'),
                  (sklearn.cluster, 'KMeans'),
                  (sklearn.svm, 'SVC'), ]:
            class_module = getattr(a[0], a[1]).__module__
            self.assertTrue(class_module.startswith('daal4py'))
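In application code (rather than a test), the same patching API can be used before importing estimators. A hedged sketch, assuming the daal4py version at hand exposes patch_sklearn and sklearn_patch_names as in the test above:

import daal4py.sklearn

# Patch every estimator daal4py knows how to accelerate.
for name in daal4py.sklearn.sklearn_patch_names():
    daal4py.sklearn.patch_sklearn(name)

import sklearn.decomposition
pca = sklearn.decomposition.PCA(n_components=2)  # now backed by daal4py
print(type(pca).__module__)                      # should start with 'daal4py'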
Example #4
Source File: feature_extraction.py    From retentioneering-tools with Mozilla Public License 2.0
def get_manifold(data, manifold_type, **kwargs):
    """
    Reduces number of dimensions.

    Parameters
    ---------
    data: pd.DataFrame
        Dataframe with features for clustering indexed as in ``retention_config.index_col``
    manifold_type: str
        Name dimensionality reduction method from ``sklearn.decomposition`` and ``sklearn.manifold``
    kwargs: optional
        Parameters for ``sklearn.decomposition`` and ``sklearn.manifold`` methods.

    Returns
    --------
    pd.DataFrame with reduced dimensions.

    Return type
    --------
    pd.DataFrame
    """
    if hasattr(decomposition, manifold_type):
        man = getattr(decomposition, manifold_type)
    elif hasattr(manifold, manifold_type):
        man = getattr(manifold, manifold_type)
    else:
        raise ValueError(f'There is not such manifold {manifold_type}')
    tsvd = man(**{i: j for i, j in kwargs.items() if i in man.get_params(man)})
    res = tsvd.fit_transform(data)
    return pd.DataFrame(res, index=data.index) 
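A hedged usage sketch (the index values and data are invented; ``get_manifold`` is the function above). Note that ``man.get_params(man)`` calls ``get_params`` on the class rather than an instance, which relies on older scikit-learn behavior; recent versions may require an instance there:

import numpy as np
import pandas as pd

# Invented feature matrix: 50 users x 8 TF-IDF-like columns.
rng = np.random.default_rng(0)
data = pd.DataFrame(rng.random((50, 8)),
                    index=[f'user_{i}' for i in range(50)])

# Reduce to 2 components with sklearn.decomposition.PCA;
# n_components is forwarded because it is a valid PCA parameter.
reduced = get_manifold(data, 'PCA', n_components=2)
print(reduced.shape)  # (50, 2)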
Example #5
Source File: feature_extraction.py    From retentioneering-tools with Mozilla Public License 2.0
def merge_features(features, metadata, meta_index_col=None, manifold_type=None, fillna=None, drop=False, **kwargs):
    """
    Adds metadata to TFIDF of trajectories. Eeduced if ``manifold_type`` is not ``None``.

    Parameters
    --------
    features: pd.DataFrame
        Dataframe with users` metadata.
    metadata: pd.DataFrame
        Dataframe with user or session properties or any other information you would like to extract as features (e.g. user properties, LTV values, etc.). Default: ``None``
    meta_index_col: str, optional
        Used when metadata is not ``None``. Name of column in ``metadata`` dataframe that contains the same ID as in ``index_col``, or if not defined, same as in retention_config (e.g ID of users or sessions). If ``None``, then index of metadata dataframe is used instead. Default: ``None``
    manifold_type: str, optional
        Name dimensionality reduction method from ``sklearn.decomposition`` and ``sklearn.manifold``. Default: ``None``
    fillna: optional
        Value for filling missing metadata for any ``index_col`` value. Default: ``None``
    drop: bool, optional
        If ``True``, then drops users which do not exist in ``metadata`` dataframe. Default: ``False``
    kwargs: optional
        Keyword arguments for ``sklearn.decomposition`` and ``sklearn.manifold`` methods.

    Returns
    -------
    Dataframe with trajectory features (possibly reduced) and users metadata.

    Return type
    -------
    pd.DataFrame
    """
    if manifold_type is not None:
        features = get_manifold(features, manifold_type, **kwargs)
    if meta_index_col is not None:
        metadata.index = metadata[meta_index_col].values
        metadata = metadata.drop(meta_index_col, 1)
    res = features.join(metadata, rsuffix='_meta',)
    if drop and (fillna is None):
        res = res[res.isnull().sum(1) == 0].copy()
    if fillna is not None:
        res = res.fillna(fillna)
    return res 
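A hedged usage sketch (dataframes and column names are invented; ``merge_features`` and ``get_manifold`` are the functions above):

import pandas as pd

features = pd.DataFrame({'f0': [0.1, 0.4, 0.3], 'f1': [0.9, 0.2, 0.5]},
                        index=['u1', 'u2', 'u3'])
metadata = pd.DataFrame({'user_id': ['u1', 'u2'], 'ltv': [10.0, 25.0]})

# Join LTV onto the features; u3 has no metadata and is dropped.
res = merge_features(features, metadata, meta_index_col='user_id', drop=True)
print(res)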
Example #6
Source File: decomposition.py    From tridesclous with MIT License
def transform(self, waveforms):
        # Flatten each waveform, then project with the fitted LDA model.
        flatten_waveforms = waveforms.reshape(waveforms.shape[0], -1)
        return self.lda.transform(flatten_waveforms)



Example #7
Source File: cleancluster.py    From tridesclous with MIT License
def _compute_one_dip_test(cc, dirname, chan_grp, label, n_components_local_pca, adjacency_radius_um):
    # Compute a dip test on the first projected component to decide
    # whether a cluster should be split further.
    # Requires module-level: numpy as np, sklearn.decomposition, and a diptest function.
    from .dataio import DataIO
    from .catalogueconstructor import CatalogueConstructor

    if cc is None:
        dataio = DataIO(dirname)
        cc = CatalogueConstructor(dataio=dataio, chan_grp=chan_grp)

    peak_sign = cc.info['peak_detector_params']['peak_sign']
    dense_mode = cc.info['mode'] == 'dense'
    n_left = cc.info['waveform_extractor_params']['n_left']
    n_right = cc.info['waveform_extractor_params']['n_right']
    peak_width = n_right - n_left
    nb_channel = cc.nb_channel

    if dense_mode:
        channel_adjacency = {c: np.arange(nb_channel) for c in range(nb_channel)}
    else:
        channel_adjacency = {}
        for c in range(nb_channel):
            nearest, = np.nonzero(cc.channel_distances[c, :] < adjacency_radius_um)
            channel_adjacency[c] = nearest

    waveforms, wf_flat, peak_index = _get_sparse_waveforms_flatten(cc, dense_mode, label, channel_adjacency, n_spike_for_centroid=cc.n_spike_for_centroid)

    # TruncatedSVD requires n_components < n_features, so clamp it.
    # (An alternative in the original source: sklearn.decomposition.IncrementalPCA with whiten=True.)
    n_components = min(wf_flat.shape[1] - 1, n_components_local_pca)
    pca = sklearn.decomposition.TruncatedSVD(n_components=n_components)

    feats = pca.fit_transform(wf_flat)
    pval = diptest(np.sort(feats[:, 0]), numt=200)

    return pval
Example #8
Source File: common.py    From deep-image-retrieval with BSD 3-Clause "New" or "Revised" License
def transform(pca, X, whitenp=0.5, whitenv=None, whitenm=1.0, use_sklearn=True):
    # Requires module-level: import numpy as np
    if use_sklearn:
        # Re-implementation of sklearn's PCA.transform with a tunable whitening
        # exponent (whitenp), component cut-off (whitenv) and multiplier (whitenm):
        # https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/decomposition/base.py#L99
        if pca.mean_ is not None:
            X = X - pca.mean_
        X_transformed = np.dot(X, pca.components_[:whitenv].T)
        if pca.whiten:
            X_transformed /= whitenm * np.power(pca.explained_variance_[:whitenv], whitenp)
    else:
        X = X - pca['means']
        X_transformed = np.dot(X, pca['W'])
    return X_transformed
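With the default arguments (whitenp=0.5, whitenv=None, whitenm=1.0), this should reproduce scikit-learn's own whitened projection. A minimal sketch to check that, on invented data:

import numpy as np
import sklearn.decomposition

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 10))

pca = sklearn.decomposition.PCA(n_components=4, whiten=True).fit(X)

# transform() is the function above; the defaults mimic pca.transform.
manual = transform(pca, X)
assert np.allclose(manual, pca.transform(X), atol=1e-8)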
Example #9
Source File: test_custom_schemas.py    From lale with Apache License 2.0
def setUp(self):
        import sklearn.decomposition
        import lale.lib.sklearn
        from lale.operators import make_operator
        # Wrap the raw sklearn PCA (with empty schemas) alongside lale's
        # bundled PCA wrapper, so the tests can compare the two.
        self.sk_pca = make_operator(sklearn.decomposition.PCA, schemas={})
        self.ll_pca = lale.lib.sklearn.PCA
        self.maxDiff = None
Example #10
Source File: pca.py    From lale with Apache License 2.0
def __init__(self, **hyperparams):
        # Thin wrapper: store the hyperparameters and delegate to sklearn's PCA.
        self._hyperparams = hyperparams
        self._wrapped_model = sklearn.decomposition.PCA(**self._hyperparams)
Example #11
Source File: signal.py    From gumpy with MIT License
def dwt(raw_eeg_data, level, **kwargs):
    """Multilevel Discrete Wavelet Transform (DWT).

    Compute the DWT for a raw eeg signal on multiple levels.

    Args:
        raw_eeg_data (array_like): input data
        level (int >= 0): decomposition levels
        **kwargs: Additional arguments that will be forwarded to ``pywt.wavedec``,
            e.g. the mandatory ``wavelet`` argument

    Returns:
        A 2-element tuple containing

        - **float**: mean value of the first decomposition coefficients
        - **list**: list of mean values for the individual (detail) decomposition coefficients

    """
    wt_coeffs = pywt.wavedec(data=raw_eeg_data, level=level, **kwargs)

    # Approximation coefficients (e.g. A7: 0 Hz - 1 Hz for level=7, Fs = 128 Hz)
    cAL_mean = np.nanmean(wt_coeffs[0], axis=0)
    details = []

    # Detail coefficients (e.g. D7: 1 Hz - 2 Hz for Fs = 128 Hz)
    for i in range(1, level+1):
        cDL_mean = np.nanmean(wt_coeffs[i], axis=0)
        details.append(cDL_mean)

    return cAL_mean, details
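A hedged usage sketch (the signal is synthetic and the 'db4' wavelet choice is an assumption; ``dwt`` is the function above):

import numpy as np

# Ten seconds of fake single-channel EEG at 128 Hz (invented).
rng = np.random.default_rng(0)
raw_eeg_data = rng.normal(size=1280)

# The wavelet is forwarded to pywt.wavedec through **kwargs.
cA_mean, detail_means = dwt(raw_eeg_data, level=5, wavelet='db4')
print(cA_mean, len(detail_means))  # scalar mean, 5 detail means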
Example #12
Source File: signal.py    From gumpy with MIT License
def artifact_removal(X, n_components=None, check_result=True):
    """Remove artifacts from data.

    The artifacts are detected via Independent Component Analysis (ICA) and
    subsequently removed. To plot the results, use
    :func:`gumpy.plot.artifact_removal`

    Args:
        X (array_like): Data to remove artifacts from
        n_components (int): Number of components for ICA. If None is passed, all will be used
        check_result (bool): Examine/test the ICA model by reverting the mixing.

    Returns:
        A 2-tuple containing

        - **ndarray**: The reconstructed signal without artifacts.
        - **ndarray**: The mixing matrix that was used by ICA.

    """
    # Requires module-level: import numpy as np; import sklearn.decomposition
    ica = sklearn.decomposition.FastICA(n_components=n_components)
    S_reconst = ica.fit_transform(X)
    A_mixing = ica.mixing_
    if check_result:
        # Reverting the unmixing should reproduce the input up to numerical error.
        assert np.allclose(X, np.dot(S_reconst, A_mixing.T) + ica.mean_)

    return S_reconst, A_mixing
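A hedged usage sketch with a synthetic two-source mixture (all data invented; ``artifact_removal`` is the function above):

import numpy as np

# Two invented source signals mixed into two observed channels.
t = np.linspace(0, 1, 1000)
sources = np.c_[np.sin(40 * t), np.sign(np.sin(7 * t))]
mixing = np.array([[1.0, 0.5], [0.4, 1.0]])
X = sources @ mixing.T

# Unmix; check_result asserts that inverting the mixing recovers X.
S, A = artifact_removal(X, n_components=2)
print(S.shape, A.shape)  # (1000, 2) (2, 2)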
Example #13
Source File: decomposition.py    From tridesclous with MIT License
def __init__(self, catalogueconstructor=None, selection=None, n_components_by_channel=3, adjacency_radius_um=200, **params):
        # Requires module-level: import numpy as np; import sklearn.decomposition
        cc = catalogueconstructor

        thresh = cc.info['peak_detector_params']['relative_threshold']
        n_left = cc.info['waveform_extractor_params']['n_left']
        self.dtype = cc.info['internal_dtype']

        self.n_components_by_channel = n_components_by_channel
        self.adjacency_radius_um = adjacency_radius_um

        if selection is None:
            peaks_index = cc.some_peaks_index
        else:
            peaks_index, = np.nonzero(selection)

        some_peaks = cc.all_peaks[peaks_index]

        # Fit one decomposition per channel, using only the peaks detected on that channel.
        self.pcas = []
        for chan in range(cc.nb_channel):
            sel = some_peaks['channel'] == chan
            wf_chan = cc.get_some_waveforms(peaks_index=peaks_index[sel], channel_indexes=[chan])
            wf_chan = wf_chan[:, :, 0]

            if wf_chan.shape[0] - 1 > n_components_by_channel:
                # TruncatedSVD is used here; IncrementalPCA and a joblib-parallel
                # variant are commented-out alternatives in the original source.
                pca = sklearn.decomposition.TruncatedSVD(n_components=n_components_by_channel, **params)
                pca.fit(wf_chan)
            else:
                # Too few spikes on this channel to fit a decomposition.
                pca = None
            self.pcas.append(pca)

        # In PcaByChannel, each channel owns its own block of
        # n_components_by_channel features (a block-diagonal boolean mask).
        self.channel_to_features = np.zeros((cc.nb_channel, cc.nb_channel*n_components_by_channel), dtype='bool')
        for c in range(cc.nb_channel):
            self.channel_to_features[c, c*n_components_by_channel:(c+1)*n_components_by_channel] = True
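A small standalone sketch of the ``channel_to_features`` mask construction, with invented sizes:

import numpy as np

nb_channel, n = 3, 2  # 3 channels, 2 components per channel (invented)
channel_to_features = np.zeros((nb_channel, nb_channel * n), dtype='bool')
for c in range(nb_channel):
    channel_to_features[c, c * n:(c + 1) * n] = True

print(channel_to_features.astype(int))
# [[1 1 0 0 0 0]
#  [0 0 1 1 0 0]
#  [0 0 0 0 1 1]]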