Python sklearn.mixture Examples

The following are 13 code examples that use the sklearn.mixture module, drawn from open-source projects. The original project and source file are noted above each example. You may also want to check out the other available functions and classes of the sklearn module.
Example #1
Source File: gmm.py    From vampyre with MIT License
def est_init(self, return_cost=False,ind_out=None,\
        avg_var_cost=True):
        """
        Initial estimator.
        
        See the base class :class:`vampyre.estim.base.Estim` for 
        a complete description.
        
        :param Boolean return_cost:  Flag indicating if :code:`cost` is 
            to be returned
        :returns: :code:`zmean, zvar, [cost]` which are the
            prior mean and variance
        """           
       
        # otherwise, use the mixture estimator
        return self.mix.est_init(return_cost,ind_out,avg_var_cost) 
Example #2
Source File: GMM.py    From sprocket with MIT License
def __init__(self, n_mix=32, n_iter=100, covtype='full'):
        self.n_mix = n_mix
        self.n_iter = n_iter
        self.covtype = covtype

        self.random_state = np.random.mtrand._rand

        # construct GMM parameter
        if self.covtype == 'full':
            self.param = sklearn.mixture.GaussianMixture(
                n_components=self.n_mix,
                covariance_type=self.covtype,
                max_iter=self.n_iter)
        elif self.covtype == 'block_diag':
            self.param = BlockDiagonalGaussianMixture(
                n_mix=self.n_mix,
                n_iter=self.n_iter)
        else:
            raise ValueError('Covariance type should be full or block_diag') 
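A minimal sketch of how the resulting self.param object might be trained is shown below; the joint feature matrix jnt is made up for illustration and is not part of the sprocket code above.

# Minimal usage sketch (jnt is hypothetical joint feature data, not from sprocket)
import numpy as np
import sklearn.mixture

jnt = np.random.randn(2000, 10)   # hypothetical joint source/target feature vectors
gmm = sklearn.mixture.GaussianMixture(
    n_components=32, covariance_type='full', max_iter=100)
gmm.fit(jnt)                      # EM training, as self.param.fit(...) would do
print(gmm.weights_.shape)         # (32,)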
Example #3
Source File: GMM.py    From sprocket with MIT License
def _gmmmap(self, sddata):
        # parameters of the sequential data
        T, sddim = sddata.shape

        # estimate posterior sequence
        wseq = self.pX.predict_proba(sddata)

        # estimate mixture sequence
        cseq = np.argmax(wseq, axis=1)

        mseq = np.zeros((T, sddim))
        covseq = np.zeros((T, sddim, sddim))
        for t in range(T):
            # select the maximum-posterior mixture component in frame t
            m = cseq[t]

            # conditional mean vector sequence
            mseq[t] = self.meanY[m] + \
                self.A[m] @ (sddata[t] - self.meanX[m])

            # conditional covariance sequence (stored as the inverse conditional covariance)
            covseq[t] = self.cond_cov_inv[m]

        return cseq, wseq, mseq, covseq 
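For orientation, here is a toy sketch of the posterior (wseq) and max-posterior component (cseq) steps, using a freshly fitted GaussianMixture in place of self.pX; all data below is made up.

import numpy as np
import sklearn.mixture

rng = np.random.default_rng(2)
X = np.vstack([rng.normal(0, 1, (100, 3)), rng.normal(5, 1, (100, 3))])
pX = sklearn.mixture.GaussianMixture(n_components=2, random_state=0).fit(X)

sddata = rng.normal(0, 1, (10, 3))   # 10 frames of toy source features
wseq = pX.predict_proba(sddata)      # (10, 2): posterior over components per frame
cseq = np.argmax(wseq, axis=1)       # (10,): max-posterior component per frame
print(wseq.shape, cseq.shape)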
Example #4
Source File: signal_binarize.py    From NeuroKit with MIT License
def _signal_binarize(signal, method="threshold", threshold="auto"):
    method = method.lower()  # convert to lowercase
    if method == "threshold":
        binary = _signal_binarize_threshold(signal, threshold=threshold)
    elif method == "mixture":
        binary = _signal_binarize_mixture(signal, threshold=threshold)
    else:
        raise ValueError("NeuroKit error: signal_binarize(): 'method' should be one of 'threshold' or 'mixture'.")
    return binary


# =============================================================================
# Methods
# ============================================================================= 
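The _signal_binarize_threshold helper is not shown in this listing; a rough sketch of what the threshold branch likely does, based on the docstring in Example #13 (where "auto" picks a value between the signal's max and min), might look like the following. The actual NeuroKit implementation may differ.

# Hypothetical sketch, not the actual NeuroKit helper
import numpy as np

def _signal_binarize_threshold_sketch(signal, threshold="auto"):
    if threshold == "auto":
        threshold = np.mean([np.max(signal), np.min(signal)])  # midpoint of the range
    binary = np.zeros(len(signal))
    binary[signal > threshold] = 1
    return binary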
Example #5
Source File: signal_binarize.py    From NeuroKit with MIT License
def _signal_binarize_mixture(signal, threshold="auto"):
    if threshold == "auto":
        threshold = 0.5

    # fit a Gaussian Mixture Model with two components
    clf = sklearn.mixture.GaussianMixture(n_components=2, random_state=333)
    clf = clf.fit(signal.reshape(-1, 1))

    # Get predicted probabilities
    probability = clf.predict_proba(signal.reshape(-1, 1))[:, np.argmax(clf.means_[:, 0])]

    binary = np.zeros(len(signal))
    binary[probability >= threshold] = 1
    return binary 
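A quick self-contained check of this behavior on a synthetic two-level signal (the data and expected output are illustrative only):

import numpy as np
import sklearn.mixture

rng = np.random.default_rng(0)
signal = np.concatenate([rng.normal(0, 0.1, 500), rng.normal(1, 0.1, 500)])

clf = sklearn.mixture.GaussianMixture(n_components=2, random_state=333).fit(signal.reshape(-1, 1))
probability = clf.predict_proba(signal.reshape(-1, 1))[:, np.argmax(clf.means_[:, 0])]
binary = (probability >= 0.5).astype(float)
print(binary[:3], binary[-3:])  # approximately [0. 0. 0.] [1. 1. 1.]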
Example #6
Source File: gmm.py    From vampyre with MIT License
def set_gmm_param(self,probc,meanc,varc):
        """
        Sets the GMM parameters for the mixture estimator
        """
        nc = len(probc)
                
        if self.mix is None:
            # If the mixture estimator does not exist, create it        
            # First, create the component Gaussian estimators
            est_list = []
            for i in range(nc):
                esti = GaussEst(meanc[i], varc[i], self.shape, 
                     var_axes = self.var_axes, zmean_axes='all',
                     is_complex=self.is_complex, map_est=self.map_est)
                est_list.append(esti)
                
            # Create the mixture 
            self.mix = MixEst(est_list,w=probc)                
            
        else:
            # If the mixture distribution is already created,
            # set the parameters of the mixture estimator
            self.probc = probc
            self.mix.w = np.copy(probc)
            for i in range(nc):
                esti = self.mix.est_list[i]
                if not self.mean_fix[i]:
                    esti.zmean = meanc[i]
                if not self.var_fix[i]:
                    esti.zvar = np.copy(varc[i]) 
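Conceptually, the resulting mixture prior is just a weighted sum of component Gaussian densities; a plain numpy/scipy sketch of that idea (not the vampyre GaussEst/MixEst API, and with made-up parameters) is:

import numpy as np
from scipy.stats import norm

probc = np.array([0.7, 0.3])    # component weights
meanc = np.array([0.0, 3.0])    # component means
varc = np.array([1.0, 0.5])     # component variances

z = np.linspace(-3.0, 6.0, 5)
pdf = sum(p * norm.pdf(z, loc=m, scale=np.sqrt(v))
          for p, m, v in zip(probc, meanc, varc))
print(pdf)                      # mixture density evaluated at the points in z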
Example #7
Source File: gmm.py    From vampyre with MIT License
def update_gmm_em(self):
        """
        Updates the GMM parameters using EM estimation
        """
        
        # Get the posterior probabilities, means and variances from the mixture
        # estimator.  The lists have one element for each component in the mixture
        prob_list = self.mix.prob
        zmean_list = self.mix.zmean_list
        zvar_list = self.mix.zvar_list            
        
        # Compute new cluster probabilities, means, and variances
        nc = len(prob_list)
        probc = np.zeros(nc)
        meanc = np.zeros(nc)
        varc = []
        for i in range(nc):
            probc[i] = np.mean(prob_list[i])
            meanc[i] = np.mean(prob_list[i]*zmean_list[i])/probc[i]
            dsq = zvar_list[i] + np.abs((zmean_list[i]-meanc[i]))**2
            varci = np.mean((prob_list[i]*dsq)/probc[i],axis=self.var_axes)
            varci = np.maximum(varci, self.zvarmin)
            varc.append(varci)
                

        # Set the parameters
        self.set_gmm_param(probc,meanc,varc) 
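These are the familiar EM M-step updates (weights, means, and variances computed from responsibilities); a compact 1-D sketch of the same update, with made-up responsibilities and point estimates in place of the vampyre posterior quantities, is:

import numpy as np

rng = np.random.default_rng(1)
z = rng.normal(size=200)                     # toy per-sample estimates
resp = rng.dirichlet([1.0, 1.0], size=200)   # toy responsibilities, shape (200, 2)

probc = resp.mean(axis=0)                                        # cluster weights
meanc = (resp * z[:, None]).mean(axis=0) / probc                 # cluster means
varc = (resp * (z[:, None] - meanc) ** 2).mean(axis=0) / probc   # cluster variances
print(probc, meanc, varc)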
Example #8
Source File: GMM.py    From sprocket with MIT License
def train_singlepath(self, tar_jnt):
        """Fit GMM parameter based on single-path training
        M-step :
            Update GMM parameter using `self.log_resp`, and `tar_jnt`

        Parameters
        ----------
        tar_jnt: array, shape(`T`, `tar_dim`)
            Joint feature vectors of the original and target features,
            consisting of static and delta components, which will be modeled.

        Returns
        -------
        param :
            sklearn-based model parameters of the GMM

        """
        if self.covtype == 'full':
            single_param = sklearn.mixture.GaussianMixture(
                n_components=self.n_mix,
                covariance_type=self.covtype,
                max_iter=1)
        elif self.covtype == 'block_diag':
            single_param = BlockDiagonalGaussianMixture(
                n_mix=self.n_mix,
                n_iter=self.n_iter)
        else:
            raise ValueError('Covariance type should be full or block_diag')

        # initialize target single-path param
        single_param._initialize_parameters(tar_jnt, self.random_state)

        # perform m-step
        single_param._m_step(tar_jnt, self.log_resp)

        return single_param 
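Note that _initialize_parameters and _m_step are private sklearn internals and may change between versions. A self-contained sketch of the same single-M-step pattern, with made-up data and responsibilities standing in for tar_jnt and self.log_resp, is:

import numpy as np
import sklearn.mixture

rng = np.random.RandomState(0)
tar_jnt = rng.randn(500, 8)                          # toy joint feature vectors
log_resp = np.log(rng.dirichlet(np.ones(4), 500))    # toy log responsibilities, (500, 4)

single = sklearn.mixture.GaussianMixture(
    n_components=4, covariance_type='full', max_iter=1)
single._initialize_parameters(tar_jnt, rng)          # private API, as used above
single._m_step(tar_jnt, log_resp)                    # one M-step with fixed responsibilities
print(single.means_.shape)                           # (4, 8)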
Example #9
Source File: GMM.py    From sprocket with MIT License
def _set_Ab(self):
        # calculate A and b from self.jmean, self.jcov
        sddim = self.jmean.shape[1] // 2

        # calculate inverse covariance for covariance XX in each mixture
        self.covXXinv = np.zeros((self.n_mix, sddim, sddim))
        for m in range(self.n_mix):
            self.covXXinv[m] = np.linalg.inv(self.covXX[m])

        # calculate A, b, and conditional covariance given X
        self.A = np.zeros((self.n_mix, sddim, sddim))
        self.b = np.zeros((self.n_mix, sddim))
        self.cond_cov_inv = np.zeros((self.n_mix, sddim, sddim))
        for m in range(self.n_mix):
            # calculate A (i.e., A = yxcov_m * xxcov_m^-1)
            self.A[m] = self.covYX[m] @ self.covXXinv[m]

            # calculate b (i.e., b = mean^Y - A * mean^X)
            self.b[m] = self.meanY[m] - self.A[m] @ self.meanX[m]

            # calculate conditional covariance
            # (i.e., cov^(Y|X)^-1 = (yycov - A * xycov)^-1)
            self.cond_cov_inv[m] = np.linalg.inv(
                self.covYY[m] - self.A[m] @ self.covXY[m])

        return 
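These are the standard conditional-Gaussian formulas for p(Y|X) given a joint mean and covariance; a short numerical sketch of the same computation with made-up 2-D blocks:

import numpy as np

meanX, meanY = np.array([0.0, 0.0]), np.array([1.0, -1.0])
covXX = np.array([[1.0, 0.2], [0.2, 1.0]])
covYY = np.array([[1.5, 0.1], [0.1, 1.5]])
covYX = np.array([[0.4, 0.0], [0.0, 0.4]])
covXY = covYX.T

A = covYX @ np.linalg.inv(covXX)                  # A = covYX * covXX^-1
b = meanY - A @ meanX                             # b = meanY - A * meanX
cond_cov_inv = np.linalg.inv(covYY - A @ covXY)   # (cov of Y given X)^-1
print(A, b, cond_cov_inv, sep="\n")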
Example #10
Source File: GMM.py    From sprocket with MIT License
def _set_pX(self):
        # probability density function of X
        self.pX = sklearn.mixture.GaussianMixture(
            n_components=self.n_mix, covariance_type='full')
        self.pX.weights_ = self.w
        self.pX.means_ = self.meanX
        self.pX.covariances_ = self.covXX

        # precisions_cholesky_ must be set for predict_proba to estimate the posterior
        self.pX.precisions_cholesky_ = _compute_precision_cholesky(
            self.covXX, 'full')
        return 
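A self-contained sketch of this pattern — building a GaussianMixture from known parameters and calling predict_proba without fitting — might look like the following. The private import path for _compute_precision_cholesky differs across scikit-learn versions (sklearn.mixture._gaussian_mixture in recent releases), so treat the import here as an assumption.

import numpy as np
import sklearn.mixture
from sklearn.mixture._gaussian_mixture import _compute_precision_cholesky  # version-dependent path

w = np.array([0.5, 0.5])
means = np.array([[0.0, 0.0], [3.0, 3.0]])
covs = np.array([np.eye(2), np.eye(2)])

pX = sklearn.mixture.GaussianMixture(n_components=2, covariance_type='full')
pX.weights_, pX.means_, pX.covariances_ = w, means, covs
pX.precisions_cholesky_ = _compute_precision_cholesky(covs, 'full')

print(pX.predict_proba(np.array([[0.1, -0.2], [2.9, 3.2]])))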
Example #11
Source File: scdv.py    From redshells with MIT License
def __init__(self, documents: List[List[str]], cluster_size: int, sparsity_percentage: float, gaussian_mixture_kwargs: Dict[Any, Any],
                 dictionary: gensim.corpora.Dictionary, w2v: Union[FastText, Word2Vec]) -> None:
        """

        :param documents: documents for training.
        :param cluster_size:  word cluster size.
        :param sparsity_percentage: sparsity percentage. This must be in [0, 1].
        :param gaussian_mixture_kwargs: Arguments to build `sklearn.mixture.GaussianMixture` except cluster_size. Please see `sklearn.mixture.GaussianMixture.__init__` for details.
        :param dictionary: `gensim.corpora.Dictionary`.
        :param w2v: trained word embedding model (`FastText` or `Word2Vec`).
        """
        logger.info('_build_dictionary...')
        self._dictionary = dictionary
        vocabulary_size = len(self._dictionary.token2id)
        embedding_size = w2v.wv.vector_size

        logger.info('_build_word_embeddings...')
        self._word_embeddings = self._build_word_embeddings(self._dictionary, w2v)
        assert self._word_embeddings.shape == (vocabulary_size, embedding_size)

        logger.info('_build_word_cluster_probabilities...')
        self._word_cluster_probabilities = self._build_word_cluster_probabilities(self._word_embeddings, cluster_size, gaussian_mixture_kwargs)
        assert self._word_cluster_probabilities.shape == (vocabulary_size, cluster_size)

        logger.info('_build_idf...')
        self._idf = self._build_idf(self._dictionary)
        assert self._idf.shape == (vocabulary_size, )

        logger.info('_build_word_cluster_vectors...')
        word_cluster_vectors = self._build_word_cluster_vectors(self._word_embeddings, self._word_cluster_probabilities)
        assert word_cluster_vectors.shape == (vocabulary_size, cluster_size, embedding_size)

        logger.info('_build_word_topic_vectors...')
        word_topic_vectors = self._build_word_topic_vectors(self._idf, word_cluster_vectors)
        assert word_topic_vectors.shape == (vocabulary_size, (cluster_size * embedding_size))

        logger.info('_build_sparsity_threshold...')
        self._sparse_threshold = self._build_sparsity_threshold(word_topic_vectors, self._dictionary, documents, sparsity_percentage) 
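The _build_word_cluster_probabilities helper itself is not shown in this listing; a hypothetical sketch of what it presumably does — fit a GaussianMixture with cluster_size components on the word embeddings and return soft cluster assignments — is:

import numpy as np
import sklearn.mixture

def build_word_cluster_probabilities_sketch(word_embeddings, cluster_size,
                                             gaussian_mixture_kwargs=None):
    # hypothetical helper, not the actual redshells implementation
    kwargs = gaussian_mixture_kwargs or {}
    gmm = sklearn.mixture.GaussianMixture(n_components=cluster_size, **kwargs)
    gmm.fit(word_embeddings)
    return gmm.predict_proba(word_embeddings)   # (vocabulary_size, cluster_size)

probabilities = build_word_cluster_probabilities_sketch(np.random.randn(100, 8), 5)
print(probabilities.shape)                      # (100, 5)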
Example #12
Source File: distribution.py    From L2L with GNU General Public License v3.0
def __init__(self, n_components=2, **kwargs):
        self.random_state = None
        self.bayesian_mixture = sklearn.mixture.BayesianGaussianMixture(
            n_components,
            weight_concentration_prior_type='dirichlet_distribution',
            random_state=self.random_state, **kwargs)
        # taken from the check_fitted function of BayesianGaussianMixture in the sklearn repository
        self.parametrization = ('covariances_', 'means_', 'weight_concentration_', 'weights_',
                                'mean_precision_', 'degrees_of_freedom_', 'precisions_', 'precisions_cholesky_')
        self.n_components = n_components 
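A minimal fit-and-sample sketch with sklearn's BayesianGaussianMixture (the toy population below is made up; this is not the L2L distribution API itself):

import numpy as np
import sklearn.mixture

individuals = np.random.randn(200, 4)     # toy population of parameter vectors
bgm = sklearn.mixture.BayesianGaussianMixture(
    n_components=2, weight_concentration_prior_type='dirichlet_distribution',
    random_state=0)
bgm.fit(individuals)
new_individuals, labels = bgm.sample(10)  # draw 10 new candidate vectors
print(new_individuals.shape)              # (10, 4)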
Example #13
Source File: signal_binarize.py    From NeuroKit with MIT License
def signal_binarize(signal, method="threshold", threshold="auto"):
    """Binarize a continuous signal.

    Convert a continuous signal into zeros and ones depending on a given threshold.

    Parameters
    ----------
    signal : Union[list, np.array, pd.Series]
        The signal (i.e., a time series) in the form of a vector of values.
    method : str
        The algorithm used to discriminate between the two states. Can be one of 'threshold' (default) or
        'mixture'. If 'mixture', will use a Gaussian Mixture Model to categorize between the two states.
        If 'threshold', will consider as activated all points whose value is greater than the threshold.
    threshold : float
        If `method` is 'mixture', then it corresponds to the minimum probability required to be considered
        as activated (if 'auto', then 0.5). If `method` is 'threshold', then it corresponds to the minimum
        amplitude to detect as onset. If "auto", takes the midpoint between the max and the min.

    Returns
    -------
    Union[list, np.array, pd.Series]
        The binarized signal, returned with the same type as the input.

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> import neurokit2 as nk
    >>>
    >>> signal = np.cos(np.linspace(start=0, stop=20, num=1000))
    >>> binary = nk.signal_binarize(signal)
    >>> fig = pd.DataFrame({"Raw": signal, "Binary": binary}).plot()
    >>> fig #doctest: +SKIP

    """

    # Return appropriate type
    if isinstance(signal, list):
        binary = _signal_binarize(np.array(signal), method=method, threshold=threshold)
        signal = list(binary)
    elif isinstance(signal, pd.Series):
        signal = signal.copy()  # Avoid annoying pandas warning
        binary = _signal_binarize(signal.values, method=method, threshold=threshold)
        signal[:] = binary
    else:
        signal = _signal_binarize(signal, method=method, threshold=threshold)

    return signal