Python sklearn.mixture.BayesianGaussianMixture() Examples

The following are 30 code examples for showing how to use sklearn.mixture.BayesianGaussianMixture(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module sklearn.mixture , or try the search function .

Example 1
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_bayesian_mixture.py    License: MIT License 6 votes vote down vote up
def test_monotonic_likelihood():
    # We check that each step of the each step of variational inference without
    # regularization improve monotonically the training set of the bound
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=20)
    n_components = rand_data.n_components

    for prior_type in PRIOR_TYPE:
        for covar_type in COVARIANCE_TYPE:
            X = rand_data.X[covar_type]
            bgmm = BayesianGaussianMixture(
                weight_concentration_prior_type=prior_type,
                n_components=2 * n_components, covariance_type=covar_type,
                warm_start=True, max_iter=1, random_state=rng, tol=1e-4)
            current_lower_bound = -np.infty
            # Do one training iteration at a time so we can make sure that the
            # training log likelihood increases after each iteration.
            for _ in range(600):
                prev_lower_bound = current_lower_bound
                current_lower_bound = bgmm.fit(X).lower_bound_
                assert_greater_equal(current_lower_bound, prev_lower_bound)

                if bgmm.converged_:
                    break
            assert(bgmm.converged_) 
Example 2
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_bayesian_mixture.py    License: MIT License 6 votes vote down vote up
def test_invariant_translation():
    # We check here that adding a constant in the data change correctly the
    # parameters of the mixture
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=100)
    n_components = 2 * rand_data.n_components

    for prior_type in PRIOR_TYPE:
        for covar_type in COVARIANCE_TYPE:
            X = rand_data.X[covar_type]
            bgmm1 = BayesianGaussianMixture(
                weight_concentration_prior_type=prior_type,
                n_components=n_components, max_iter=100, random_state=0,
                tol=1e-3, reg_covar=0).fit(X)
            bgmm2 = BayesianGaussianMixture(
                weight_concentration_prior_type=prior_type,
                n_components=n_components, max_iter=100, random_state=0,
                tol=1e-3, reg_covar=0).fit(X + 100)

            assert_almost_equal(bgmm1.means_, bgmm2.means_ - 100)
            assert_almost_equal(bgmm1.weights_, bgmm2.weights_)
            assert_almost_equal(bgmm1.covariances_, bgmm2.covariances_) 
Example 3
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_bayesian_mixture.py    License: MIT License 6 votes vote down vote up
def test_bayesian_mixture_fit_predict(seed, max_iter, tol):
    rng = np.random.RandomState(seed)
    rand_data = RandomData(rng, scale=7)
    n_components = 2 * rand_data.n_components

    for covar_type in COVARIANCE_TYPE:
        bgmm1 = BayesianGaussianMixture(n_components=n_components,
                                        max_iter=max_iter, random_state=rng,
                                        tol=tol, reg_covar=0)
        bgmm1.covariance_type = covar_type
        bgmm2 = copy.deepcopy(bgmm1)
        X = rand_data.X[covar_type]

        Y_pred1 = bgmm1.fit(X).predict(X)
        Y_pred2 = bgmm2.fit_predict(X)
        assert_array_equal(Y_pred1, Y_pred2) 
Example 4
Project: VAE_NBP   Author: bobchennan   File: dp.py    License: MIT License 6 votes vote down vote up
def test(epoch, prior):
    ans = np.zeros((50, 10))
    for data, lab in test_loader:
        C = prior.predict(data.numpy().reshape(data.numpy().shape[0],-1))
        for i in xrange(len(lab)):
            ans[C[i],lab[i]]+=1
    print(ans)
    s = np.sum(ans)
    v = 0
    for i in xrange(ans.shape[0]):
        for j in xrange(ans.shape[1]):
            if ans[i,j]>0:
                v += ans[i,j]/s*np.log(ans[i,j]/s/(np.sum(ans[i,:])/s)/(np.sum(ans[:,j])/s))
    print("Mutual information: "+str(v))

#prior = BayesianGaussianMixture(n_components=100, covariance_type='diag') 
Example 5
Project: CTGAN   Author: sdv-dev   File: transformer.py    License: MIT License 6 votes vote down vote up
def _fit_continuous(self, column, data):
        gm = BayesianGaussianMixture(
            self.n_clusters,
            weight_concentration_prior_type='dirichlet_process',
            weight_concentration_prior=0.001,
            n_init=1
        )
        gm.fit(data)
        components = gm.weights_ > self.epsilon
        num_components = components.sum()

        return {
            'name': column,
            'model': gm,
            'components': components,
            'output_info': [(1, 'tanh'), (num_components, 'softmax')],
            'output_dimensions': 1 + num_components,
        } 
Example 6
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_bayesian_mixture.py    License: MIT License 6 votes vote down vote up
def test_monotonic_likelihood():
    # We check that each step of the each step of variational inference without
    # regularization improve monotonically the training set of the bound
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=20)
    n_components = rand_data.n_components

    for prior_type in PRIOR_TYPE:
        for covar_type in COVARIANCE_TYPE:
            X = rand_data.X[covar_type]
            bgmm = BayesianGaussianMixture(
                weight_concentration_prior_type=prior_type,
                n_components=2 * n_components, covariance_type=covar_type,
                warm_start=True, max_iter=1, random_state=rng, tol=1e-4)
            current_lower_bound = -np.infty
            # Do one training iteration at a time so we can make sure that the
            # training log likelihood increases after each iteration.
            for _ in range(600):
                prev_lower_bound = current_lower_bound
                current_lower_bound = bgmm.fit(X).lower_bound_
                assert_greater_equal(current_lower_bound, prev_lower_bound)

                if bgmm.converged_:
                    break
            assert(bgmm.converged_) 
Example 7
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_bayesian_mixture.py    License: MIT License 6 votes vote down vote up
def test_invariant_translation():
    # We check here that adding a constant in the data change correctly the
    # parameters of the mixture
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=100)
    n_components = 2 * rand_data.n_components

    for prior_type in PRIOR_TYPE:
        for covar_type in COVARIANCE_TYPE:
            X = rand_data.X[covar_type]
            bgmm1 = BayesianGaussianMixture(
                weight_concentration_prior_type=prior_type,
                n_components=n_components, max_iter=100, random_state=0,
                tol=1e-3, reg_covar=0).fit(X)
            bgmm2 = BayesianGaussianMixture(
                weight_concentration_prior_type=prior_type,
                n_components=n_components, max_iter=100, random_state=0,
                tol=1e-3, reg_covar=0).fit(X + 100)

            assert_almost_equal(bgmm1.means_, bgmm2.means_ - 100)
            assert_almost_equal(bgmm1.weights_, bgmm2.weights_)
            assert_almost_equal(bgmm1.covariances_, bgmm2.covariances_) 
Example 8
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_bayesian_mixture.py    License: MIT License 5 votes vote down vote up
def test_bayesian_mixture_covariance_type():
    rng = np.random.RandomState(0)
    n_samples, n_features = 10, 2
    X = rng.rand(n_samples, n_features)

    covariance_type = 'bad_covariance_type'
    bgmm = BayesianGaussianMixture(covariance_type=covariance_type,
                                   random_state=rng)
    assert_raise_message(ValueError,
                         "Invalid value for 'covariance_type': %s "
                         "'covariance_type' should be in "
                         "['spherical', 'tied', 'diag', 'full']"
                         % covariance_type, bgmm.fit, X) 
Example 9
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_bayesian_mixture.py    License: MIT License 5 votes vote down vote up
def test_bayesian_mixture_weight_concentration_prior_type():
    rng = np.random.RandomState(0)
    n_samples, n_features = 10, 2
    X = rng.rand(n_samples, n_features)

    bad_prior_type = 'bad_prior_type'
    bgmm = BayesianGaussianMixture(
        weight_concentration_prior_type=bad_prior_type, random_state=rng)
    assert_raise_message(ValueError,
                         "Invalid value for 'weight_concentration_prior_type':"
                         " %s 'weight_concentration_prior_type' should be in "
                         "['dirichlet_process', 'dirichlet_distribution']"
                         % bad_prior_type, bgmm.fit, X) 
Example 10
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_bayesian_mixture.py    License: MIT License 5 votes vote down vote up
def test_bayesian_mixture_weights_prior_initialisation():
    rng = np.random.RandomState(0)
    n_samples, n_components, n_features = 10, 5, 2
    X = rng.rand(n_samples, n_features)

    # Check raise message for a bad value of weight_concentration_prior
    bad_weight_concentration_prior_ = 0.
    bgmm = BayesianGaussianMixture(
        weight_concentration_prior=bad_weight_concentration_prior_,
        random_state=0)
    assert_raise_message(ValueError,
                         "The parameter 'weight_concentration_prior' "
                         "should be greater than 0., but got %.3f."
                         % bad_weight_concentration_prior_,
                         bgmm.fit, X)

    # Check correct init for a given value of weight_concentration_prior
    weight_concentration_prior = rng.rand()
    bgmm = BayesianGaussianMixture(
        weight_concentration_prior=weight_concentration_prior,
        random_state=rng).fit(X)
    assert_almost_equal(weight_concentration_prior,
                        bgmm.weight_concentration_prior_)

    # Check correct init for the default value of weight_concentration_prior
    bgmm = BayesianGaussianMixture(n_components=n_components,
                                   random_state=rng).fit(X)
    assert_almost_equal(1. / n_components, bgmm.weight_concentration_prior_) 
Example 11
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_bayesian_mixture.py    License: MIT License 5 votes vote down vote up
def test_bayesian_mixture_check_is_fitted():
    rng = np.random.RandomState(0)
    n_samples, n_features = 10, 2

    # Check raise message
    bgmm = BayesianGaussianMixture(random_state=rng)
    X = rng.rand(n_samples, n_features)
    assert_raise_message(ValueError,
                         'This BayesianGaussianMixture instance is not '
                         'fitted yet.', bgmm.score, X) 
Example 12
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_bayesian_mixture.py    License: MIT License 5 votes vote down vote up
def test_check_covariance_precision():
    # We check that the dot product of the covariance and the precision
    # matrices is identity.
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=7)
    n_components, n_features = 2 * rand_data.n_components, 2

    # Computation of the full_covariance
    bgmm = BayesianGaussianMixture(n_components=n_components,
                                   max_iter=100, random_state=rng, tol=1e-3,
                                   reg_covar=0)
    for covar_type in COVARIANCE_TYPE:
        bgmm.covariance_type = covar_type
        bgmm.fit(rand_data.X[covar_type])

        if covar_type == 'full':
            for covar, precision in zip(bgmm.covariances_, bgmm.precisions_):
                assert_almost_equal(np.dot(covar, precision),
                                    np.eye(n_features))
        elif covar_type == 'tied':
            assert_almost_equal(np.dot(bgmm.covariances_, bgmm.precisions_),
                                np.eye(n_features))

        elif covar_type == 'diag':
            assert_almost_equal(bgmm.covariances_ * bgmm.precisions_,
                                np.ones((n_components, n_features)))

        else:
            assert_almost_equal(bgmm.covariances_ * bgmm.precisions_,
                                np.ones(n_components)) 
Example 13
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_bayesian_mixture.py    License: MIT License 5 votes vote down vote up
def test_bayesian_mixture_predict_predict_proba():
    # this is the same test as test_gaussian_mixture_predict_predict_proba()
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    for prior_type in PRIOR_TYPE:
        for covar_type in COVARIANCE_TYPE:
            X = rand_data.X[covar_type]
            Y = rand_data.Y
            bgmm = BayesianGaussianMixture(
                n_components=rand_data.n_components,
                random_state=rng,
                weight_concentration_prior_type=prior_type,
                covariance_type=covar_type)

            # Check a warning message arrive if we don't do fit
            assert_raise_message(NotFittedError,
                                 "This BayesianGaussianMixture instance"
                                 " is not fitted yet. Call 'fit' with "
                                 "appropriate arguments before using "
                                 "this method.", bgmm.predict, X)

            bgmm.fit(X)
            Y_pred = bgmm.predict(X)
            Y_pred_proba = bgmm.predict_proba(X).argmax(axis=1)
            assert_array_equal(Y_pred, Y_pred_proba)
            assert_greater_equal(adjusted_rand_score(Y, Y_pred), .95) 
Example 14
Project: VAE_NBP   Author: bobchennan   File: vae_dp.py    License: MIT License 5 votes vote down vote up
def train(epoch, prior):
    model.train()
    train_loss = 0
    #prior = BayesianGaussianMixture(n_components=1, covariance_type='diag')
    tmp = []
    for (data,_) in train_loader:
        data = Variable(data)
        if args.cuda:
            data = data.cuda()
        recon_batch, mu, logvar, z = model(data)
        tmp.append(z.cpu().data.numpy())
    print('Update Prior')
    prior.fit(np.vstack(tmp))
    print('prior: '+str(prior.weights_))
    for batch_idx, (data, _) in enumerate(train_loader):
        data = Variable(data)
        if args.cuda:
            data = data.cuda()
        optimizer.zero_grad()
        recon_batch, mu, logvar, z = model(data)
        loss = loss_function(recon_batch, data, mu, logvar, prior, z)
        loss.backward()
        train_loss += loss.data[0]
        optimizer.step()
        #if batch_idx % args.log_interval == 0:
        #    print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
        #        epoch, batch_idx * len(data), len(train_loader.dataset),
        #        100. * batch_idx / len(train_loader),
        #        loss.data[0] / len(data)))

    print('====> Epoch: {} Average loss: {:.4f}'.format(
          epoch, train_loss / len(train_loader.dataset)))
    return prior 
Example 15
Project: VAE_NBP   Author: bobchennan   File: dp.py    License: MIT License 5 votes vote down vote up
def train(epoch, prior):
    prior = BayesianGaussianMixture(n_components=50, covariance_type='diag', n_init=5, max_iter=1000)
    tmp = []
    for (data,_) in train_loader:
        #print(data.numpy().shape)
        tmp.append(data.numpy().reshape(data.numpy().shape[0],-1))
    prior.fit(np.vstack(tmp))
    return prior 
Example 16
Project: nni   Author: microsoft   File: CreateModel.py    License: MIT License 5 votes vote down vote up
def create_model(samples_x, samples_y_aggregation, percentage_goodbatch=0.34):
    '''
    Create the Gaussian Mixture Model
    '''
    samples = [samples_x[i] + [samples_y_aggregation[i]]
               for i in range(0, len(samples_x))]

    # Sorts so that we can get the top samples
    samples = sorted(samples, key=itemgetter(-1))
    samples_goodbatch_size = int(len(samples) * percentage_goodbatch)
    samples_goodbatch = samples[0:samples_goodbatch_size]
    samples_badbatch = samples[samples_goodbatch_size:]

    samples_x_goodbatch = [sample_goodbatch[0:-1]
                           for sample_goodbatch in samples_goodbatch]
    #samples_y_goodbatch = [sample_goodbatch[-1] for sample_goodbatch in samples_goodbatch]
    samples_x_badbatch = [sample_badbatch[0:-1]
                          for sample_badbatch in samples_badbatch]

    # === Trains GMM clustering models === #
    #sys.stderr.write("[%s] Train GMM's GMM model\n" % (os.path.basename(__file__)))
    bgmm_goodbatch = mm.BayesianGaussianMixture(
        n_components=max(1, samples_goodbatch_size - 1))
    bad_n_components = max(1, len(samples_x) - samples_goodbatch_size - 1)
    bgmm_badbatch = mm.BayesianGaussianMixture(n_components=bad_n_components)
    bgmm_goodbatch.fit(samples_x_goodbatch)
    bgmm_badbatch.fit(samples_x_badbatch)

    model = {}
    model['clusteringmodel_good'] = bgmm_goodbatch
    model['clusteringmodel_bad'] = bgmm_badbatch
    return model 
Example 17
Project: SDGym   Author: sdv-dev   File: utils.py    License: MIT License 5 votes vote down vote up
def fit(self, data, categorical_columns=tuple(), ordinal_columns=tuple()):
        self.meta = self.get_metadata(data, categorical_columns, ordinal_columns)
        model = []

        self.output_info = []
        self.output_dim = 0
        self.components = []
        for id_, info in enumerate(self.meta):
            if info['type'] == CONTINUOUS:
                gm = BayesianGaussianMixture(
                    self.n_clusters,
                    weight_concentration_prior_type='dirichlet_process',
                    weight_concentration_prior=0.001,
                    n_init=1)
                gm.fit(data[:, id_].reshape([-1, 1]))
                model.append(gm)
                comp = gm.weights_ > self.eps
                self.components.append(comp)

                self.output_info += [(1, 'tanh'), (np.sum(comp), 'softmax')]
                self.output_dim += 1 + np.sum(comp)
            else:
                model.append(None)
                self.components.append(None)
                self.output_info += [(info['size'], 'softmax')]
                self.output_dim += info['size']

        self.model = model 
Example 18
Project: pyImSegm   Author: Borda   File: region_growing.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def transform_rays_model_cdf_mixture(list_rays, coef_components=1):
    """ compute the mixture model and transform it into cumulative distribution

    :param list(list(int)) list_rays: list ray features (distances)
    :param int coef_components: multiplication for number of components
    :return any, list(list(int)): mixture model, cumulative distribution

    >>> np.random.seed(0)
    >>> list_rays = [[9, 4, 9], [4, 9, 7], [9, 7, 11], [10, 8, 10],
    ...              [9, 11, 8], [4, 8, 5], [8, 10, 6], [9, 7, 11]]
    >>> mm, cdist = transform_rays_model_cdf_mixture(list_rays)
    >>> # the rounding variate a bit according GMM estimated model
    >>> np.round(np.array(cdist) * 4) / 4.  # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
    array([[ 1. , 1. , 1. , 1. , 1. , 1. , 0.75, 0.75, 0.5 , 0.25, 0. ],
           [ 1. , 1. , 1. , 1. , 1. , 1. , 1.  , 0.75, 0.5 , 0.25, 0. ],
           [ 1. , 1. , 1. , 1. , 1. , 1. , ...,  0.75, 0.5 , 0.25, 0. ]])
    """
    rays = np.array(list_rays)
    ms = cluster.MeanShift()
    ms.fit(rays)
    logging.debug('MeanShift found: %r', np.bincount(ms.labels_))

    nb_components = int(len(np.unique(ms.labels_)) * coef_components)
    mm = mixture.BayesianGaussianMixture(n_components=nb_components)
    # gmm.fit(np.array(list_rays))
    mm.fit(rays, ms.labels_)
    logging.debug('Mixture model found % components with weights: %r',
                  len(mm.weights_), mm.weights_)

    # compute the fairest mean + sigma over all components and ray angles
    max_dist = np.max([[m[i] + np.sqrt(c[i, i]) for i in range(len(m))]
                       for m, c in zip(mm.means_, mm.covariances_)])
    # max_dist = np.max(rays)

    # fixing, AttributeError: 'BayesianGaussianMixture' object has no attribute 'covariances'
    covs = mm.covariances if hasattr(mm, 'covariances') else mm.covariances_
    stds = np.sqrt(abs(covs))[:, np.eye(mm.means_.shape[1], dtype=bool)]
    # stds = np.sum(mm.covariances_, axis=-1)
    cdist = compute_cumulative_distrib(mm.means_, stds, mm.weights_, max_dist)
    return mm, cdist.tolist() 
Example 19
Project: pyImSegm   Author: Borda   File: region_growing.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def transform_rays_model_sets_mean_cdf_mixture(list_rays, nb_components=5, slic_size=15):
    """ compute the mixture model and transform it into cumulative distribution

    :param list(list(int)) list_rays: list ray features (distances)
    :param int nb_components: number components in mixture model
    :param int slic_size: superpixel size
    :return tuple(any,list(list(int))):  mixture model, list of stat/param of models

    >>> np.random.seed(0)
    >>> list_rays = [[9, 4, 9], [4, 9, 7], [9, 7, 11], [10, 8, 10],
    ...              [9, 11, 8], [4, 8, 5], [8, 10, 6], [9, 7, 11]]
    >>> mm, mean_cdf = transform_rays_model_sets_mean_cdf_mixture(list_rays, 2)
    >>> len(mean_cdf)
    2
    """
    rays = np.array(list_rays)
    # mm = mixture.GaussianMixture(n_components=nb_components,
    #                                      covariance_type='diag')
    mm = mixture.BayesianGaussianMixture(n_components=nb_components,
                                         covariance_type='diag')
    mm.fit(rays)
    logging.debug('Mixture model found % components with weights: %r',
                  len(mm.weights_), mm.weights_)

    list_mean_cdf = []
    # stds = mm.covariances_[:, np.eye(mm.means_.shape[1], dtype=bool)]
    # stds = mm.covariances_  # for covariance_type='diag'
    # diff_means = np.max(mm.means_, axis=0) - np.min(mm.means_, axis=0)
    for mean, covar in zip(mm.means_, mm.covariances_):
        std = np.sqrt(covar + 1) * 2 + slic_size
        mean = ndimage.gaussian_filter1d(mean, 1)
        std = ndimage.gaussian_filter1d(std, 1)
        max_dist = np.max(mean + 2 * std)
        cdist = compute_cumulative_distrib(np.array([mean]), np.array([std]),
                                           np.array([1]), max_dist)
        list_mean_cdf.append((mean.tolist(), cdist))

    return mm, list_mean_cdf 
Example 20
Project: scikit-lego   Author: koaning   File: bayesian_gmm_classifier.py    License: MIT License 5 votes vote down vote up
def fit(self, X: np.array, y: np.array) -> "BayesianGMMClassifier":
        """
        Fit the model using X, y as training data.

        :param X: array-like, shape=(n_columns, n_samples, ) training data.
        :param y: array-like, shape=(n_samples, ) training data.
        :return: Returns an instance of self.
        """
        X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
        if X.ndim == 1:
            X = np.expand_dims(X, 1)

        self.gmms_ = {}
        self.classes_ = unique_labels(y)
        for c in self.classes_:
            subset_x, subset_y = X[y == c], y[y == c]
            mixture = BayesianGaussianMixture(
                n_components=self.n_components,
                covariance_type=self.covariance_type,
                tol=self.tol,
                reg_covar=self.reg_covar,
                max_iter=self.max_iter,
                n_init=self.n_init,
                init_params=self.init_params,
                weight_concentration_prior_type=self.weight_concentration_prior_type,
                weight_concentration_prior=self.weight_concentration_prior,
                mean_precision_prior=self.mean_precision_prior,
                mean_prior=self.mean_prior,
                degrees_of_freedom_prior=self.degrees_of_freedom_prior,
                covariance_prior=self.covariance_prior,
                random_state=self.random_state,
                warm_start=self.warm_start,
                verbose=self.verbose,
                verbose_interval=self.verbose_interval,
            )
            self.gmms_[c] = mixture.fit(subset_x, subset_y)
        return self 
Example 21
Project: wgd   Author: arzwa   File: modeling.py    License: GNU General Public License v3.0 5 votes vote down vote up
def fit_bgmm(X, n1, n2, gamma=1e-3, max_iter=100, n_init=1, **kwargs):
    """
    Compute Bayesian Gaussian mixture

    :param X: data frame (log transformed Ks values)
    :param n1: minimum number of components
    :param n2: maximum number of components
    :param gamma: inverse of regularization strength
    :param max_iter: maximum number of iterations
    :param n_init: number of k-means initializations
    :param kwargs: other keyword args for `GaussianMixture`
    :return: models
    """
    # fit models with 1 to n components
    N = np.arange(n1, n2 + 1)
    models = [None for i in range(len(N))]

    for i in range(len(N)):
        logging.info("Fitting BGMM with {} components".format(N[i]))
        models[i] = mixture.BayesianGaussianMixture(
                weight_concentration_prior=gamma, n_init=n_init,
                n_components=N[i], covariance_type='full', max_iter=max_iter,
                **kwargs
        ).fit(X)
        log_components(models[i])

    return models 
Example 22
Project: pandas-ml   Author: pandas-ml   File: test_mixture.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def test_objectmapper(self):
        df = pdml.ModelFrame([])
        self.assertIs(df.mixture.GaussianMixture, mixture.GaussianMixture)
        self.assertIs(df.mixture.BayesianGaussianMixture,
                      mixture.BayesianGaussianMixture) 
Example 23
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_bayesian_mixture.py    License: MIT License 5 votes vote down vote up
def test_bayesian_mixture_covariance_type():
    rng = np.random.RandomState(0)
    n_samples, n_features = 10, 2
    X = rng.rand(n_samples, n_features)

    covariance_type = 'bad_covariance_type'
    bgmm = BayesianGaussianMixture(covariance_type=covariance_type,
                                   random_state=rng)
    assert_raise_message(ValueError,
                         "Invalid value for 'covariance_type': %s "
                         "'covariance_type' should be in "
                         "['spherical', 'tied', 'diag', 'full']"
                         % covariance_type, bgmm.fit, X) 
Example 24
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_bayesian_mixture.py    License: MIT License 5 votes vote down vote up
def test_bayesian_mixture_weight_concentration_prior_type():
    rng = np.random.RandomState(0)
    n_samples, n_features = 10, 2
    X = rng.rand(n_samples, n_features)

    bad_prior_type = 'bad_prior_type'
    bgmm = BayesianGaussianMixture(
        weight_concentration_prior_type=bad_prior_type, random_state=rng)
    assert_raise_message(ValueError,
                         "Invalid value for 'weight_concentration_prior_type':"
                         " %s 'weight_concentration_prior_type' should be in "
                         "['dirichlet_process', 'dirichlet_distribution']"
                         % bad_prior_type, bgmm.fit, X) 
Example 25
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_bayesian_mixture.py    License: MIT License 5 votes vote down vote up
def test_bayesian_mixture_weights_prior_initialisation():
    rng = np.random.RandomState(0)
    n_samples, n_components, n_features = 10, 5, 2
    X = rng.rand(n_samples, n_features)

    # Check raise message for a bad value of weight_concentration_prior
    bad_weight_concentration_prior_ = 0.
    bgmm = BayesianGaussianMixture(
        weight_concentration_prior=bad_weight_concentration_prior_,
        random_state=0)
    assert_raise_message(ValueError,
                         "The parameter 'weight_concentration_prior' "
                         "should be greater than 0., but got %.3f."
                         % bad_weight_concentration_prior_,
                         bgmm.fit, X)

    # Check correct init for a given value of weight_concentration_prior
    weight_concentration_prior = rng.rand()
    bgmm = BayesianGaussianMixture(
        weight_concentration_prior=weight_concentration_prior,
        random_state=rng).fit(X)
    assert_almost_equal(weight_concentration_prior,
                        bgmm.weight_concentration_prior_)

    # Check correct init for the default value of weight_concentration_prior
    bgmm = BayesianGaussianMixture(n_components=n_components,
                                   random_state=rng).fit(X)
    assert_almost_equal(1. / n_components, bgmm.weight_concentration_prior_) 
Example 26
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_bayesian_mixture.py    License: MIT License 5 votes vote down vote up
def test_bayesian_mixture_check_is_fitted():
    rng = np.random.RandomState(0)
    n_samples, n_features = 10, 2

    # Check raise message
    bgmm = BayesianGaussianMixture(random_state=rng)
    X = rng.rand(n_samples, n_features)
    assert_raise_message(ValueError,
                         'This BayesianGaussianMixture instance is not '
                         'fitted yet.', bgmm.score, X) 
Example 27
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_bayesian_mixture.py    License: MIT License 5 votes vote down vote up
def test_check_covariance_precision():
    # We check that the dot product of the covariance and the precision
    # matrices is identity.
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=7)
    n_components, n_features = 2 * rand_data.n_components, 2

    # Computation of the full_covariance
    bgmm = BayesianGaussianMixture(n_components=n_components,
                                   max_iter=100, random_state=rng, tol=1e-3,
                                   reg_covar=0)
    for covar_type in COVARIANCE_TYPE:
        bgmm.covariance_type = covar_type
        bgmm.fit(rand_data.X[covar_type])

        if covar_type == 'full':
            for covar, precision in zip(bgmm.covariances_, bgmm.precisions_):
                assert_almost_equal(np.dot(covar, precision),
                                    np.eye(n_features))
        elif covar_type == 'tied':
            assert_almost_equal(np.dot(bgmm.covariances_, bgmm.precisions_),
                                np.eye(n_features))

        elif covar_type == 'diag':
            assert_almost_equal(bgmm.covariances_ * bgmm.precisions_,
                                np.ones((n_components, n_features)))

        else:
            assert_almost_equal(bgmm.covariances_ * bgmm.precisions_,
                                np.ones(n_components)) 
Example 28
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_bayesian_mixture.py    License: MIT License 4 votes vote down vote up
def test_bayesian_mixture_mean_prior_initialisation():
    rng = np.random.RandomState(0)
    n_samples, n_components, n_features = 10, 3, 2
    X = rng.rand(n_samples, n_features)

    # Check raise message for a bad value of mean_precision_prior
    bad_mean_precision_prior_ = 0.
    bgmm = BayesianGaussianMixture(
        mean_precision_prior=bad_mean_precision_prior_,
        random_state=rng)
    assert_raise_message(ValueError,
                         "The parameter 'mean_precision_prior' should be "
                         "greater than 0., but got %.3f."
                         % bad_mean_precision_prior_,
                         bgmm.fit, X)

    # Check correct init for a given value of mean_precision_prior
    mean_precision_prior = rng.rand()
    bgmm = BayesianGaussianMixture(
        mean_precision_prior=mean_precision_prior,
        random_state=rng).fit(X)
    assert_almost_equal(mean_precision_prior, bgmm.mean_precision_prior_)

    # Check correct init for the default value of mean_precision_prior
    bgmm = BayesianGaussianMixture(random_state=rng).fit(X)
    assert_almost_equal(1., bgmm.mean_precision_prior_)

    # Check raise message for a bad shape of mean_prior
    mean_prior = rng.rand(n_features + 1)
    bgmm = BayesianGaussianMixture(n_components=n_components,
                                   mean_prior=mean_prior,
                                   random_state=rng)
    assert_raise_message(ValueError,
                         "The parameter 'means' should have the shape of ",
                         bgmm.fit, X)

    # Check correct init for a given value of mean_prior
    mean_prior = rng.rand(n_features)
    bgmm = BayesianGaussianMixture(n_components=n_components,
                                   mean_prior=mean_prior,
                                   random_state=rng).fit(X)
    assert_almost_equal(mean_prior, bgmm.mean_prior_)

    # Check correct init for the default value of bemean_priorta
    bgmm = BayesianGaussianMixture(n_components=n_components,
                                   random_state=rng).fit(X)
    assert_almost_equal(X.mean(axis=0), bgmm.mean_prior_) 
Example 29
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_bayesian_mixture.py    License: MIT License 4 votes vote down vote up
def test_compare_covar_type():
    # We can compare the 'full' precision with the other cov_type if we apply
    # 1 iter of the M-step (done during _initialize_parameters).
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=7)
    X = rand_data.X['full']
    n_components = rand_data.n_components

    for prior_type in PRIOR_TYPE:
        # Computation of the full_covariance
        bgmm = BayesianGaussianMixture(
            weight_concentration_prior_type=prior_type,
            n_components=2 * n_components, covariance_type='full',
            max_iter=1, random_state=0, tol=1e-7)
        bgmm._check_initial_parameters(X)
        bgmm._initialize_parameters(X, np.random.RandomState(0))
        full_covariances = (
            bgmm.covariances_ *
            bgmm.degrees_of_freedom_[:, np.newaxis, np.newaxis])

        # Check tied_covariance = mean(full_covariances, 0)
        bgmm = BayesianGaussianMixture(
            weight_concentration_prior_type=prior_type,
            n_components=2 * n_components, covariance_type='tied',
            max_iter=1, random_state=0, tol=1e-7)
        bgmm._check_initial_parameters(X)
        bgmm._initialize_parameters(X, np.random.RandomState(0))

        tied_covariance = bgmm.covariances_ * bgmm.degrees_of_freedom_
        assert_almost_equal(tied_covariance, np.mean(full_covariances, 0))

        # Check diag_covariance = diag(full_covariances)
        bgmm = BayesianGaussianMixture(
            weight_concentration_prior_type=prior_type,
            n_components=2 * n_components, covariance_type='diag',
            max_iter=1, random_state=0, tol=1e-7)
        bgmm._check_initial_parameters(X)
        bgmm._initialize_parameters(X, np.random.RandomState(0))

        diag_covariances = (bgmm.covariances_ *
                            bgmm.degrees_of_freedom_[:, np.newaxis])
        assert_almost_equal(diag_covariances,
                            np.array([np.diag(cov)
                                     for cov in full_covariances]))

        # Check spherical_covariance = np.mean(diag_covariances, 0)
        bgmm = BayesianGaussianMixture(
            weight_concentration_prior_type=prior_type,
            n_components=2 * n_components, covariance_type='spherical',
            max_iter=1, random_state=0, tol=1e-7)
        bgmm._check_initial_parameters(X)
        bgmm._initialize_parameters(X, np.random.RandomState(0))

        spherical_covariances = bgmm.covariances_ * bgmm.degrees_of_freedom_
        assert_almost_equal(
            spherical_covariances, np.mean(diag_covariances, 1)) 
Example 30
Project: twitter-stock-recommendation   Author: alvarobartt   File: test_bayesian_mixture.py    License: MIT License 4 votes vote down vote up
def test_bayesian_mixture_means_prior_initialisation():
    rng = np.random.RandomState(0)
    n_samples, n_components, n_features = 10, 3, 2
    X = rng.rand(n_samples, n_features)

    # Check raise message for a bad value of mean_precision_prior
    bad_mean_precision_prior_ = 0.
    bgmm = BayesianGaussianMixture(
        mean_precision_prior=bad_mean_precision_prior_,
        random_state=rng)
    assert_raise_message(ValueError,
                         "The parameter 'mean_precision_prior' should be "
                         "greater than 0., but got %.3f."
                         % bad_mean_precision_prior_,
                         bgmm.fit, X)

    # Check correct init for a given value of mean_precision_prior
    mean_precision_prior = rng.rand()
    bgmm = BayesianGaussianMixture(
        mean_precision_prior=mean_precision_prior,
        random_state=rng).fit(X)
    assert_almost_equal(mean_precision_prior, bgmm.mean_precision_prior_)

    # Check correct init for the default value of mean_precision_prior
    bgmm = BayesianGaussianMixture(random_state=rng).fit(X)
    assert_almost_equal(1., bgmm.mean_precision_prior_)

    # Check raise message for a bad shape of mean_prior
    mean_prior = rng.rand(n_features + 1)
    bgmm = BayesianGaussianMixture(n_components=n_components,
                                   mean_prior=mean_prior,
                                   random_state=rng)
    assert_raise_message(ValueError,
                         "The parameter 'means' should have the shape of ",
                         bgmm.fit, X)

    # Check correct init for a given value of mean_prior
    mean_prior = rng.rand(n_features)
    bgmm = BayesianGaussianMixture(n_components=n_components,
                                   mean_prior=mean_prior,
                                   random_state=rng).fit(X)
    assert_almost_equal(mean_prior, bgmm.mean_prior_)

    # Check correct init for the default value of bemean_priorta
    bgmm = BayesianGaussianMixture(n_components=n_components,
                                   random_state=rng).fit(X)
    assert_almost_equal(X.mean(axis=0), bgmm.mean_prior_)