Python sklearn.mixture.GMM Examples

The following are 29 code examples of sklearn.mixture.GMM(), collected from open-source projects; each example names the project and source file it comes from. You may also want to check out all available functions/classes of the module sklearn.mixture. Note that sklearn.mixture.GMM is the old mixture API: it was deprecated in scikit-learn 0.18 and removed in 0.20 in favor of sklearn.mixture.GaussianMixture, which is why many of the test examples below are marked as exercising the deprecated class.
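Before the examples, a minimal sketch of the basic fit/predict workflow against the replacement GaussianMixture API; the two-blob toy data and all names below are illustrative and not taken from the projects that follow.

import numpy as np
from sklearn.mixture import GaussianMixture  # replacement for the removed GMM class

# Toy data: two well-separated Gaussian blobs.
rng = np.random.RandomState(0)
X = np.vstack([rng.randn(100, 2), rng.randn(100, 2) + [5, 5]])

# Old API: mixture.GMM(n_components=2, covariance_type='full', n_iter=100);
# GaussianMixture renames n_iter -> max_iter and covars_ -> covariances_.
gm = GaussianMixture(n_components=2, covariance_type='full', max_iter=100,
                     random_state=0).fit(X)
labels = gm.predict(X)      # hard component assignments
resp = gm.predict_proba(X)  # soft responsibilities, shape (n_samples, 2)
print(gm.weights_, gm.means_)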
Example #1
Source File: test_gmm.py    From twitter-stock-recommendation with MIT License
def test_aic():
    # Test the aic and bic criteria
    n_samples, n_dim, n_components = 50, 3, 2
    X = rng.randn(n_samples, n_dim)
    SGH = 0.5 * (X.var() + np.log(2 * np.pi))  # standard gaussian entropy

    for cv_type in ['full', 'tied', 'diag', 'spherical']:
        g = mixture.GMM(n_components=n_components, covariance_type=cv_type,
                        random_state=rng, min_covar=1e-7)
        g.fit(X)
        aic = 2 * n_samples * SGH * n_dim + 2 * g._n_parameters()
        bic = (2 * n_samples * SGH * n_dim +
               np.log(n_samples) * g._n_parameters())
        bound = n_dim * 3. / np.sqrt(n_samples)
        assert_true(np.abs(g.aic(X) - aic) / n_samples < bound)
        assert_true(np.abs(g.bic(X) - bic) / n_samples < bound)


# This function tests the deprecated old GMM class 
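For reference, the criteria under test are AIC = 2k - 2·ln(L) and BIC = k·ln(n) - 2·ln(L), with k = g._n_parameters() and L the maximized likelihood. Since X is drawn from a standard normal, the expected negative log-likelihood per sample and per dimension is the unit-Gaussian differential entropy SGH = 0.5 * (X.var() + log(2π)) ≈ 0.5 * (1 + log(2π)) computed above, so -2·ln(L) ≈ 2 * n_samples * n_dim * SGH, which is the first term of both expressions; the assertions only require agreement up to a sampling-noise bound.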
Example #2
Source File: test_gmm.py    From twitter-stock-recommendation with MIT License
def test_fit_predict():
    """
    test that gmm.fit_predict is equivalent to gmm.fit + gmm.predict
    """
    lrng = np.random.RandomState(101)

    n_samples, n_dim, n_comps = 100, 2, 2
    mu = np.array([[8, 8]])
    component_0 = lrng.randn(n_samples, n_dim)
    component_1 = lrng.randn(n_samples, n_dim) + mu
    X = np.vstack((component_0, component_1))

    for m_constructor in (mixture.GMM, mixture.VBGMM, mixture.DPGMM):
        model = m_constructor(n_components=n_comps, covariance_type='full',
                              min_covar=1e-7, n_iter=5,
                              random_state=np.random.RandomState(0))
        assert_fit_predict_correct(model, X)

    model = mixture.GMM(n_components=n_comps, n_iter=0)
    z = model.fit_predict(X)
    assert np.all(z == 0), "Quick Initialization Failed!"


# This function tests the deprecated old GMM class 
Example #3
Source File: test_gmm.py    From twitter-stock-recommendation with MIT License
def test_train_1d(self, params='wmc'):
        # Train on 1-D data
        # Create a training set by sampling from the predefined
        # distribution.
        X = rng.randn(100, 1)
        # X.T[1:] = 0
        g = self.model(n_components=2,
                       covariance_type=self.covariance_type,
                       random_state=rng, min_covar=1e-7, n_iter=5,
                       init_params=params)
        with ignore_warnings(category=DeprecationWarning):
            g.fit(X)
            trainll = g.score(X)
            if isinstance(g, mixture.dpgmm._DPGMMBase):
                self.assertTrue(np.sum(np.abs(trainll / 100)) < 5)
            else:
                self.assertTrue(np.sum(np.abs(trainll / 100)) < 2)

    # This function tests the deprecated old GMM class 
Example #4
Source File: transforms.py    From omgh with MIT License
def fit(self, data_generator, force=False, test=False):
        if force or not self.storage.check_exists(self.model_path):
            self._transform = GMM(
                n_components=self.n_components,
                covariance_type=self.covariance_type,
                n_iter=self.n_iter, n_init=self.n_init)

            def mid_generator():
                for t, des in data_generator:
                    yield des

            X = np.vstack(mid_generator())
            if test:
                X = X[0:100000, :]
            self._transform.fit(X)
            joblib.dump(self._transform, self.model_path)
        else:
            self._transform = joblib.load(self.model_path) 
Example #5
Source File: test_gmm.py    From twitter-stock-recommendation with MIT License
def test_train_degenerate(self, params='wmc'):
        # Train on degenerate data with 0 in some dimensions
        # Create a training set by sampling from the predefined
        # distribution.
        X = rng.randn(100, self.n_features)
        X.T[1:] = 0
        g = self.model(n_components=2,
                       covariance_type=self.covariance_type,
                       random_state=rng, min_covar=1e-3, n_iter=5,
                       init_params=params)
        with ignore_warnings(category=DeprecationWarning):
            g.fit(X)
            trainll = g.score(X)
        self.assertTrue(np.sum(np.abs(trainll / 100 / X.shape[1])) < 5)

    # This function tests the deprecated old GMM class 
Example #6
Source File: beamformers_electrodes_tweak.py    From mmvt with GNU General Public License v3.0
def find_best_n_componenets(X, electrodes, event_id, bipolar, from_t, to_t, time_split, meg_data_dic, elec_data, gk_sigma):
    cond = utils.first_key(event_id)
    all_errors = []
    all_clusters = []
    for n_components in range(1, X.shape[0]):
        gmm = mixture.GMM(n_components=n_components, covariance_type='spherical')
        gmm.fit(X)
        clusters = gmm.predict(X)
        unique_clusters = np.unique(clusters)
        cluster_errors = []
        for cluster in unique_clusters:
            cluster_electrodes = np.array(electrodes)[np.where(clusters == cluster)].tolist()
            elec_errors, elec_ps = reconstruct_meg(event_id, cluster_electrodes, from_t, to_t, time_split, gk_sigma=gk_sigma,
                                                   plot_results=False, bipolar=bipolar, dont_calc_new_csd=True, all_meg_data=meg_data_dic,
                                                   elec_data=elec_data, njobs=1)
            cluster_errors.append(max(elec_errors[cond].values()))
        print(n_components, max(cluster_errors))
        all_clusters.append(clusters)
        all_errors.append(max(cluster_errors))
    utils.save((all_clusters, all_errors), get_pkl_file('{}_best_n_componenets'.format(cond)))
    plt.plot(all_errors)
    plt.show() 
Example #7
Source File: test_gmm.py    From twitter-stock-recommendation with MIT License
def _setUp(self):
        self.n_components = 10
        self.n_features = 4
        self.weights = rng.rand(self.n_components)
        self.weights = self.weights / self.weights.sum()
        self.means = rng.randint(-20, 20, (self.n_components, self.n_features))
        self.threshold = -0.5
        self.I = np.eye(self.n_features)
        self.covars = {
            'spherical': (0.1 + 2 * rng.rand(self.n_components,
                                             self.n_features)) ** 2,
            'tied': (make_spd_matrix(self.n_features, random_state=0)
                     + 5 * self.I),
            'diag': (0.1 + 2 * rng.rand(self.n_components,
                                        self.n_features)) ** 2,
            'full': np.array([make_spd_matrix(self.n_features, random_state=0)
                              + 5 * self.I for x in range(self.n_components)])}

    # This function tests the deprecated old GMM class 
Example #8
Source File: test_gmm.py    From twitter-stock-recommendation with MIT License
def test_GMM_attributes():
    n_components, n_features = 10, 4
    covariance_type = 'diag'
    g = mixture.GMM(n_components, covariance_type, random_state=rng)
    weights = rng.rand(n_components)
    weights = weights / weights.sum()
    means = rng.randint(-20, 20, (n_components, n_features))

    assert_true(g.n_components == n_components)
    assert_true(g.covariance_type == covariance_type)

    g.weights_ = weights
    assert_array_almost_equal(g.weights_, weights)
    g.means_ = means
    assert_array_almost_equal(g.means_, means)

    covars = (0.1 + 2 * rng.rand(n_components, n_features)) ** 2
    g.covars_ = covars
    assert_array_almost_equal(g.covars_, covars)
    assert_raises(ValueError, g._set_covars, [])
    assert_raises(ValueError, g._set_covars,
                  np.zeros((n_components - 2, n_features)))

    assert_raises(ValueError, mixture.GMM, n_components=20,
                  covariance_type='badcovariance_type') 
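For readers porting this attribute-level usage to the current API, a hedged sketch of the correspondence (the GaussianMixture names are from current scikit-learn; the data and fit call are placeholders):

from sklearn.mixture import GaussianMixture

# Old:  mixture.GMM(n_components, covariance_type, min_covar=..., n_iter=...)
# New:  n_iter -> max_iter; reg_covar roughly corresponds to min_covar.
gm = GaussianMixture(n_components=10, covariance_type='diag',
                     reg_covar=1e-7, max_iter=100)
# After gm.fit(X): weights_ and means_ keep their names, while the old
# covars_ attribute is exposed as covariances_ (per-component variances
# for 'diag', full matrices for 'full').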
Example #9
Source File: skgmm.py    From Conceptor with GNU General Public License v3.0
def fit_new(self, x, label):
        self.y.append(label)
        gmm = GMM(self.gmm_order)
        gmm.fit(x)
        self.gmms.append(gmm) 
Example #10
Source File: test_gmm.py    From twitter-stock-recommendation with MIT License
def test_1d_1component():
    # Test that all of the covariance types return the same BIC score for
    # 1-dimensional, 1-component fits (a single 1-D Gaussian has one variance
    # parameter regardless of the covariance_type).
    n_samples, n_dim, n_components = 100, 1, 1
    X = rng.randn(n_samples, n_dim)
    g_full = mixture.GMM(n_components=n_components, covariance_type='full',
                         random_state=rng, min_covar=1e-7, n_iter=1)
    with ignore_warnings(category=DeprecationWarning):
        g_full.fit(X)
        g_full_bic = g_full.bic(X)
        for cv_type in ['tied', 'diag', 'spherical']:
            g = mixture.GMM(n_components=n_components, covariance_type=cv_type,
                            random_state=rng, min_covar=1e-7, n_iter=1)
            g.fit(X)
            assert_array_almost_equal(g.bic(X), g_full_bic) 
Example #11
Source File: test_gmm.py    From twitter-stock-recommendation with MIT License
def test_n_parameters():
    n_samples, n_dim, n_components = 7, 5, 2
    X = rng.randn(n_samples, n_dim)
    n_params = {'spherical': 13, 'diag': 21, 'tied': 26, 'full': 41}
    for cv_type in ['full', 'tied', 'diag', 'spherical']:
        with ignore_warnings(category=DeprecationWarning):
            g = mixture.GMM(n_components=n_components, covariance_type=cv_type,
                            random_state=rng, min_covar=1e-7, n_iter=1)
            g.fit(X)
            assert_true(g._n_parameters() == n_params[cv_type])


# This function tests the deprecated old GMM class 
Example #12
Source File: test_gmm.py    From twitter-stock-recommendation with MIT License
def test_multiple_init():
    # Test that multiple inits do not perform much worse than a single one
    X = rng.randn(30, 5)
    X[:10] += 2
    g = mixture.GMM(n_components=2, covariance_type='spherical',
                    random_state=rng, min_covar=1e-7, n_iter=5)
    with ignore_warnings(category=DeprecationWarning):
        train1 = g.fit(X).score(X).sum()
        g.n_init = 5
        train2 = g.fit(X).score(X).sum()
    assert_true(train2 >= train1 - 1.e-2)


# This function tests the deprecated old GMM class 
Example #13
Source File: test_gmm.py    From twitter-stock-recommendation with MIT License
def test_sample(self, n=100):
        g = self.model(n_components=self.n_components,
                       covariance_type=self.covariance_type,
                       random_state=rng)
        # Make sure the means are far apart so responsibilities.argmax()
        # picks the actual component used to generate the observations.
        g.means_ = 20 * self.means
        g.covars_ = np.maximum(self.covars[self.covariance_type], 0.1)
        g.weights_ = self.weights

        with ignore_warnings(category=DeprecationWarning):
            samples = g.sample(n)
        self.assertEqual(samples.shape, (n, self.n_features))

    # This function tests the deprecated old GMM class 
Example #14
Source File: test_gmm.py    From twitter-stock-recommendation with MIT License
def test_eval(self):
        if not self.do_test_eval:
            return  # DPGMM does not support setting the means and
        # covariances before fitting. There is no way of fixing this,
        # since the variational parameters are more expressive than
        # covariance matrices.
        g = self.model(n_components=self.n_components,
                       covariance_type=self.covariance_type, random_state=rng)
        # Make sure the means are far apart so responsibilities.argmax()
        # picks the actual component used to generate the observations.
        g.means_ = 20 * self.means
        g.covars_ = self.covars[self.covariance_type]
        g.weights_ = self.weights

        gaussidx = np.repeat(np.arange(self.n_components), 5)
        n_samples = len(gaussidx)
        X = rng.randn(n_samples, self.n_features) + g.means_[gaussidx]

        with ignore_warnings(category=DeprecationWarning):
            ll, responsibilities = g.score_samples(X)

        self.assertEqual(len(ll), n_samples)
        self.assertEqual(responsibilities.shape,
                         (n_samples, self.n_components))
        assert_array_almost_equal(responsibilities.sum(axis=1),
                                  np.ones(n_samples))
        assert_array_equal(responsibilities.argmax(axis=1), gaussidx)

    # This function tests the deprecated old GMM class 
Example #15
Source File: test_gmm.py    From twitter-stock-recommendation with MIT License
def test_lvmpdf_full_cv_non_positive_definite():
    n_features, n_samples = 2, 10
    rng = np.random.RandomState(0)
    X = rng.randint(10) * rng.rand(n_samples, n_features)
    mu = np.mean(X, 0)
    cv = np.array([[[-1, 0], [0, 1]]])
    expected_message = "'covars' must be symmetric, positive-definite"
    assert_raise_message(ValueError, expected_message,
                         mixture.log_multivariate_normal_density,
                         X, mu, cv, 'full')


# This function tests the deprecated old GMM class 
Example #16
Source File: ClassficationBase.py    From AirTicketPredicting with MIT License
def gmmRemovingOutlierForClassifier():
    """
    use GMM model to remove outlier
    :return: NA
    """
    # load data
    X_train = np.load('inputClf_small/X_train.npy')
    y_train = np.load('inputClf_small/y_train.npy')
    y_train_price = np.load('inputClf_small/y_train_price.npy')

    # classifier initialize
    classifier = GMM(n_components=2, covariance_type='full', init_params='wmc', n_iter=20)

    # cluster initializing
    X_train1 = X_train[np.where(y_train==0)[0], :]
    X_train2 = X_train[np.where(y_train==1)[0], :]
    cluster1 = KMeans(init='random', n_clusters=1, random_state=0).fit(X_train1)
    cluster1 = cluster1.cluster_centers_
    cluster2 = KMeans(init='random', n_clusters=1, random_state=0).fit(X_train2)
    cluster2 = cluster2.cluster_centers_
    clusters = np.concatenate((cluster1, cluster2), axis=0)

    classifier.means_ = clusters

    # Train the other parameters using the EM algorithm.
    classifier.fit(X_train)

    # predict
    y_train_pred = classifier.predict(X_train)
    train_accuracy = np.mean(y_train_pred.ravel() == y_train.ravel()) * 100
    print "Keep {}% data.".format(train_accuracy)


    # keep the data which are not outliers
    y_train_pred = y_train_pred.reshape((y_train_pred.shape[0], 1))
    X_train = X_train[np.where(y_train==y_train_pred)[0], :]
    y_train_price = y_train_price[np.where(y_train==y_train_pred)[0], :]
    y_train = y_train[np.where(y_train==y_train_pred)[0], :]
    np.save('inputClf_GMMOutlierRemoval/X_train', X_train)
    np.save('inputClf_GMMOutlierRemoval/y_train', y_train)
    np.save('inputClf_GMMOutlierRemoval/y_train_price', y_train_price) 
Example #17
Source File: beamformers_electrodes_tweak.py    From mmvt with GNU General Public License v3.0
def analyze_best_n_componenets(event_id, bipolar, from_t, to_t, time_split, gk_sigma=3,
        electrodes_positive=True, electrodes_normalize=True, njobs=4):
    cond = utils.first_key(event_id)
    all_clusters, all_errors = utils.load(get_pkl_file('{}_best_n_componenets'.format(cond)))
    electrodes, errors, pss = utils.load(get_pkl_file('{}_calc_p_for_each_electrode.pkl'.format(cond)))
    elec_data = load_electrodes_data(event_id, bipolar, electrodes, from_t, to_t,
        subtract_min=electrodes_positive, normalize_data=electrodes_normalize)
    meg_data_dic = load_all_dics(freqs_bin, event_id, bipolar, electrodes, from_t, to_t, gk_sigma, dont_calc_new_csd=True, njobs=njobs)

    X = utils.stack(pss)
    for k, cluster_error in enumerate(all_errors):
        print(k, cluster_error)
    plt.plot(all_errors)
    plt.show()

    gmm = mixture.GMM(n_components=22, covariance_type='spherical')
    gmm.fit(X)
    clusters = gmm.predict(X)
    unique_clusters = np.unique(clusters)
    cluster_errors = []
    for cluster in unique_clusters:
        cluster_electrodes = np.array(electrodes)[np.where(clusters == cluster)].tolist()
        elec_errors, elec_ps = reconstruct_meg(event_id, cluster_electrodes, from_t, to_t, time_split, gk_sigma=gk_sigma,
                                               plot_results=True, bipolar=bipolar, dont_calc_new_csd=True, all_meg_data=meg_data_dic,
                                               elec_data=elec_data, njobs=1)
        cluster_errors.append(max(elec_errors[cond].values())) 
Example #18
Source File: test_gmm.py    From twitter-stock-recommendation with MIT License
def test_positive_definite_covars():
    # Check positive definiteness for all covariance types
    for covariance_type in ["full", "tied", "diag", "spherical"]:
        yield check_positive_definite_covars, covariance_type


# This function tests the deprecated old GMM class 
Example #19
Source File: graph_cuts.py    From pyImSegm with BSD 3-Clause "New" or "Revised" License
def estim_class_model_gmm(features, nb_classes, init='kmeans'):
    """ from all features estimate Gaussian Mixture Model and assuming
    each cluster is a single class compute probability that each feature
    belongs to each class

    :param [[float]] features: list of features per segment
    :param int nb_classes: number of classes
    :param int init: initialisation
    :return [[float]]: probabilities that each feature belongs to each class

    >>> np.random.seed(0)
    >>> fts = np.row_stack([np.random.random((50, 3)) - 1,
    ...                     np.random.random((50, 3)) + 1])
    >>> mm = estim_class_model_gmm(fts, 2)
    >>> mm.predict_proba(fts).shape
    (100, 2)
    """
    logging.debug('estimate GMM for all given features %r and %i component',
                  features.shape, nb_classes)
    # http://scikit-learn.org/stable/modules/generated/sklearn.mixture.GMM.html
    gmm = mixture.GaussianMixture(n_components=nb_classes,
                                  covariance_type='full', max_iter=99)
    if init == 'kmeans':
        # http://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html
        kmeans = cluster.KMeans(n_clusters=nb_classes, init='k-means++',
                                n_jobs=-1)
        y = kmeans.fit_predict(features)
        gmm.fit(features, y)
    else:
        gmm.fit(features)
    logging.info('compute probability of each feature to all component')
    return gmm 
Example #20
Source File: test_gmm.py    From twitter-stock-recommendation with MIT License
def test_verbose_first_level():
    # Create sample data
    X = rng.randn(30, 5)
    X[:10] += 2
    g = mixture.GMM(n_components=2, n_init=2, verbose=1)

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        g.fit(X)
    finally:
        sys.stdout = old_stdout


# This function tests the deprecated old GMM class 
Example #21
Source File: graph_cuts.py    From pyImSegm with BSD 3-Clause "New" or "Revised" License
def estim_gmm_params(features, prob):
    """ with given soft labeling (take the maxim) get the GMM parameters

    :param ndarray features:
    :param ndarray prob:
    :return:

    >>> np.random.seed(0)
    >>> prob = np.array([[1, 0]] * 30 + [[0, 1]] * 40)
    >>> fts = prob + np.random.random(prob.shape)
    >>> mm = estim_gmm_params(fts, prob)
    >>> mm['weights']
    [0.42857142857142855, 0.5714285714285714]
    >>> mm['means']
    array([[ 1.49537196,  0.53745455],
           [ 0.54199936,  1.42606497]])
    """
    nb_samples, nb_classes = prob.shape
    labels = np.argmax(prob, axis=1)
    gmm_params = {'weights': [], 'means': [], 'covars': []}
    for lb in range(nb_classes):
        labels_sel = (labels == lb)
        gmm_params['weights'].append(np.sum(labels_sel) / float(nb_samples))
        gmm_params['means'].append(np.mean(features[labels_sel], axis=0))
        # rowvar=False: samples are rows, features are columns
        gmm_params['covars'].append(np.cov(features[labels_sel], rowvar=False))
    for n in ['means', 'covars']:
        gmm_params[n] = np.array([m.tolist() for m in gmm_params[n]])
    return gmm_params 
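A common next step is to warm-start a mixture model from these estimates; below is a sketch against the current GaussianMixture API (weights_init, means_init and precisions_init are real constructor parameters, while the helper name gmm_from_params is hypothetical):

import numpy as np
from sklearn.mixture import GaussianMixture

def gmm_from_params(gmm_params, features):
    # For 'full' covariances, precisions_init is the stack of inverted
    # per-class covariance matrices estimated above.
    precisions = np.linalg.inv(gmm_params['covars'])
    gm = GaussianMixture(n_components=len(gmm_params['weights']),
                         covariance_type='full',
                         weights_init=gmm_params['weights'],
                         means_init=gmm_params['means'],
                         precisions_init=precisions)
    return gm.fit(features)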
Example #22
Source File: ModelsTrainer.py    From Voice-based-gender-recognition with MIT License
def process(self):
        females, males = self.get_file_paths(self.females_training_path,
                                             self.males_training_path)
        # collect voice features
        female_voice_features = self.collect_features(females)
        male_voice_features   = self.collect_features(males)
        # generate gaussian mixture models
        females_gmm = GMM(n_components = 16, n_iter = 200, covariance_type='diag', n_init = 3)
        males_gmm   = GMM(n_components = 16, n_iter = 200, covariance_type='diag', n_init = 3)
        # fit features to models
        females_gmm.fit(female_voice_features)
        males_gmm.fit(male_voice_features)
        # save models
        self.save_gmm(females_gmm, "females")
        self.save_gmm(males_gmm,   "males") 
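With both models saved, prediction is typically a log-likelihood comparison; a minimal sketch, assuming a features array like those returned by collect_features above (the helper name predict_gender is hypothetical):

import numpy as np

def predict_gender(features, females_gmm, males_gmm):
    # The old GMM.score returns per-sample log-likelihoods, so summing
    # gives the total log-likelihood of the utterance under each model.
    female_ll = np.sum(females_gmm.score(features))
    male_ll = np.sum(males_gmm.score(features))
    return "female" if female_ll > male_ll else "male"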
Example #23
Source File: skill.py    From costar_plan with Apache License 2.0
def GetGoalModel(self,objs,preset=None):
        if preset is None:
            robot = RobotFeatures()
        else:
            robot = RobotFeatures(preset=preset)

        if 'gripper' in objs:
            objs.remove('gripper')

        for obj in objs:
            robot.AddObject(obj)

        dims = robot.max_index
        K = self.action_model.n_components

        goal = GMM(n_components=K,covariance_type="full")
        goal.weights_ = self.action_model.weights_
        goal.means_ = np.zeros((K,dims))
        goal.covars_ = np.zeros((K,dims,dims))

        idx = robot.GetDiffIndices(objs)
        print(objs)

        for k in range(K):
            goal.means_[k,:] = self.action_model.means_[k,idx]
            for j in range(dims):
                goal.covars_[k,j,idx] = self.action_model.covars_[k,j,idx]

        return goal 
Example #24
Source File: formatter.py    From namsel with MIT License
def get_gmm_for_stack(vals, thresh=10, num_sizes=2):
    '''  Assume no more than num_sizes font sizes present... '''
    if len(vals) > 30:
#         vals = [v for v in vals if vals.count(v) > 2]
#         counts = Counter(vals)
#         vals = [v for v in counts if counts[v] > 2]
        n_components = num_sizes
    elif len(vals) == 1:
        gmm = GMM()
        gmm.means_ = np.array(vals).reshape((1,1))
#         gmm.labels_ = np.array([0])
        return gmm
    else:
        n_components = 1
        
    gmm = GMM(n_components=n_components)  # use the component count selected above
    gmm.fit(np.array(vals).reshape(len(vals), 1))    
    return gmm
    
#     while True:
#         gmm = GMM(n_components=n_components)
#         try:
#             gmm.fit(vals)
#         except:
#             print vals, n_components
#             raise
# #         gmm.labels_ = np.argsort(gmm.means_)
#         means = list(gmm.means_.copy().flatten())
#         means.sort()
#         if n_components > 1:
#             for i, m in enumerate(means[:-1]):
#                 if means[i+1] - m < thresh or not gmm.converged_:
#  
#                     n_components -= 1
#                     break
#             else:
#                 return gmm
#         elif n_components == 1:
#             return gmm 
Example #25
Source File: segmentation_labelling.py    From kaggle-heart with MIT License
def extract_segmentation_model(blk, do_plot = False):
    # uses a single slice to extract segmentation model
    # still to check: does it improve if you use the whole stack?
    block = 1.0*blk.copy()
    #rescalefact = np.max(block)
    #block = block / rescalefact
    #lowerb=0.0
    #upperb=1.0
    ##lowerb=1.0*np.min(block[block > 0.05])
    ##upperb=1.0*np.max(block[block < 0.95])

    perc = 1.0*np.percentile(block[block>0], q=[10, 95])
    lowerb, upperb = perc[0], perc[1]
    rescalefact = 1.0
    block[block>0] = np.clip(1. * (block[block>0] - lowerb) / (upperb - lowerb), 0.0, 1.0)

    obs = block[block>0.]
    obs = np.reshape(obs,(obs.shape[0],1))

    if do_plot:
        plt.figure()
        plt.hist(obs.ravel())
        plt.show()

    g = mixture.GMM(n_components=2)
    g.fit(obs)
    bloodlabel = g.predict(((0.,),(1.,)))

    # find threshold up to 1e-5 accuracy
    accuracy = 1.0e-5
    xmax= 1.
    xmin = 0.
    while (xmax - xmin) > accuracy:
        newp = (xmax + xmin)/2.
        newl = g.predict([[newp]])  # predict expects a 2-D array of samples
        if newl == bloodlabel[0]:
            xmin = newp
        else:
            xmax = newp
        #print "[" + str(xmin) + "," + str(xmax) + "]"

    threshold = (xmax + xmin)/2.
    #print "GMM Threshold = " + str(threshold)

    return threshold, bloodlabel, lowerb*rescalefact, upperb*rescalefact 
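The bisection above locates the decision boundary numerically; for a 1-D two-component mixture the same threshold also follows in closed form from equating the two weighted component densities, which yields a quadratic in x. A sketch, assuming the fitted model g from above with its default 'diag' covariances (the helper name is hypothetical):

import numpy as np

def gmm_threshold_closed_form(g):
    # Solve w1 * N(x; m1, v1) == w2 * N(x; m2, v2) for x.
    m1, m2 = g.means_.ravel()
    v1, v2 = np.asarray(g.covars_).ravel()[:2]
    w1, w2 = g.weights_
    a = 0.5 / v2 - 0.5 / v1
    b = m1 / v1 - m2 / v2
    c = (m2 ** 2 / (2 * v2) - m1 ** 2 / (2 * v1)
         + np.log(w1 * np.sqrt(v2) / (w2 * np.sqrt(v1))))
    lo, hi = sorted((m1, m2))
    # np.roots drops a leading zero coefficient, so equal variances are fine;
    # keep the crossing that lies between the two means.
    return next(r.real for r in np.roots([a, b, c]) if lo <= r.real <= hi)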
Example #26
Source File: test_gmm.py    From twitter-stock-recommendation with MIT License
def check_positive_definite_covars(covariance_type):
    r"""Test that covariance matrices do not become non positive definite

    Due to the accumulation of round-off errors, the computation of the
    covariance  matrices during the learning phase could lead to non-positive
    definite covariance matrices. Namely the use of the formula:

    .. math:: C = (\sum_i w_i  x_i x_i^T) - \mu \mu^T

    instead of:

    .. math:: C = \sum_i w_i (x_i - \mu)(x_i - \mu)^T

    while mathematically equivalent, was observed to trigger a ``LinAlgError``
    exception when computing a ``GMM`` with full covariance matrices and fixed mean.

    This function ensures that some later optimization will not introduce the
    problem again.
    """
    rng = np.random.RandomState(1)
    # we build a dataset with two 2-D components. The components are unbalanced
    # (respective weights 0.9 and 0.1)
    X = rng.randn(100, 2)
    X[-10:] += (3, 3)  # Shift the 10 last points

    gmm = mixture.GMM(2, params="wc", covariance_type=covariance_type,
                      min_covar=1e-3)

    # This is a non-regression test for issue #2640. The following call used
    # to trigger:
    # numpy.linalg.linalg.LinAlgError: 2-th leading minor not positive definite
    gmm.fit(X)

    if covariance_type == "diag" or covariance_type == "spherical":
        assert_greater(gmm.covars_.min(), 0)
    else:
        if covariance_type == "tied":
            covs = [gmm.covars_]
        else:
            covs = gmm.covars_

        for c in covs:
            assert_greater(np.linalg.det(c), 0) 
Example #27
Source File: utils.py    From mmvt with GNU General Public License v3.0
def calc_clusters_bic(X, n_components=0, do_plot=True):
    from sklearn import mixture
    import itertools
    if do_plot:
        import matplotlib.pyplot as plt

    lowest_bic = np.infty
    bic = []
    if n_components==0:
        n_components = X.shape[0]
    n_components_range = range(1, n_components)
    cv_types = ['spherical', 'diag']#, 'tied'] # 'full'
    res = defaultdict(dict)
    for cv_type in cv_types:
        for n_components in n_components_range:
            # Fit a mixture of Gaussians with EM
            gmm = mixture.GMM(n_components=n_components, covariance_type=cv_type)
            gmm.fit(X)
            bic.append(gmm.bic(X))
            res[cv_type][n_components] = gmm
            if bic[-1] < lowest_bic:
                lowest_bic = bic[-1]
                best_gmm = gmm

    bic = np.array(bic)

    if do_plot:
        # Plot the BIC scores
        color_iter = itertools.cycle(['k', 'r', 'g', 'b', 'c', 'm', 'y'])
        bars = []
        spl = plt.subplot(1, 1, 1)
        for i, (cv_type, color) in enumerate(zip(cv_types, color_iter)):
            xpos = np.array(n_components_range) + .2 * (i - 2)
            bars.append(plt.bar(xpos, bic[i * len(n_components_range):
                                          (i + 1) * len(n_components_range)],
                                width=.2, color=color))
        plt.xticks(n_components_range)
        plt.ylim([bic.min() * 1.01 - .01 * bic.max(), bic.max()])
        plt.title('BIC score per model')
        xpos = np.mod(bic.argmin(), len(n_components_range)) + .65 +\
            .2 * np.floor(bic.argmin() / len(n_components_range))
        plt.text(xpos, bic.min() * 0.97 + .03 * bic.max(), '*', fontsize=14)
        spl.set_xlabel('Number of components')
        spl.legend([b[0] for b in bars], cv_types)
        plt.show()
    return res, best_gmm, bic 
Example #28
Source File: graph_cuts.py    From pyImSegm with BSD 3-Clause "New" or "Revised" License
def compute_multivarian_otsu(features):
    """ compute otsu individually over each sample dimension
    WARNING: this compute only localy  and since it does compare all
    combinations of orienting the asign for tight cases it may not decide

    :param ndarray features:
    :return list(bool):

    >>> np.random.seed(0)
    >>> fts = np.row_stack([np.random.random((5, 3)) - 1,
    ...                     np.random.random((5, 3)) + 1])
    >>> fts[:, 1] = - fts[:, 1]
    >>> compute_multivarian_otsu(fts).astype(int)
    array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
    """
    ys = np.zeros(features.shape)
    for i in range(features.shape[-1]):
        thr = filters.threshold_otsu(features[:, i])
        asign = features[:, i] > thr
        if i > 0:
            m = np.mean(ys[:, :i], axis=1)
            d1 = np.mean(np.abs(asign - m))
            d2 = np.mean(np.abs(~asign - m))
            # check whether it would be better to swap the assignment for this dimension
            if d2 < d1:
                asign = ~asign
        ys[:, i] = asign
    y = np.mean(ys, axis=1) > 0.5
    return y


# def estim_class_model_gmm(features, nb_classes, init='kmeans'):
#     """ from all features estimate Gaussian Mixture Model and assuming
#     each cluster is a single class compute probability that each feature
#     belongs to each class
#
#     :param [[float]] features: list of features per segment
#     :param int nb_classes: number of classes
#     :return [[float]]: probabilities that each feature belongs to each class
#     """
#     logging.debug('estimate GMM for all given features %s and %i component',
#                   repr(features.shape), nb_classes)
#     # http://scikit-learn.org/stable/modules/generated/sklearn.mixture.GMM.html
#     mm = mixture.GMM(n_components=nb_classes, covariance_type='full', n_iter=999)
#     if init == 'kmeans':
#         # http://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html
#         kmeans = cluster.KMeans(n_clusters=nb_classes, init='k-means++', n_jobs=-1)
#         y = kmeans.fit_predict(features)
#         mm.fit(features, y)
#     else:
#         mm.fit(features)
#     logging.info('compute probability of each feature to all component')
#     return mm 
Example #29
Source File: page_elements2.py    From namsel with MIT License
def char_gaussians(cls, widths):
        
        widths = np.array(widths)
        widths.shape = (len(widths),1)
        cls.median_width = np.median(widths)
        
        gmm = GMM(n_components = 2, n_iter=100)
        try:
            gmm.fit(widths)
        except ValueError:
            return (0,0,0,0)
        means = gmm.means_
        stds = np.sqrt(gmm.covars_)
        cls.gmm = gmm
        char_mean_ind = np.argmax(means)
        char_mean = float(means[char_mean_ind]) # Page character width mean
        char_std = float(stds[char_mean_ind][0]) # Page character std dev
        
        cls.tsek_mean_ind = np.argmin(means)
        
        tsek_mean = float(means[cls.tsek_mean_ind])
        tsek_std = float(stds[cls.tsek_mean_ind][0])
#        print gmm.converged_, 'converged'
        return (char_mean, char_std, tsek_mean, tsek_std)

#    def _gaussians(self, widths):
##        print widths
#        widths = np.array(widths)
#        widths.shape = (len(widths),1)
#        
#        gmm = GMM(n_components = 3, n_iter=100)
#        try:
#            gmm.fit(widths)
#        except ValueError:
#            return (0,0,0,0)
#        means = gmm.means_
#        stds = np.sqrt(gmm.covars_)
#        
#        argm = np.argmin(means)
#        self.smlmean = means[argm]
#        self.smstd = stds[argm]
        
#        cls.gmm = gmm
#        print gmm.converged_, 'converged'
#        from matplotlib import pyplot as plt
#        from matplotlib.mlab import normpdf
##        plt.subplot(211)
#        plt.title('tsek-char distributions, pre-segmentation')
#        
#        n,bins,p = plt.hist(widths, 200, range=(0,75), normed=True, color='#3B60FA')
##        plt.vlines(means, 0, np.array([max(n), max(n)]), linestyles='--')
#        for i, m in enumerate(means):
#            
#            plt.plot(bins, normpdf(bins, means[i], stds[i]),  label='fit', linewidth=1)
#            plt.fill_between(bins, normpdf(bins, means[i], stds[i]), color=(.58,.63,.8), alpha=0.09)
#
#        plt.show()