Python sklearn.mixture.GMM Examples
The following are 30
code examples of sklearn.mixture.GMM().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
sklearn.mixture
, or try the search function
.
Example #1
Source File: test_gmm.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_aic(): # Test the aic and bic criteria n_samples, n_dim, n_components = 50, 3, 2 X = rng.randn(n_samples, n_dim) SGH = 0.5 * (X.var() + np.log(2 * np.pi)) # standard gaussian entropy for cv_type in ['full', 'tied', 'diag', 'spherical']: g = mixture.GMM(n_components=n_components, covariance_type=cv_type, random_state=rng, min_covar=1e-7) g.fit(X) aic = 2 * n_samples * SGH * n_dim + 2 * g._n_parameters() bic = (2 * n_samples * SGH * n_dim + np.log(n_samples) * g._n_parameters()) bound = n_dim * 3. / np.sqrt(n_samples) assert_true(np.abs(g.aic(X) - aic) / n_samples < bound) assert_true(np.abs(g.bic(X) - bic) / n_samples < bound) # This function tests the deprecated old GMM class
Example #2
Source File: test_gmm.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_fit_predict(): """ test that gmm.fit_predict is equivalent to gmm.fit + gmm.predict """ lrng = np.random.RandomState(101) n_samples, n_dim, n_comps = 100, 2, 2 mu = np.array([[8, 8]]) component_0 = lrng.randn(n_samples, n_dim) component_1 = lrng.randn(n_samples, n_dim) + mu X = np.vstack((component_0, component_1)) for m_constructor in (mixture.GMM, mixture.VBGMM, mixture.DPGMM): model = m_constructor(n_components=n_comps, covariance_type='full', min_covar=1e-7, n_iter=5, random_state=np.random.RandomState(0)) assert_fit_predict_correct(model, X) model = mixture.GMM(n_components=n_comps, n_iter=0) z = model.fit_predict(X) assert np.all(z == 0), "Quick Initialization Failed!" # This function tests the deprecated old GMM class
Example #3
Source File: test_gmm.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_train_1d(self, params='wmc'): # Train on 1-D data # Create a training set by sampling from the predefined # distribution. X = rng.randn(100, 1) # X.T[1:] = 0 g = self.model(n_components=2, covariance_type=self.covariance_type, random_state=rng, min_covar=1e-7, n_iter=5, init_params=params) with ignore_warnings(category=DeprecationWarning): g.fit(X) trainll = g.score(X) if isinstance(g, mixture.dpgmm._DPGMMBase): self.assertTrue(np.sum(np.abs(trainll / 100)) < 5) else: self.assertTrue(np.sum(np.abs(trainll / 100)) < 2) # This function tests the deprecated old GMM class
Example #4
Source File: transforms.py From omgh with MIT License | 6 votes |
def fit(self, data_generator, force=False, test=False): if force or not self.storage.check_exists(self.model_path): self._transform = GMM( n_components=self.n_components, covariance_type=self.covariance_type, n_iter=self.n_iter, n_init=self.n_init) def mid_generator(): for t, des in data_generator: yield des X = np.vstack(mid_generator()) if test: X = X[0:100000, :] self._transform.fit(X) joblib.dump(self._transform, self.model_path) else: self._transform = joblib.load(self.model_path)
Example #5
Source File: test_gmm.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_train_degenerate(self, params='wmc'): # Train on degenerate data with 0 in some dimensions # Create a training set by sampling from the predefined # distribution. X = rng.randn(100, self.n_features) X.T[1:] = 0 g = self.model(n_components=2, covariance_type=self.covariance_type, random_state=rng, min_covar=1e-3, n_iter=5, init_params=params) with ignore_warnings(category=DeprecationWarning): g.fit(X) trainll = g.score(X) self.assertTrue(np.sum(np.abs(trainll / 100 / X.shape[1])) < 5) # This function tests the deprecated old GMM class
Example #6
Source File: beamformers_electrodes_tweak.py From mmvt with GNU General Public License v3.0 | 6 votes |
def find_best_n_componenets(X, electrodes, event_id, bipolar, from_t, to_t, time_split, meg_data_dic, elec_data, gk_sigma): cond = utils.first_key(event_id) all_errors = [] all_clusters = [] for n_components in range(1, X.shape[0]): gmm = mixture.GMM(n_components=n_components, covariance_type='spherical') gmm.fit(X) clusters = gmm.predict(X) unique_clusters = np.unique(clusters) cluster_errors = [] for cluster in unique_clusters: cluster_electrodes = np.array(electrodes)[np.where(clusters == cluster)].tolist() elec_errors, elec_ps = reconstruct_meg(event_id, cluster_electrodes, from_t, to_t, time_split, gk_sigma=gk_sigma, plot_results=False, bipolar=bipolar, dont_calc_new_csd=True, all_meg_data=meg_data_dic, elec_data=elec_data, njobs=1) cluster_errors.append(max(elec_errors[cond].values())) print(n_components, max(cluster_errors)) all_clusters.append(clusters) all_errors.append(max(cluster_errors)) utils.save((all_clusters, all_errors), get_pkl_file('{}_best_n_componenets'.format(cond))) plt.plot(all_errors) plt.show()
Example #7
Source File: test_gmm.py From twitter-stock-recommendation with MIT License | 6 votes |
def _setUp(self): self.n_components = 10 self.n_features = 4 self.weights = rng.rand(self.n_components) self.weights = self.weights / self.weights.sum() self.means = rng.randint(-20, 20, (self.n_components, self.n_features)) self.threshold = -0.5 self.I = np.eye(self.n_features) self.covars = { 'spherical': (0.1 + 2 * rng.rand(self.n_components, self.n_features)) ** 2, 'tied': (make_spd_matrix(self.n_features, random_state=0) + 5 * self.I), 'diag': (0.1 + 2 * rng.rand(self.n_components, self.n_features)) ** 2, 'full': np.array([make_spd_matrix(self.n_features, random_state=0) + 5 * self.I for x in range(self.n_components)])} # This function tests the deprecated old GMM class
Example #8
Source File: test_gmm.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_GMM_attributes(): n_components, n_features = 10, 4 covariance_type = 'diag' g = mixture.GMM(n_components, covariance_type, random_state=rng) weights = rng.rand(n_components) weights = weights / weights.sum() means = rng.randint(-20, 20, (n_components, n_features)) assert_true(g.n_components == n_components) assert_true(g.covariance_type == covariance_type) g.weights_ = weights assert_array_almost_equal(g.weights_, weights) g.means_ = means assert_array_almost_equal(g.means_, means) covars = (0.1 + 2 * rng.rand(n_components, n_features)) ** 2 g.covars_ = covars assert_array_almost_equal(g.covars_, covars) assert_raises(ValueError, g._set_covars, []) assert_raises(ValueError, g._set_covars, np.zeros((n_components - 2, n_features))) assert_raises(ValueError, mixture.GMM, n_components=20, covariance_type='badcovariance_type')
Example #9
Source File: skgmm.py From Conceptor with GNU General Public License v3.0 | 5 votes |
def fit_new(self, x, label): self.y.append(label) gmm = GMM(self.gmm_order) gmm.fit(x) self.gmms.append(gmm)
Example #10
Source File: test_gmm.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_1d_1component(): # Test all of the covariance_types return the same BIC score for # 1-dimensional, 1 component fits. n_samples, n_dim, n_components = 100, 1, 1 X = rng.randn(n_samples, n_dim) g_full = mixture.GMM(n_components=n_components, covariance_type='full', random_state=rng, min_covar=1e-7, n_iter=1) with ignore_warnings(category=DeprecationWarning): g_full.fit(X) g_full_bic = g_full.bic(X) for cv_type in ['tied', 'diag', 'spherical']: g = mixture.GMM(n_components=n_components, covariance_type=cv_type, random_state=rng, min_covar=1e-7, n_iter=1) g.fit(X) assert_array_almost_equal(g.bic(X), g_full_bic)
Example #11
Source File: test_gmm.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_n_parameters(): n_samples, n_dim, n_components = 7, 5, 2 X = rng.randn(n_samples, n_dim) n_params = {'spherical': 13, 'diag': 21, 'tied': 26, 'full': 41} for cv_type in ['full', 'tied', 'diag', 'spherical']: with ignore_warnings(category=DeprecationWarning): g = mixture.GMM(n_components=n_components, covariance_type=cv_type, random_state=rng, min_covar=1e-7, n_iter=1) g.fit(X) assert_true(g._n_parameters() == n_params[cv_type]) # This function tests the deprecated old GMM class
Example #12
Source File: test_gmm.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_multiple_init(): # Test that multiple inits does not much worse than a single one X = rng.randn(30, 5) X[:10] += 2 g = mixture.GMM(n_components=2, covariance_type='spherical', random_state=rng, min_covar=1e-7, n_iter=5) with ignore_warnings(category=DeprecationWarning): train1 = g.fit(X).score(X).sum() g.n_init = 5 train2 = g.fit(X).score(X).sum() assert_true(train2 >= train1 - 1.e-2) # This function tests the deprecated old GMM class
Example #13
Source File: test_gmm.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_sample(self, n=100): g = self.model(n_components=self.n_components, covariance_type=self.covariance_type, random_state=rng) # Make sure the means are far apart so responsibilities.argmax() # picks the actual component used to generate the observations. g.means_ = 20 * self.means g.covars_ = np.maximum(self.covars[self.covariance_type], 0.1) g.weights_ = self.weights with ignore_warnings(category=DeprecationWarning): samples = g.sample(n) self.assertEqual(samples.shape, (n, self.n_features)) # This function tests the deprecated old GMM class
Example #14
Source File: test_gmm.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_eval(self): if not self.do_test_eval: return # DPGMM does not support setting the means and # covariances before fitting There is no way of fixing this # due to the variational parameters being more expressive than # covariance matrices g = self.model(n_components=self.n_components, covariance_type=self.covariance_type, random_state=rng) # Make sure the means are far apart so responsibilities.argmax() # picks the actual component used to generate the observations. g.means_ = 20 * self.means g.covars_ = self.covars[self.covariance_type] g.weights_ = self.weights gaussidx = np.repeat(np.arange(self.n_components), 5) n_samples = len(gaussidx) X = rng.randn(n_samples, self.n_features) + g.means_[gaussidx] with ignore_warnings(category=DeprecationWarning): ll, responsibilities = g.score_samples(X) self.assertEqual(len(ll), n_samples) self.assertEqual(responsibilities.shape, (n_samples, self.n_components)) assert_array_almost_equal(responsibilities.sum(axis=1), np.ones(n_samples)) assert_array_equal(responsibilities.argmax(axis=1), gaussidx) # This function tests the deprecated old GMM class
Example #15
Source File: test_gmm.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_lvmpdf_full_cv_non_positive_definite(): n_features, n_samples = 2, 10 rng = np.random.RandomState(0) X = rng.randint(10) * rng.rand(n_samples, n_features) mu = np.mean(X, 0) cv = np.array([[[-1, 0], [0, 1]]]) expected_message = "'covars' must be symmetric, positive-definite" assert_raise_message(ValueError, expected_message, mixture.log_multivariate_normal_density, X, mu, cv, 'full') # This function tests the deprecated old GMM class
Example #16
Source File: skgmm.py From Conceptor with GNU General Public License v3.0 | 5 votes |
def fit_new(self, x, label): self.y.append(label) gmm = GMM(self.gmm_order) gmm.fit(x) self.gmms.append(gmm)
Example #17
Source File: ClassficationBase.py From AirTicketPredicting with MIT License | 5 votes |
def gmmRemovingOutlierForClassifier(): """ use GMM model to remove outlier :return: NA """ # load data X_train = np.load('inputClf_small/X_train.npy') y_train = np.load('inputClf_small/y_train.npy') y_train_price = np.load('inputClf_small/y_train_price.npy') # classifier initialize classifier = GMM(n_components=2,covariance_type='full', init_params='wmc', n_iter=20) # cluster initializing X_train1 = X_train[np.where(y_train==0)[0], :] X_train2 = X_train[np.where(y_train==1)[0], :] cluster1 = KMeans(init='random', n_clusters=1, random_state=0).fit(X_train1) cluster1 = cluster1.cluster_centers_ cluster2 = KMeans(init='random', n_clusters=1, random_state=0).fit(X_train2) cluster2 = cluster2.cluster_centers_ clusters = np.concatenate((cluster1, cluster2), axis=0) classifier.means_ = clusters # Train the other parameters using the EM algorithm. classifier.fit(X_train) # predict y_train_pred = classifier.predict(X_train) train_accuracy = np.mean(y_train_pred.ravel() == y_train.ravel()) * 100 print "Keep {}% data.".format(train_accuracy) # keep the data which are not outliers y_train_pred = y_train_pred.reshape((y_train_pred.shape[0], 1)) X_train = X_train[np.where(y_train==y_train_pred)[0], :] y_train_price = y_train_price[np.where(y_train==y_train_pred)[0], :] y_train = y_train[np.where(y_train==y_train_pred)[0], :] np.save('inputClf_GMMOutlierRemoval/X_train', X_train) np.save('inputClf_GMMOutlierRemoval/y_train', y_train) np.save('inputClf_GMMOutlierRemoval/y_train_price', y_train_price)
Example #18
Source File: beamformers_electrodes_tweak.py From mmvt with GNU General Public License v3.0 | 5 votes |
def analyze_best_n_componenets(event_id, bipolar, from_t, to_t, time_split, gk_sigma=3, electrodes_positive=True, electrodes_normalize=True, njobs=4): cond = utils.first_key(event_id) all_clusters, all_errors = utils.load(get_pkl_file('{}_best_n_componenets'.format(cond))) electrodes, errors, pss = utils.load(get_pkl_file('{}_calc_p_for_each_electrode.pkl'.format(cond))) elec_data = load_electrodes_data(event_id, bipolar, electrodes, from_t, to_t, subtract_min=electrodes_positive, normalize_data=electrodes_normalize) meg_data_dic = load_all_dics(freqs_bin, event_id, bipolar, electrodes, from_t, to_t, gk_sigma, dont_calc_new_csd=True, njobs=njobs) X = utils.stack(pss) for k, cluster_error in enumerate(all_errors): print(k, cluster_error) plt.plot(all_errors) plt.show() gmm = mixture.GMM(n_components=22, covariance_type='spherical') gmm.fit(X) clusters = gmm.predict(X) unique_clusters = np.unique(clusters) cluster_errors = [] for cluster in unique_clusters: cluster_electrodes = np.array(electrodes)[np.where(clusters == cluster)].tolist() elec_errors, elec_ps = reconstruct_meg(event_id, cluster_electrodes, from_t, to_t, time_split, gk_sigma=gk_sigma, plot_results=True, bipolar=bipolar, dont_calc_new_csd=True, all_meg_data=meg_data_dic, elec_data=elec_data, njobs=1) cluster_errors.append(max(elec_errors[cond].values()))
Example #19
Source File: test_gmm.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_positive_definite_covars(): # Check positive definiteness for all covariance types for covariance_type in ["full", "tied", "diag", "spherical"]: yield check_positive_definite_covars, covariance_type # This function tests the deprecated old GMM class
Example #20
Source File: graph_cuts.py From pyImSegm with BSD 3-Clause "New" or "Revised" License | 5 votes |
def estim_class_model_gmm(features, nb_classes, init='kmeans'): """ from all features estimate Gaussian Mixture Model and assuming each cluster is a single class compute probability that each feature belongs to each class :param [[float]] features: list of features per segment :param int nb_classes: number of classes :param int init: initialisation :return [[float]]: probabilities that each feature belongs to each class >>> np.random.seed(0) >>> fts = np.row_stack([np.random.random((50, 3)) - 1, ... np.random.random((50, 3)) + 1]) >>> mm = estim_class_model_gmm(fts, 2) >>> mm.predict_proba(fts).shape (100, 2) """ logging.debug('estimate GMM for all given features %r and %i component', features.shape, nb_classes) # http://scikit-learn.org/stable/modules/generated/sklearn.mixture.GMM.html gmm = mixture.GaussianMixture(n_components=nb_classes, covariance_type='full', max_iter=99) if init == 'kmeans': # http://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html kmeans = cluster.KMeans(n_clusters=nb_classes, init='k-means++', n_jobs=-1) y = kmeans.fit_predict(features) gmm.fit(features, y) else: gmm.fit(features) logging.info('compute probability of each feature to all component') return gmm
Example #21
Source File: test_gmm.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_verbose_first_level(): # Create sample data X = rng.randn(30, 5) X[:10] += 2 g = mixture.GMM(n_components=2, n_init=2, verbose=1) old_stdout = sys.stdout sys.stdout = StringIO() try: g.fit(X) finally: sys.stdout = old_stdout # This function tests the deprecated old GMM class
Example #22
Source File: graph_cuts.py From pyImSegm with BSD 3-Clause "New" or "Revised" License | 5 votes |
def estim_gmm_params(features, prob): """ with given soft labeling (take the maxim) get the GMM parameters :param ndarray features: :param ndarray prob: :return: >>> np.random.seed(0) >>> prob = np.array([[1, 0]] * 30 + [[0, 1]] * 40) >>> fts = prob + np.random.random(prob.shape) >>> mm = estim_gmm_params(fts, prob) >>> mm['weights'] [0.42857142857142855, 0.5714285714285714] >>> mm['means'] array([[ 1.49537196, 0.53745455], [ 0.54199936, 1.42606497]]) """ nb_samples, nb_classes = prob.shape labels = np.argmax(prob, axis=1) gmm_params = {'weights': [], 'means': [], 'covars': []} for lb in range(nb_classes): labels_sel = (labels == lb) gmm_params['weights'].append(np.sum(labels_sel) / float(nb_samples)) gmm_params['means'].append(np.mean(features[labels_sel], axis=0)) gmm_params['covars'].append(np.cov(features[labels_sel])) for n in ['means', 'covars']: gmm_params[n] = np.array([m.tolist() for m in gmm_params[n]]) return gmm_params
Example #23
Source File: ModelsTrainer.py From Voice-based-gender-recognition with MIT License | 5 votes |
def process(self): females, males = self.get_file_paths(self.females_training_path, self.males_training_path) # collect voice features female_voice_features = self.collect_features(females) male_voice_features = self.collect_features(males) # generate gaussian mixture models females_gmm = GMM(n_components = 16, n_iter = 200, covariance_type='diag', n_init = 3) males_gmm = GMM(n_components = 16, n_iter = 200, covariance_type='diag', n_init = 3) # fit features to models females_gmm.fit(female_voice_features) males_gmm.fit(male_voice_features) # save models self.save_gmm(females_gmm, "females") self.save_gmm(males_gmm, "males")
Example #24
Source File: skill.py From costar_plan with Apache License 2.0 | 5 votes |
def GetGoalModel(self,objs,preset=None): if preset is None: robot = RobotFeatures() else: robot = RobotFeatures(preset=preset) if 'gripper' in objs: objs.remove('gripper') for obj in objs: robot.AddObject(obj) dims = robot.max_index K = self.action_model.n_components goal = GMM(n_components=K,covariance_type="full") goal.weights_ = self.action_model.weights_ goal.means_ = np.zeros((K,dims)) goal.covars_ = np.zeros((K,dims,dims)) idx = robot.GetDiffIndices(objs) print objs for k in range(K): goal.means_[k,:] = self.action_model.means_[k,idx] for j in range(dims): goal.covars_[k,j,idx] = self.action_model.covars_[k,j,idx] return goal
Example #25
Source File: formatter.py From namsel with MIT License | 5 votes |
def get_gmm_for_stack(vals, thresh=10, num_sizes=2): ''' Assume no more than num_sizes font sizes present... ''' if len(vals) > 30: # vals = [v for v in vals if vals.count(v) > 2] # counts = Counter(vals) # vals = [v for v in counts if counts[v] > 2] n_components = num_sizes elif len(vals) == 1: gmm = GMM() gmm.means_ = np.array(vals).reshape((1,1)) # gmm.labels_ = np.array([0]) return gmm else: n_components = 1 gmm = GMM(n_components=2) gmm.fit(np.array(vals).reshape(len(vals), 1)) return gmm # while True: # gmm = GMM(n_components=n_components) # try: # gmm.fit(vals) # except: # print vals, n_components # raise # # gmm.labels_ = np.argsort(gmm.means_) # means = list(gmm.means_.copy().flatten()) # means.sort() # if n_components > 1: # for i, m in enumerate(means[:-1]): # if means[i+1] - m < thresh or not gmm.converged_: # # n_components -= 1 # break # else: # return gmm # elif n_components == 1: # return gmm
Example #26
Source File: segmentation_labelling.py From kaggle-heart with MIT License | 4 votes |
def extract_segmentation_model(blk, do_plot = False): # uses a single slice to extract segmentation model # still to check: does it improve if you use the whole stack? block = 1.0*blk.copy() #rescalefact = np.max(block) #block = block / rescalefact #lowerb=0.0 #upperb=1.0 ##lowerb=1.0*np.min(block[block > 0.05]) ##upperb=1.0*np.max(block[block < 0.95]) perc = 1.0*np.percentile(block[block>0], q=[10, 95]) lowerb, upperb = perc[0], perc[1] rescalefact = 1.0 block[block>0] = np.clip(1. * (block[block>0] - lowerb) / (upperb - lowerb), 0.0, 1.0) obs = block[block>0.] obs = np.reshape(obs,(obs.shape[0],1)) if do_plot: plt.figure() plt.hist(obs.ravel()) plt.show() g = mixture.GMM(n_components=2) g.fit(obs) bloodlabel = g.predict(((0.,),(1.,))) # find threshold up to 1e-5 accuracy accuracy = 1.0e-5 xmax= 1. xmin = 0. while (xmax - xmin) > accuracy: newp = (xmax + xmin)/2. newl = g.predict([newp]) if newl == bloodlabel[0]: xmin = newp else: xmax = newp #print "[" + str(xmin) + "," + str(xmax) + "]" threshold = (xmax + xmin)/2. #print "GMM Threshold = " + str(threshold) return threshold, bloodlabel, lowerb*rescalefact, upperb*rescalefact
Example #27
Source File: test_gmm.py From twitter-stock-recommendation with MIT License | 4 votes |
def check_positive_definite_covars(covariance_type): r"""Test that covariance matrices do not become non positive definite Due to the accumulation of round-off errors, the computation of the covariance matrices during the learning phase could lead to non-positive definite covariance matrices. Namely the use of the formula: .. math:: C = (\sum_i w_i x_i x_i^T) - \mu \mu^T instead of: .. math:: C = \sum_i w_i (x_i - \mu)(x_i - \mu)^T while mathematically equivalent, was observed a ``LinAlgError`` exception, when computing a ``GMM`` with full covariance matrices and fixed mean. This function ensures that some later optimization will not introduce the problem again. """ rng = np.random.RandomState(1) # we build a dataset with 2 2d component. The components are unbalanced # (respective weights 0.9 and 0.1) X = rng.randn(100, 2) X[-10:] += (3, 3) # Shift the 10 last points gmm = mixture.GMM(2, params="wc", covariance_type=covariance_type, min_covar=1e-3) # This is a non-regression test for issue #2640. The following call used # to trigger: # numpy.linalg.linalg.LinAlgError: 2-th leading minor not positive definite gmm.fit(X) if covariance_type == "diag" or covariance_type == "spherical": assert_greater(gmm.covars_.min(), 0) else: if covariance_type == "tied": covs = [gmm.covars_] else: covs = gmm.covars_ for c in covs: assert_greater(np.linalg.det(c), 0)
Example #28
Source File: utils.py From mmvt with GNU General Public License v3.0 | 4 votes |
def calc_clusters_bic(X, n_components=0, do_plot=True): from sklearn import mixture import itertools if do_plot: import matplotlib.pyplot as plt lowest_bic = np.infty bic = [] if n_components==0: n_components = X.shape[0] n_components_range = range(1, n_components) cv_types = ['spherical', 'diag']#, 'tied'] # 'full' res = defaultdict(dict) for cv_type in cv_types: for n_components in n_components_range: # Fit a mixture of Gaussians with EM gmm = mixture.GMM(n_components=n_components, covariance_type=cv_type) gmm.fit(X) bic.append(gmm.bic(X)) res[cv_type][n_components] = gmm if bic[-1] < lowest_bic: lowest_bic = bic[-1] best_gmm = gmm bic = np.array(bic) if do_plot: # Plot the BIC scores color_iter = itertools.cycle(['k', 'r', 'g', 'b', 'c', 'm', 'y']) bars = [] spl = plt.subplot(1, 1, 1) for i, (cv_type, color) in enumerate(zip(cv_types, color_iter)): xpos = np.array(n_components_range) + .2 * (i - 2) bars.append(plt.bar(xpos, bic[i * len(n_components_range): (i + 1) * len(n_components_range)], width=.2, color=color)) plt.xticks(n_components_range) plt.ylim([bic.min() * 1.01 - .01 * bic.max(), bic.max()]) plt.title('BIC score per model') xpos = np.mod(bic.argmin(), len(n_components_range)) + .65 +\ .2 * np.floor(bic.argmin() / len(n_components_range)) plt.text(xpos, bic.min() * 0.97 + .03 * bic.max(), '*', fontsize=14) spl.set_xlabel('Number of components') spl.legend([b[0] for b in bars], cv_types) plt.show() return res, best_gmm, bic
Example #29
Source File: graph_cuts.py From pyImSegm with BSD 3-Clause "New" or "Revised" License | 4 votes |
def compute_multivarian_otsu(features): """ compute otsu individually over each sample dimension WARNING: this compute only localy and since it does compare all combinations of orienting the asign for tight cases it may not decide :param ndarray features: :return list(bool): >>> np.random.seed(0) >>> fts = np.row_stack([np.random.random((5, 3)) - 1, ... np.random.random((5, 3)) + 1]) >>> fts[:, 1] = - fts[:, 1] >>> compute_multivarian_otsu(fts).astype(int) array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) """ ys = np.zeros(features.shape) for i in range(features.shape[-1]): thr = filters.threshold_otsu(features[:, i]) asign = features[:, i] > thr if i > 0: m = np.mean(ys[:, :i], axis=1) d1 = np.mean(np.abs(asign - m)) d2 = np.mean(np.abs(~asign - m)) # check if for this dimension it wount be better to swap it if d2 < d1: asign = ~asign ys[:, i] = asign y = np.mean(ys, axis=1) > 0.5 return y # def estim_class_model_gmm(features, nb_classes, init='kmeans'): # """ from all features estimate Gaussian Mixture Model and assuming # each cluster is a single class compute probability that each feature # belongs to each class # # :param [[float]] features: list of features per segment # :param int nb_classes: number of classes # :return [[float]]: probabilities that each feature belongs to each class # """ # logging.debug('estimate GMM for all given features %s and %i component', # repr(features.shape), nb_classes) # # http://scikit-learn.org/stable/modules/generated/sklearn.mixture.GMM.html # mm = mixture.GMM(n_components=nb_classes, covariance_type='full', n_iter=999) # if init == 'kmeans': # # http://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html # kmeans = cluster.KMeans(n_clusters=nb_classes, init='k-means++', n_jobs=-1) # y = kmeans.fit_predict(features) # mm.fit(features, y) # else: # mm.fit(features) # logging.info('compute probability of each feature to all component') # return mm
Example #30
Source File: page_elements2.py From namsel with MIT License | 4 votes |
def char_gaussians(cls, widths): widths = np.array(widths) widths.shape = (len(widths),1) cls.median_width = np.median(widths) gmm = GMM(n_components = 2, n_iter=100) try: gmm.fit(widths) except ValueError: return (0,0,0,0) means = gmm.means_ stds = np.sqrt(gmm.covars_) cls.gmm = gmm char_mean_ind = np.argmax(means) char_mean = float(means[char_mean_ind]) # Page character width mean char_std = float(stds[char_mean_ind][0]) # Page character std dev cls.tsek_mean_ind = np.argmin(means) tsek_mean = float(means[cls.tsek_mean_ind]) tsek_std = float(stds[cls.tsek_mean_ind][0]) # print gmm.converged_, 'converged' return (char_mean, char_std, tsek_mean, tsek_std) # def _gaussians(self, widths): ## print widths # widths = np.array(widths) # widths.shape = (len(widths),1) # # gmm = GMM(n_components = 3, n_iter=100) # try: # gmm.fit(widths) # except ValueError: # return (0,0,0,0) # means = gmm.means_ # stds = np.sqrt(gmm.covars_) # # argm = np.argmin(means) # self.smlmean = means[argm] # self.smstd = stds[argm] # cls.gmm = gmm # print gmm.converged_, 'converged' # from matplotlib import pyplot as plt # from matplotlib.mlab import normpdf ## plt.subplot(211) # plt.title('tsek-char distributions, pre-segmentation') # # n,bins,p = plt.hist(widths, 200, range=(0,75), normed=True, color='#3B60FA') ## plt.vlines(means, 0, np.array([max(n), max(n)]), linestyles='--') # for i, m in enumerate(means): # # plt.plot(bins, normpdf(bins, means[i], stds[i]), label='fit', linewidth=1) # plt.fill_between(bins, normpdf(bins, means[i], stds[i]), color=(.58,.63,.8), alpha=0.09) # # plt.show()