Python scipy.stats.mstats.gmean() Examples

The following are 20 code examples for showing how to use scipy.stats.mstats.gmean(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may want to check out the right sidebar which shows the related API usage.

You may also want to check out all available functions/classes of the module scipy.stats.mstats , or try the search function .

Example 1
Project: argus-freesound   Author: lRomul   File: stacking_predict.py    License: MIT License 6 votes vote down vote up
def experiment_pred(experiment_dir, images_lst):
    print(f"Start predict: {experiment_dir}")
    transforms = get_transforms(False, CROP_SIZE)

    pred_lst = []
    for fold in config.folds:
        print("Predict fold", fold)
        fold_dir = experiment_dir / f'fold_{fold}'
        model_path = get_best_model_path(fold_dir)
        print("Model path", model_path)
        predictor = Predictor(model_path, transforms,
                              BATCH_SIZE,
                              (config.audio.n_mels, CROP_SIZE),
                              (config.audio.n_mels, CROP_SIZE//TILE_STEP),
                              device=DEVICE)

        pred = pred_test(predictor, images_lst)
        pred_lst.append(pred)

    preds = gmean(pred_lst, axis=0)
    return preds 
Example 2
Project: argus-freesound   Author: lRomul   File: stacking_predict.py    License: MIT License 6 votes vote down vote up
def stacking_pred(experiment_dir, stack_probs):
    print(f"Start predict: {experiment_dir}")

    pred_lst = []
    for fold in config.folds:
        print("Predict fold", fold)
        fold_dir = experiment_dir / f'fold_{fold}'
        model_path = get_best_model_path(fold_dir)
        print("Model path", model_path)
        predictor = StackPredictor(model_path, STACK_BATCH_SIZE,
                                   device=DEVICE)
        pred = predictor.predict(stack_probs)
        pred_lst.append(pred)

    preds = gmean(pred_lst, axis=0)
    return preds 
Example 3
Project: Computable   Author: ktraunmueller   File: test_mstats_basic.py    License: MIT License 6 votes vote down vote up
def test_1D(self):
        a = (1,2,3,4)
        actual = mstats.gmean(a)
        desired = np.power(1*2*3*4,1./4.)
        assert_almost_equal(actual, desired,decimal=14)

        desired1 = mstats.gmean(a,axis=-1)
        assert_almost_equal(actual, desired1, decimal=14)
        assert_(not isinstance(desired1, ma.MaskedArray))

        a = ma.array((1,2,3,4),mask=(0,0,0,1))
        actual = mstats.gmean(a)
        desired = np.power(1*2*3,1./3.)
        assert_almost_equal(actual, desired,decimal=14)

        desired1 = mstats.gmean(a,axis=-1)
        assert_almost_equal(actual, desired1, decimal=14) 
Example 4
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_mstats_basic.py    License: MIT License 6 votes vote down vote up
def test_1D(self):
        a = (1,2,3,4)
        actual = mstats.gmean(a)
        desired = np.power(1*2*3*4,1./4.)
        assert_almost_equal(actual, desired, decimal=14)

        desired1 = mstats.gmean(a,axis=-1)
        assert_almost_equal(actual, desired1, decimal=14)
        assert_(not isinstance(desired1, ma.MaskedArray))

        a = ma.array((1,2,3,4),mask=(0,0,0,1))
        actual = mstats.gmean(a)
        desired = np.power(1*2*3,1./3.)
        assert_almost_equal(actual, desired,decimal=14)

        desired1 = mstats.gmean(a,axis=-1)
        assert_almost_equal(actual, desired1, decimal=14) 
Example 5
Project: eleven   Author: tdsmith   File: eleven.py    License: BSD 2-Clause "Simplified" License 6 votes vote down vote up
def calculate_all_nfs(sample_frame, ranked_targets, ref_sample):
    """For a set of n ranked_genes, calculates normalization factors NF_1,
    NF_2, ..., NF_n. NF_i represents the normalization factor generated by
    considering the first i targets in ranked_targets.
    
    calculate_nf (which returns only NF_n) is probably more
    useful for routine analysis.

    :param DataFrame sample_frame: A sample data frame.
    :param iterable ranked_targets: A list or Series of target names, in order
        of descending stability (ascending M).
    :param string ref_sample: The name of the sample to normalize against.
    :return: a DataFrame with columns 1, 2, ..., n containing normalization
        factors NF_1, ..., NF_n for each sample, indexed by sample name.
    :rtype: DataFrame
    """

    # Returns a DataFrame, where rows represent samples and columns represent a number of reference genes.
    grouped = sample_frame.groupby(['Target', 'Sample'])['Cq'].aggregate(average_cq)
    samples = sample_frame['Sample'].unique()
    nfs = {}
    for i in xrange(1, len(ranked_targets)+1):
        nfs[i] = gmean([pow(2, -grouped.ix[zip(repeat(ref_gene), samples)] + grouped.ix[ref_gene, ref_sample]) for ref_gene in ranked_targets[:i]])
    return pd.DataFrame(nfs, index=samples) 
Example 6
Project: argus-freesound   Author: lRomul   File: utils.py    License: MIT License 5 votes vote down vote up
def gmean_preds_blend(probs_df_lst):
    blend_df = probs_df_lst[0]
    blend_values = np.stack([df.loc[blend_df.index.values].values
                             for df in probs_df_lst], axis=0)
    blend_values = gmean(blend_values, axis=0)

    blend_df.values[:] = blend_values
    return blend_df 
Example 7
Project: Computable   Author: ktraunmueller   File: test_mstats_basic.py    License: MIT License 5 votes vote down vote up
def test_2D(self):
        a = ma.array(((1,2,3,4),(1,2,3,4),(1,2,3,4)),
                     mask=((0,0,0,0),(1,0,0,1),(0,1,1,0)))
        actual = mstats.gmean(a)
        desired = np.array((1,2,3,4))
        assert_array_almost_equal(actual, desired, decimal=14)
        #
        desired1 = mstats.gmean(a,axis=0)
        assert_array_almost_equal(actual, desired1, decimal=14)
        #
        actual = mstats.gmean(a, -1)
        desired = ma.array((np.power(1*2*3*4,1./4.),
                            np.power(2*3,1./2.),
                            np.power(1*4,1./2.)))
        assert_array_almost_equal(actual, desired, decimal=14) 
Example 8
Project: torchdiffeq   Author: rtqichen   File: run.py    License: MIT License 5 votes vote down vote up
def main():

    sol = dict()
    for method in ['dopri5', 'adams']:
        for tol in [1e-3, 1e-6, 1e-9]:
            print('======= {} | tol={:e} ======='.format(method, tol))
            nfes = []
            times = []
            errs = []
            for c in ['A', 'B', 'C', 'D', 'E']:
                for i in ['1', '2', '3', '4', '5']:
                    diffeq, init, _ = getattr(detest, c + i)()
                    t0, y0 = init()
                    diffeq = NFEDiffEq(diffeq)

                    if not c + i in sol:
                        sol[c + i] = odeint(
                            diffeq, y0, torch.stack([t0, torch.tensor(20.)]), atol=1e-12, rtol=1e-12, method='dopri5'
                        )[1]
                        diffeq.nfe = 0

                    start_time = time.time()
                    est = odeint(diffeq, y0, torch.stack([t0, torch.tensor(20.)]), atol=tol, rtol=tol, method=method)
                    time_spent = time.time() - start_time

                    error = torch.sqrt(torch.mean((sol[c + i] - est[1])**2))

                    errs.append(error.item())
                    nfes.append(diffeq.nfe)
                    times.append(time_spent)

                    print('{}: NFE {} | Time {} | Err {:e}'.format(c + i, diffeq.nfe, time_spent, error.item()))

            print('Total NFE {} | Total Time {} | GeomAvg Error {:e}'.format(np.sum(nfes), np.sum(times), gmean(errs))) 
Example 9
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_mstats_basic.py    License: MIT License 5 votes vote down vote up
def test_1D_float96(self):
        a = ma.array((1,2,3,4), mask=(0,0,0,1))
        actual_dt = mstats.gmean(a, dtype=np.float96)
        desired_dt = np.power(1 * 2 * 3, 1. / 3.).astype(np.float96)
        assert_almost_equal(actual_dt, desired_dt, decimal=14)
        assert_(actual_dt.dtype == desired_dt.dtype) 
Example 10
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_mstats_basic.py    License: MIT License 5 votes vote down vote up
def test_2D(self):
        a = ma.array(((1, 2, 3, 4), (1, 2, 3, 4), (1, 2, 3, 4)),
                     mask=((0, 0, 0, 0), (1, 0, 0, 1), (0, 1, 1, 0)))
        actual = mstats.gmean(a)
        desired = np.array((1,2,3,4))
        assert_array_almost_equal(actual, desired, decimal=14)

        desired1 = mstats.gmean(a,axis=0)
        assert_array_almost_equal(actual, desired1, decimal=14)

        actual = mstats.gmean(a, -1)
        desired = ma.array((np.power(1*2*3*4,1./4.),
                            np.power(2*3,1./2.),
                            np.power(1*4,1./2.)))
        assert_array_almost_equal(actual, desired, decimal=14) 
Example 11
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_mstats_basic.py    License: MIT License 5 votes vote down vote up
def test_gmean(self):
        for n in self.get_n():
            x, y, xm, ym = self.generate_xy_sample(n)
            r = stats.gmean(abs(x))
            rm = stats.mstats.gmean(abs(xm))
            assert_allclose(r, rm, rtol=1e-13)

            r = stats.gmean(abs(y))
            rm = stats.mstats.gmean(abs(ym))
            assert_allclose(r, rm, rtol=1e-13) 
Example 12
Project: phylowgs   Author: morrislab   File: create_phylowgs_inputs.py    License: GNU General Public License v3.0 5 votes vote down vote up
def impute_missing_total_reads(total_reads, missing_variant_confidence):
  # Change NaNs to masked values via SciPy.
  masked_total_reads = ma.fix_invalid(total_reads)

  # Going forward, suppose you have v variants and s samples in a v*s matrix of
  # read counts. Missing values are masked.

  # Calculate geometric mean of variant read depth in each sample. Result: s*1
  sample_means = gmean(masked_total_reads, axis=0)
  assert np.sum(sample_means <= 0) == np.sum(np.isnan(sample_means)) == 0
  # Divide every variant's read count by its mean sample read depth to get read
  # depth enrichment relative to other variants in sample. Result: v*s
  normalized_to_sample = np.dot(masked_total_reads, np.diag(1./sample_means))
  # For each variant, calculate geometric mean of its read depth enrichment
  # across samples. Result: v*1
  variant_mean_reads = gmean(normalized_to_sample, axis=1)
  assert np.sum(variant_mean_reads <= 0) == np.sum(np.isnan(variant_mean_reads)) == 0

  # Convert 1D arrays to vectors to permit matrix multiplication.
  imputed_counts = np.dot(variant_mean_reads.reshape((-1, 1)), sample_means.reshape((1, -1)))
  nan_coords = np.where(np.isnan(total_reads))
  total_reads[nan_coords] = imputed_counts[nan_coords]
  assert np.sum(total_reads <= 0) == np.sum(np.isnan(total_reads)) == 0

  total_reads[nan_coords] *= missing_variant_confidence
  return np.floor(total_reads).astype(np.int) 
Example 13
Project: eleven   Author: tdsmith   File: eleven.py    License: BSD 2-Clause "Simplified" License 5 votes vote down vote up
def calculate_nf(sample_frame, ref_targets, ref_sample):
    """Calculates a normalization factor from the geometric mean of the
    expression of all ref_targets, normalized to a reference sample.

    :param DataFrame sample_frame: A sample data frame.
    :param iterable ref_targets: A list or Series of target names.
    :param string ref_sample: The name of the sample to normalize against.
    :return: a Series indexed by sample name containing normalization factors
        for each sample.
    """
    grouped = sample_frame.groupby(['Target', 'Sample'])['Cq'].aggregate(average_cq)
    samples = sample_frame['Sample'].unique()
    nfs = gmean([pow(2, -grouped.ix[zip(repeat(ref_gene), samples)] + grouped.ix[ref_gene, ref_sample]) for ref_gene in ref_targets])
    return pd.Series(nfs, index=samples) 
Example 14
Project: alphacsc   Author: alphacsc   File: convolve_ztz.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def benchmark():
    n_atoms_range = [1, 3, 9]
    n_channels_range = [1, 25, 50, 100, 200]
    n_times_atom_range = [8, 32, 128]

    n_runs = (len(n_atoms_range) * len(n_channels_range) * len(
        n_times_atom_range) * len(all_func))

    k = 0
    results = []
    for n_atoms in n_atoms_range:
        for n_channels in n_channels_range:
            for n_times_atom in n_times_atom_range:
                for func in all_func:
                    print('%d/%d, %s' % (k, n_runs, func.__name__))
                    k += 1
                    results.append(
                        run_one(n_atoms, n_channels, n_times_atom, func))

    df = pd.DataFrame(results, columns=[
        'n_atoms', 'n_channels', 'n_times_atom', 'func', 'duration'
    ])
    fig, axes = plt.subplots(2, 2, figsize=(10, 8))
    axes = axes.ravel()

    def plot(index, ax):
        pivot = df.pivot_table(columns='func', index=index, values='duration',
                               aggfunc=gmean)
        pivot.plot(ax=ax)
        ax.set_xscale('log')
        ax.set_yscale('log')
        ax.set_ylabel('duration')

    plot('n_atoms', axes[0])
    plot('n_times_atom', axes[1])
    plot('n_channels', axes[2])
    # plot('n_times_valid', axes[3])
    plt.tight_layout()
    plt.show() 
Example 15
Project: alphacsc   Author: alphacsc   File: multivariate_convolve.py    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def benchmark():
    n_atoms_range = [1, 2, 4, 8, 16]
    n_channels_range = [10, 20, 40, 80, 160]
    n_times_atom_range = [10, 20, 40, 80, 160]
    n_runs = (len(n_atoms_range) * len(n_channels_range) *
              len(n_times_atom_range) * len(all_func))

    k = 0
    results = []
    for n_atoms in n_atoms_range:
        for n_channels in n_channels_range:
            for n_times_atom in n_times_atom_range:
                for func in all_func:
                    print('%d/%d, %s' % (k, n_runs, func.__name__))
                    k += 1
                    results.append(
                        run_one(n_atoms, n_channels, n_times_atom, func))

    df = pd.DataFrame(results, columns=[
        'n_atoms', 'n_channels', 'n_times_atom', 'func', 'duration'
    ])
    fig, axes = plt.subplots(2, 2, figsize=(10, 8))
    axes = axes.ravel()

    def plot(index, ax):
        pivot = df.pivot_table(columns='func', index=index, values='duration',
                               aggfunc=gmean)
        pivot.plot(ax=ax)
        ax.set_xscale('log')
        ax.set_yscale('log')
        ax.set_ylabel('duration')

    plot('n_atoms', axes[0])
    plot('n_times_atom', axes[1])
    plot('n_channels', axes[2])
    plt.tight_layout()
    plt.show() 
Example 16
Project: tfdiffeq   Author: titu1994   File: run.py    License: MIT License 5 votes vote down vote up
def main():

    sol = dict()
    for method in ['dopri5', 'adams']:
        for tol in [1e-3, 1e-6, 1e-9]:
            print('======= {} | tol={:e} ======='.format(method, tol))
            nfes = []
            times = []
            errs = []
            for c in ['A', 'B', 'C', 'D', 'E']:
                for i in ['1', '2', '3', '4', '5']:
                    diffeq, init, _ = getattr(detest, c + i)()
                    t0, y0 = init()
                    diffeq = NFEDiffEq(diffeq)

                    if not c + i in sol:
                        sol[c + i] = odeint(
                            diffeq, y0, tf.stack([t0, tf.convert_to_tensor(20., dtype=tf.float64)]), atol=1e-12, rtol=1e-12,
                            method='dopri5'
                        )[1]
                        diffeq.nfe = 0

                    start_time = time.time()
                    est = odeint(diffeq, y0, tf.stack([t0, tf.convert_to_tensor(20., dtype=tf.float64)]), atol=tol, rtol=tol,
                                 method=method)
                    time_spent = time.time() - start_time

                    error = tf.sqrt(tf.reduce_mean((sol[c + i] - est[1])**2))

                    errs.append(error.numpy())
                    nfes.append(diffeq.nfe)
                    times.append(time_spent)

                    print('{}: NFE {} | Time {} | Err {:e}'.format(c + i, diffeq.nfe, time_spent, error.numpy()))

            print('Total NFE {} | Total Time {} | GeomAvg Error {:e}'.format(np.sum(nfes), np.sum(times), gmean(errs))) 
Example 17
Project: topic-stability   Author: derekgreene   File: lda.py    License: Apache License 2.0 5 votes vote down vote up
def __rerank_terms( self, num_terms, k, mallet_weights_path, eps = 1e-9 ):
		"""
		Implements the term re-weighting method proposed by Blei and Lafferty.
		"""
		log.debug( "Reweighting terms  ..." )
		# Parse weights for all terms and topics
		W = np.zeros( (num_terms, k) )
		with open(mallet_weights_path) as f:
			lines = [line.rstrip() for line in f]
			for l in lines:
				if len(l) == 0 or l.startswith("#"):
					continue
				parts = l.split("\t")
				if len(parts) < 3:
					continue
				topic_index = int(parts[0])
				term_index = int(parts[1])		
				W[term_index,topic_index] = float(parts[2])
		# Calculate geometric means
		gmeans = gmean( W, axis = 1 )
		# Reweight the terms
		# TODO: vectorize this
		for row in range(num_terms):
			if gmeans[row] <= eps or np.isnan( gmeans[row] ):
				continue
			for col in range(k):
				if W[row,col] <= eps:
					continue
				lp = np.log( W[row,col] / gmeans[row] )
				if np.isnan( lp ):
					continue
				W[row,col] = W[row,col] * lp
		# Get new term rankings per topic
		rankings = []
		for topic_index in range(k):
			ranking = np.argsort( W[:,topic_index] )[::-1].tolist()
			rankings.append( ranking )
		return rankings 
Example 18
Project: alphacsc   Author: alphacsc   File: multivariate_convolve_2.py    License: BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def benchmark():
    n_atoms_range = [1, 10, 100]
    sparsity_range = np.logspace(-4, -1, 5)
    n_times_atom_range = [10, 40, 160]
    n_times_valid_range = [200, 800, 3200]

    n_runs = (len(n_atoms_range) * len(sparsity_range) * len(
        n_times_atom_range) * len(n_times_valid_range) * len(all_func))

    k = 0
    results = []
    for n_atoms in n_atoms_range:
        for sparsity in sparsity_range:
            for n_times_atom in n_times_atom_range:
                for n_times_valid in n_times_valid_range:
                    for func in all_func:
                        print('%d/%d, %s' % (k, n_runs, func.__name__))
                        k += 1
                        results.append(
                            run_one(n_atoms, sparsity, n_times_atom,
                                    n_times_valid, func))

    df = pd.DataFrame(results, columns=[
        'n_atoms', 'sparsity', 'n_times_atom', 'n_times_valid', 'func',
        'duration'
    ])
    fig, axes = plt.subplots(2, 2, figsize=(10, 8))
    axes = axes.ravel()

    def plot(index, ax):
        pivot = df.pivot_table(columns='func', index=index, values='duration',
                               aggfunc=gmean)
        pivot.plot(ax=ax)
        ax.set_xscale('log')
        ax.set_yscale('log')
        ax.set_ylabel('duration')

    plot('n_atoms', axes[0])
    plot('n_times_atom', axes[1])
    plot('sparsity', axes[2])
    plot('n_times_valid', axes[3])
    plt.tight_layout()
    plt.show() 
Example 19
Project: alphacsc   Author: alphacsc   File: compute_ztz.py    License: BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def benchmark():
    n_atoms_range = [1, 3, 9]
    sparsity_range = [0.01, 0.03, 0.1, 0.3]
    n_times_atom_range = [8, 32, 128]
    n_times_valid_range = [1000, 30000, 10000]

    n_runs = (len(n_atoms_range) * len(sparsity_range) * len(
        n_times_atom_range) * len(n_times_valid_range) * len(all_func))

    k = 0
    results = []
    for n_atoms in n_atoms_range:
        for sparsity in sparsity_range:
            for n_times_atom in n_times_atom_range:
                for n_times_valid in n_times_valid_range:
                    for func in all_func:
                        print('%d/%d, %s' % (k, n_runs, func.__name__))
                        k += 1
                        results.append(
                            run_one(n_atoms, sparsity, n_times_atom,
                                    n_times_valid, func))

    df = pd.DataFrame(results, columns=[
        'n_atoms', 'sparsity', 'n_times_atom', 'n_times_valid', 'func',
        'duration'
    ])
    fig, axes = plt.subplots(2, 2, figsize=(10, 8))
    axes = axes.ravel()

    def plot(index, ax):
        pivot = df.pivot_table(columns='func', index=index, values='duration',
                               aggfunc=gmean)
        pivot.plot(ax=ax)
        ax.set_xscale('log')
        ax.set_yscale('log')
        ax.set_ylabel('duration')

    plot('n_atoms', axes[0])
    plot('n_times_atom', axes[1])
    plot('sparsity', axes[2])
    plot('n_times_valid', axes[3])
    plt.tight_layout()
    plt.show() 
Example 20
Project: alphacsc   Author: alphacsc   File: compute_ztX.py    License: BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def benchmark():
    n_atoms_range = [1, 4, 16]
    sparsity_range = np.logspace(-4, -1, 5)
    n_times_atom_range = [10, 40, 160]
    n_times_valid_range = [200, 800, 3200]

    n_runs = (len(n_atoms_range) * len(sparsity_range) * len(
        n_times_atom_range) * len(n_times_valid_range) * len(all_func))

    k = 0
    results = []
    for n_atoms in n_atoms_range:
        for sparsity in sparsity_range:
            for n_times_atom in n_times_atom_range:
                for n_times_valid in n_times_valid_range:
                    for func in all_func:
                        print('%d/%d, %s' % (k, n_runs, func.__name__))
                        k += 1
                        results.append(
                            run_one(n_atoms, sparsity, n_times_atom,
                                    n_times_valid, func))

    df = pd.DataFrame(results, columns=[
        'n_atoms', 'sparsity', 'n_times_atom', 'n_times_valid', 'func',
        'duration'
    ])
    fig, axes = plt.subplots(2, 2, figsize=(10, 8))
    axes = axes.ravel()

    def plot(index, ax):
        pivot = df.pivot_table(columns='func', index=index, values='duration',
                               aggfunc=gmean)
        pivot.plot(ax=ax)
        ax.set_xscale('log')
        ax.set_yscale('log')
        ax.set_ylabel('duration')

    plot('n_atoms', axes[0])
    plot('n_times_atom', axes[1])
    plot('sparsity', axes[2])
    plot('n_times_valid', axes[3])
    plt.tight_layout()
    plt.show()