Java Code Examples for org.apache.commons.math3.stat.descriptive.SummaryStatistics#getSum()

The following examples show how to use org.apache.commons.math3.stat.descriptive.SummaryStatistics#getSum(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Stats.java    From tablesaw with Apache License 2.0 6 votes vote down vote up
/**
 * Copies the aggregates held by a {@link SummaryStatistics} into a new {@code Stats}
 * object whose constructor argument is {@code "Column: "} followed by the column name.
 *
 * @param values the column the statistics were computed for; only its name is read here
 * @param summaryStatistics the source of every copied aggregate
 * @return the populated {@code Stats}
 */
private static Stats getStats(NumericColumn<?> values, SummaryStatistics summaryStatistics) {
  final Stats result = new Stats("Column: " + values.name());

  // Count and extremes.
  result.n = summaryStatistics.getN();
  result.min = summaryStatistics.getMin();
  result.max = summaryStatistics.getMax();

  // Sums.
  result.sum = summaryStatistics.getSum();
  result.sumOfSquares = summaryStatistics.getSumsq();
  result.sumOfLogs = summaryStatistics.getSumOfLogs();

  // Means.
  result.mean = summaryStatistics.getMean();
  result.geometricMean = summaryStatistics.getGeometricMean();
  result.quadraticMean = summaryStatistics.getQuadraticMean();

  // Dispersion.
  result.variance = summaryStatistics.getVariance();
  result.populationVariance = summaryStatistics.getPopulationVariance();
  result.standardDeviation = summaryStatistics.getStandardDeviation();
  result.secondMoment = summaryStatistics.getSecondMoment();

  return result;
}
 
Example 2
Source File: AbstractOwlSim.java    From owltools with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * This function will take an aggregated collection of Summary Statistics
 * and will generate a derived {@link SummaryStatistics} based on a flag for the
 * desired summation.  This is particularly helpful for finding out the
 * means of the individual statistics of the collection.
 * For example, if you wanted to find out the mean of means of the collection
 * you would call this function like <p>
 * getSummaryStatisticsForCollection(aggregate,Stat.MEAN).getMean(); <p>
 * Or if you wanted to determine the max number of annotations per
 * individual, you could call: <p>
 * getSummaryStatisticsForCollection(aggregate,Stat.N).getMax(); <p>
 * The stat flag should be set to the particular individual statistic that should
 * be summarized over. A flag that matches none of the handled constants adds
 * no values, so the returned statistics are empty.
 * <p>
 * SummaryStatistics is used to save memory, as it does not store the values;
 * this could be changed to DescriptiveStatistics to see values
 * as well as other statistical functions like distributions.
 *
 * @param aggregate The aggregated collection of summary statistics
 * @param stat  The statistic to summarize over (MEAN, SUM, MIN, MAX or N)
 * @return {@link SummaryStatistics} of the selected statistic
 */
public SummaryStatistics getSummaryStatisticsForCollection(Collection<SummaryStatistics> aggregate, Stat stat) {
	SummaryStatistics stats = new SummaryStatistics();
	for (SummaryStatistics s : aggregate) {
		// One value per element: the statistic selected by the flag.
		switch (stat) {
		case MEAN : stats.addValue(s.getMean()); break;
		case SUM : stats.addValue(s.getSum()); break;
		case MIN : stats.addValue(s.getMin()); break;
		case MAX : stats.addValue(s.getMax()); break;
		case N : stats.addValue(s.getN()); break;
		}
	}
	return stats;
}
 
Example 3
Source File: AbstractOwlSim.java    From owltools with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Scores an attribute set against the overall per-individual statistics.
 * <p>
 * The mean, max and sum of the attribute set's similarity statistics are each
 * divided by the corresponding overall value, capped at 1.0, and the three
 * ratios are averaged. If any of the three attribute-set values is NaN the
 * score is 0.0.
 *
 * @param atts the attribute set to score
 * @return the combined score, or 0.0 when it cannot be computed
 * @throws UnknownOWLClassException declared by this method; presumably raised
 *         by the statistics helpers it calls
 */
public double calculateOverallAnnotationSufficiencyForAttributeSet(Set<OWLClass> atts) throws UnknownOWLClassException {
	final SummaryStatistics attStats = computeAttributeSetSimilarityStats(atts);

	// Compute the system-wide statistics on demand.
	final boolean systemStatsMissing = (this.getSummaryStatistics() == null)
			|| Double.isNaN(this.getSummaryStatistics().mean.getMean());
	if (systemStatsMissing) {
		LOG.info("Stats have not been computed yet - doing this now");
		this.computeSystemStats();
	}

	// score = mean(atts)/mean(overall) + max(atts)/max(overall) + sum(atts)/mean(sum(overall))
	double meanPart = attStats.getMean();
	double maxPart = attStats.getMax();
	double sumPart = attStats.getSum();
	double combined = 0.0;

	final boolean anyNaN = Double.isNaN(meanPart) || Double.isNaN(maxPart) || Double.isNaN(sumPart);
	if (!anyNaN) {
		// Each ratio is capped at 1.0 before averaging.
		final double meanRatio = meanPart / this.overallSummaryStatsPerIndividual.mean.getMean();
		meanPart = StatUtils.min(new double[]{meanRatio, 1.0});
		final double maxRatio = maxPart / this.overallSummaryStatsPerIndividual.max.getMax();
		maxPart = StatUtils.min(new double[]{maxRatio, 1.0});
		final double sumRatio = sumPart / this.overallSummaryStatsPerIndividual.sum.getMean();
		sumPart = StatUtils.min(new double[]{sumRatio, 1.0});
		combined = (meanPart + maxPart + sumPart) / 3;
	}

	LOG.info("Overall mean: "+meanPart + " max: "+maxPart + " sum:"+sumPart + " combined:"+combined);
	return combined;
}
 
Example 4
Source File: Stats.java    From tablesaw with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@code Stats} object from a {@link SummaryStatistics}, copying each
 * aggregate it exposes. The {@code Stats} is constructed with {@code "Column: "}
 * followed by the column name.
 *
 * @param values the column the statistics belong to; only its name is read here
 * @param summaryStatistics the source of every copied aggregate
 * @return the populated {@code Stats}
 */
private static Stats getStats(NumericColumn<?> values, SummaryStatistics summaryStatistics) {
  final String label = "Column: " + values.name();
  final Stats copy = new Stats(label);

  // Count, sums and extremes.
  copy.n = summaryStatistics.getN();
  copy.sum = summaryStatistics.getSum();
  copy.sumOfLogs = summaryStatistics.getSumOfLogs();
  copy.sumOfSquares = summaryStatistics.getSumsq();
  copy.min = summaryStatistics.getMin();
  copy.max = summaryStatistics.getMax();

  // Central tendency.
  copy.mean = summaryStatistics.getMean();
  copy.quadraticMean = summaryStatistics.getQuadraticMean();
  copy.geometricMean = summaryStatistics.getGeometricMean();

  // Spread.
  copy.secondMoment = summaryStatistics.getSecondMoment();
  copy.variance = summaryStatistics.getVariance();
  copy.populationVariance = summaryStatistics.getPopulationVariance();
  copy.standardDeviation = summaryStatistics.getStandardDeviation();

  return copy;
}
 
Example 5
Source File: AbstractOwlSim.java    From owltools with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Scores an attribute set against the per-individual statistics of the
 * subgraph associated with the given class.
 * <p>
 * The mean, max and sum of the attribute set's subgraph similarity statistics
 * are each divided by the corresponding subgraph value, capped at 1.0, and the
 * three ratios are averaged. If any of the three attribute-set values is NaN
 * the score is 0.0.
 *
 * @param atts the attribute set to score
 * @param c the class identifying the subgraph
 * @return the combined score, or 0.0 when it cannot be computed
 * @throws UnknownOWLClassException declared by this method; presumably raised
 *         by the statistics helpers it calls
 */
public double calculateSubgraphAnnotationSufficiencyForAttributeSet(Set<OWLClass> atts, OWLClass c) throws UnknownOWLClassException {
	final SummaryStatistics attStats = computeAttributeSetSimilarityStatsForSubgraph(atts,c);

	//TODO: compute statsPerIndividual for this subgraph
	final boolean systemStatsMissing = (this.overallSummaryStatsPerIndividual == null)
			|| Double.isNaN(this.overallSummaryStatsPerIndividual.max.getMean());
	if (systemStatsMissing) {
		LOG.info("Stats have not been computed yet - doing this now");
		this.computeSystemStats();
	}

	// Subgraph statistics are computed once per requested class.
	final boolean subgraphStatsKnown = this.subgraphSummaryStatsPerIndividual.containsKey(c);
	if (!subgraphStatsKnown) {
		this.computeSystemStatsForSubgraph(c);
	}

	// score = mean(atts)/mean(overall) + max(atts)/max(overall) + sum(atts)/mean(sum(overall))
	//TODO: need to normalize this based on the whole corpus
	double meanPart = attStats.getMean();
	double maxPart = attStats.getMax();
	double sumPart = attStats.getSum();
	double combined = 0.0;

	final boolean anyNaN = Double.isNaN(meanPart) || Double.isNaN(maxPart) || Double.isNaN(sumPart);
	if (!anyNaN) {
		// Each ratio is capped at 1.0 before averaging.
		final double meanRatio = meanPart / this.subgraphSummaryStatsPerIndividual.get(c).mean.getMean();
		meanPart = StatUtils.min(new double[]{meanRatio, 1.0});
		final double maxRatio = maxPart / this.subgraphSummaryStatsPerIndividual.get(c).max.getMax();
		maxPart = StatUtils.min(new double[]{maxRatio, 1.0});
		final double sumRatio = sumPart / this.subgraphSummaryStatsPerIndividual.get(c).sum.getMean();
		sumPart = StatUtils.min(new double[]{sumRatio, 1.0});
		combined = (meanPart + maxPart + sumPart) / 3;
	}

	LOG.info(getShortId(c)+" n: "+attStats.getN()+" mean: "+meanPart + " max: "+maxPart + " sum:"+sumPart + " combined:"+combined);
	return combined;
}
 
Example 6
Source File: InMemoryCacheStatistics.java    From alfresco-repository with GNU Lesser General Public License v3.0 4 votes vote down vote up
/**
 * Merges the timings of one transaction into the statistics kept for a cache.
 * <p>
 * While holding the write lock for the cache, the count and sum of each
 * operation type's timings are combined with any statistics already stored
 * for that operation type. If the cache had no statistics entry before this
 * call, a {@code CacheStatisticsCreated} event is published after the lock has
 * been released.
 *
 * @param cacheName the cache the timings belong to
 * @param txStats the per-operation timings to merge in
 */
@Override
public void add(String cacheName, TransactionStats txStats)
{
    boolean firstSighting = false;
    final WriteLock lock = getWriteLock(cacheName);
    lock.lock();
    try
    {
        // A cache seen for the first time gets an empty per-operation map.
        firstSighting = !cacheToStatsMap.containsKey(cacheName);
        if (firstSighting)
        {
            cacheToStatsMap.put(cacheName, new HashMap<OpType, OperationStats>());
        }
        final Map<OpType, OperationStats> perOperation = cacheToStatsMap.get(cacheName);

        for (OpType opType : OpType.values())
        {
            final SummaryStatistics timings = txStats.getTimings(opType);
            final long occurrences = timings.getN();
            final double elapsed = timings.getSum();

            // Start fresh, or fold the new figures into what is already stored.
            final OperationStats previous = cacheStats(perOperation, opType);
            final OperationStats merged = (previous == null)
                    ? new OperationStats(elapsed, occurrences)
                    : new OperationStats(previous, elapsed, occurrences);
            perOperation.put(opType, merged);
        }
    }
    finally
    {
        lock.unlock();
    }

    if (!firstSighting)
    {
        return;
    }
    // We've added stats for a previously unseen cache, raise an event
    // so that an MBean for the cache may be registered, for example.
    applicationContext.publishEvent(new CacheStatisticsCreated(this, cacheName));
}

/** Looks up the statistics currently stored for one operation type. */
private static OperationStats cacheStats(Map<OpType, OperationStats> perOperation, OpType opType)
{
    return perOperation.get(opType);
}
 
Example 7
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Computes the degrees of freedom and the F statistic of a one-way ANOVA from
 * per-category summary statistics (the P-value is not computed here).
 *
 * @param categoryData <code>Collection</code> of <code>SummaryStatistics</code>,
 * each summarizing the data of one category
 * @param allowOneElementData if true, allow computation for one category
 * only or for one data element per category
 * @return computed AnovaStats
 * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
 * @throws DimensionMismatchException if <code>allowOneElementData</code> is false and the number of
 * categories is less than 2 or a contained SummaryStatistics does not contain
 * at least two values
 */
private AnovaStats anovaStats(final Collection<SummaryStatistics> categoryData,
                              final boolean allowOneElementData)
    throws NullArgumentException, DimensionMismatchException {

    MathUtils.checkNotNull(categoryData);

    final int categoryCount = categoryData.size();

    if (!allowOneElementData) {
        // at least two categories are required
        if (categoryCount < 2) {
            throw new DimensionMismatchException(LocalizedFormats.TWO_OR_MORE_CATEGORIES_REQUIRED,
                                                 categoryCount, 2);
        }

        // every category must hold at least two values
        for (final SummaryStatistics category : categoryData) {
            final long size = category.getN();
            if (size < 2) {
                throw new DimensionMismatchException(LocalizedFormats.TWO_OR_MORE_VALUES_IN_CATEGORY_REQUIRED,
                                                     (int) size, 2);
            }
        }
    }

    // Running totals: within-group degrees of freedom and sum of squares,
    // plus the grand sum, sum of squares and count over all categories.
    int withinDf = 0;
    double withinSs = 0;
    double grandSum = 0;
    double grandSumSq = 0;
    int grandCount = 0;

    for (final SummaryStatistics category : categoryData) {
        final int count = (int) category.getN();
        final double sum = category.getSum();
        final double sumSq = category.getSumsq();

        grandCount += count;
        grandSum += sum;
        grandSumSq += sumSq;

        withinDf += count - 1;
        withinSs += sumSq - ((sum * sum) / count);
    }

    final double totalSs = grandSumSq - ((grandSum * grandSum) / grandCount);
    final double betweenSs = totalSs - withinSs;
    final int betweenDf = categoryCount - 1;
    final double betweenMs = betweenSs / betweenDf;
    final double withinMs = withinSs / withinDf;

    return new AnovaStats(betweenDf, withinDf, betweenMs / withinMs);

}
 
Example 8
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Computes the degrees of freedom and the F statistic of a one-way ANOVA from
 * per-category summary statistics (the P-value is not computed here).
 *
 * @param categoryData <code>Collection</code> of <code>SummaryStatistics</code>,
 * each summarizing the data of one category
 * @param allowOneElementData if true, allow computation for one category
 * only or for one data element per category
 * @return computed AnovaStats
 * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
 * @throws DimensionMismatchException if <code>allowOneElementData</code> is false and the number of
 * categories is less than 2 or a contained SummaryStatistics does not contain
 * at least two values
 */
private AnovaStats anovaStats(final Collection<SummaryStatistics> categoryData,
                              final boolean allowOneElementData)
    throws NullArgumentException, DimensionMismatchException {

    MathUtils.checkNotNull(categoryData);

    final int categoryCount = categoryData.size();

    if (!allowOneElementData) {
        // at least two categories are required
        if (categoryCount < 2) {
            throw new DimensionMismatchException(LocalizedFormats.TWO_OR_MORE_CATEGORIES_REQUIRED,
                                                 categoryCount, 2);
        }

        // every category must hold at least two values
        for (final SummaryStatistics category : categoryData) {
            final long size = category.getN();
            if (size < 2) {
                throw new DimensionMismatchException(LocalizedFormats.TWO_OR_MORE_VALUES_IN_CATEGORY_REQUIRED,
                                                     (int) size, 2);
            }
        }
    }

    // Running totals: within-group degrees of freedom and sum of squares,
    // plus the grand sum, sum of squares and count over all categories.
    int withinDf = 0;
    double withinSs = 0;
    double grandSum = 0;
    double grandSumSq = 0;
    int grandCount = 0;

    for (final SummaryStatistics category : categoryData) {
        final int count = (int) category.getN();
        final double sum = category.getSum();
        final double sumSq = category.getSumsq();

        grandCount += count;
        grandSum += sum;
        grandSumSq += sumSq;

        withinDf += count - 1;
        withinSs += sumSq - ((sum * sum) / count);
    }

    final double totalSs = grandSumSq - ((grandSum * grandSum) / grandCount);
    final double betweenSs = totalSs - withinSs;
    final int betweenDf = categoryCount - 1;
    final double betweenMs = betweenSs / betweenDf;
    final double withinMs = withinSs / withinDf;

    return new AnovaStats(betweenDf, withinDf, betweenMs / withinMs);

}
 
Example 9
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Computes the degrees of freedom and the F statistic of a one-way ANOVA from
 * per-category summary statistics (the P-value is not computed here).
 *
 * @param categoryData <code>Collection</code> of <code>SummaryStatistics</code>,
 * each summarizing the data of one category
 * @param allowOneElementData if true, allow computation for one category
 * only or for one data element per category
 * @return computed AnovaStats
 * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
 * @throws DimensionMismatchException if <code>allowOneElementData</code> is false and the number of
 * categories is less than 2 or a contained SummaryStatistics does not contain
 * at least two values
 */
private AnovaStats anovaStats(final Collection<SummaryStatistics> categoryData,
                              final boolean allowOneElementData)
    throws NullArgumentException, DimensionMismatchException {

    MathUtils.checkNotNull(categoryData);

    final int categoryCount = categoryData.size();

    if (!allowOneElementData) {
        // at least two categories are required
        if (categoryCount < 2) {
            throw new DimensionMismatchException(LocalizedFormats.TWO_OR_MORE_CATEGORIES_REQUIRED,
                                                 categoryCount, 2);
        }

        // every category must hold at least two values
        for (final SummaryStatistics category : categoryData) {
            final long size = category.getN();
            if (size < 2) {
                throw new DimensionMismatchException(LocalizedFormats.TWO_OR_MORE_VALUES_IN_CATEGORY_REQUIRED,
                                                     (int) size, 2);
            }
        }
    }

    // Running totals: within-group degrees of freedom and sum of squares,
    // plus the grand sum, sum of squares and count over all categories.
    int withinDf = 0;
    double withinSs = 0;
    double grandSum = 0;
    double grandSumSq = 0;
    int grandCount = 0;

    for (final SummaryStatistics category : categoryData) {
        final int count = (int) category.getN();
        final double sum = category.getSum();
        final double sumSq = category.getSumsq();

        grandCount += count;
        grandSum += sum;
        grandSumSq += sumSq;

        withinDf += count - 1;
        withinSs += sumSq - ((sum * sum) / count);
    }

    final double totalSs = grandSumSq - ((grandSum * grandSum) / grandCount);
    final double betweenSs = totalSs - withinSs;
    final int betweenDf = categoryCount - 1;
    final double betweenMs = betweenSs / betweenDf;
    final double withinMs = withinSs / withinDf;

    return new AnovaStats(betweenDf, withinDf, betweenMs / withinMs);

}
 
Example 10
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Computes the degrees of freedom and the F statistic of a one-way ANOVA from
 * per-category summary statistics (the P-value is not computed here).
 *
 * @param categoryData <code>Collection</code> of <code>SummaryStatistics</code>,
 * each summarizing the data of one category
 * @param allowOneElementData if true, allow computation for one category
 * only or for one data element per category
 * @return computed AnovaStats
 * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
 * @throws DimensionMismatchException if <code>allowOneElementData</code> is false and the number of
 * categories is less than 2 or a contained SummaryStatistics does not contain
 * at least two values
 */
private AnovaStats anovaStats(final Collection<SummaryStatistics> categoryData,
                              final boolean allowOneElementData)
    throws NullArgumentException, DimensionMismatchException {

    MathUtils.checkNotNull(categoryData);

    final int categoryCount = categoryData.size();

    if (!allowOneElementData) {
        // at least two categories are required
        if (categoryCount < 2) {
            throw new DimensionMismatchException(LocalizedFormats.TWO_OR_MORE_CATEGORIES_REQUIRED,
                                                 categoryCount, 2);
        }

        // every category must hold at least two values
        for (final SummaryStatistics category : categoryData) {
            final long size = category.getN();
            if (size < 2) {
                throw new DimensionMismatchException(LocalizedFormats.TWO_OR_MORE_VALUES_IN_CATEGORY_REQUIRED,
                                                     (int) size, 2);
            }
        }
    }

    // Running totals: within-group degrees of freedom and sum of squares,
    // plus the grand sum, sum of squares and count over all categories.
    int withinDf = 0;
    double withinSs = 0;
    double grandSum = 0;
    double grandSumSq = 0;
    int grandCount = 0;

    for (final SummaryStatistics category : categoryData) {
        final int count = (int) category.getN();
        final double sum = category.getSum();
        final double sumSq = category.getSumsq();

        grandCount += count;
        grandSum += sum;
        grandSumSq += sumSq;

        withinDf += count - 1;
        withinSs += sumSq - ((sum * sum) / count);
    }

    final double totalSs = grandSumSq - ((grandSum * grandSum) / grandCount);
    final double betweenSs = totalSs - withinSs;
    final int betweenDf = categoryCount - 1;
    final double betweenMs = betweenSs / betweenDf;
    final double withinMs = withinSs / withinDf;

    return new AnovaStats(betweenDf, withinDf, betweenMs / withinMs);

}
 
Example 11
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Computes the degrees of freedom and the F statistic of a one-way ANOVA from
 * per-category summary statistics (the P-value is not computed here).
 *
 * @param categoryData <code>Collection</code> of <code>SummaryStatistics</code>,
 * each summarizing the data of one category
 * @param allowOneElementData if true, allow computation for one category
 * only or for one data element per category
 * @return computed AnovaStats
 * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
 * @throws DimensionMismatchException if <code>allowOneElementData</code> is false and the number of
 * categories is less than 2 or a contained SummaryStatistics does not contain
 * at least two values
 */
private AnovaStats anovaStats(final Collection<SummaryStatistics> categoryData,
                              final boolean allowOneElementData)
    throws NullArgumentException, DimensionMismatchException {

    MathUtils.checkNotNull(categoryData);

    final int categoryCount = categoryData.size();

    if (!allowOneElementData) {
        // at least two categories are required
        if (categoryCount < 2) {
            throw new DimensionMismatchException(LocalizedFormats.TWO_OR_MORE_CATEGORIES_REQUIRED,
                                                 categoryCount, 2);
        }

        // every category must hold at least two values
        for (final SummaryStatistics category : categoryData) {
            final long size = category.getN();
            if (size < 2) {
                throw new DimensionMismatchException(LocalizedFormats.TWO_OR_MORE_VALUES_IN_CATEGORY_REQUIRED,
                                                     (int) size, 2);
            }
        }
    }

    // Running totals: within-group degrees of freedom and sum of squares,
    // plus the grand sum, sum of squares and count over all categories.
    int withinDf = 0;
    double withinSs = 0;
    double grandSum = 0;
    double grandSumSq = 0;
    int grandCount = 0;

    for (final SummaryStatistics category : categoryData) {
        final int count = (int) category.getN();
        final double sum = category.getSum();
        final double sumSq = category.getSumsq();

        grandCount += count;
        grandSum += sum;
        grandSumSq += sumSq;

        withinDf += count - 1;
        withinSs += sumSq - ((sum * sum) / count);
    }

    final double totalSs = grandSumSq - ((grandSum * grandSum) / grandCount);
    final double betweenSs = totalSs - withinSs;
    final int betweenDf = categoryCount - 1;
    final double betweenMs = betweenSs / betweenDf;
    final double withinMs = withinSs / withinDf;

    return new AnovaStats(betweenDf, withinDf, betweenMs / withinMs);

}
 
Example 12
Source File: StochasticBankIT.java    From fluo with Apache License 2.0 4 votes vote down vote up
/**
 * Repeatedly scans every value through a fresh transaction and asserts that
 * the values sum to {@code numAccounts * 1000}.
 * <p>
 * Halfway through the iterations a compaction of the table is requested. When
 * a scan's sum is off and a previous transaction is available, the differences
 * between the two transactions are printed before the assertion fires.
 *
 * @param env the test environment providing the table and the Accumulo client
 * @param numAccounts number of accounts; the expected total is 1000 per account
 * @param num number of verification passes to run
 */
private static void runVerifier(Environment env, int numAccounts, int num) {
  TestTransaction lastTx = null;

  try {

    for (int i = 0; i < num; i++) {

      if (i == num / 2) {
        env.getAccumuloClient().tableOperations().compact(env.getTable(), null, null, true,
            false);
      }

      long t1 = System.currentTimeMillis();

      TestTransaction tx = new TestTransaction(env);
      SummaryStatistics stat = new SummaryStatistics();

      for (RowColumnValue rcv : tx.scanner().build()) {
        int amt = Integer.parseInt(rcv.getValue().toString());
        stat.addValue(amt);
      }

      long t2 = System.currentTimeMillis();

      // The pattern uses printf-style conversions, which a {}-placeholder logger
      // does not expand, so the message is formatted before it is logged.
      // getMin()/getMax() return double, hence %,6.0f rather than %,6d. The
      // trailing newline is dropped because the logger terminates the line.
      log.debug(String.format(
          "avg : %,9.2f  min : %,6.0f  max : %,6.0f  stddev : %1.2f  rate : %,6.2f",
          stat.getMean(), stat.getMin(), stat.getMax(), stat.getStandardDeviation(),
          numAccounts / ((t2 - t1) / 1000.0)));

      if (stat.getSum() != numAccounts * 1000) {
        if (lastTx != null) {
          printDiffs(env, lastTx, tx);
        }
      }

      Assert.assertEquals(numAccounts * 1000, (int) stat.getSum());

      lastTx = tx;
    }
  } catch (Exception e) {
    // NOTE(review): exceptions are only printed, not rethrown; an AssertionError
    // from the check above is an Error and is not caught here.
    e.printStackTrace();
  }
}