org.apache.commons.math3.stat.descriptive.SummaryStatistics Java Examples

The following examples show how to use org.apache.commons.math3.stat.descriptive.SummaryStatistics. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: AbstractDyadScaler.java    From AILibs with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Fits the standard scaler to the dataset by accumulating one
 * {@link SummaryStatistics} per context feature and one per alternative
 * feature over every dyad of every instance.
 * <p>
 * The feature counts are read from the first dyad of the first instance, so
 * the dataset is expected to be non-empty. Statistics held from an earlier
 * call are replaced by freshly allocated ones.
 *
 * @param dataset The dataset the scaler should be fit to.
 */
public void fit(final IDyadRankingDataset dataset) {
	final int contextLength = dataset.get(0).getLabel().get(0).getContext().length();
	final int alternativeLength = dataset.get(0).getLabel().get(0).getAlternative().length();

	// One empty accumulator per feature dimension.
	this.statsX = new SummaryStatistics[contextLength];
	this.statsY = new SummaryStatistics[alternativeLength];
	int slot = 0;
	while (slot < contextLength) {
		this.statsX[slot++] = new SummaryStatistics();
	}
	slot = 0;
	while (slot < alternativeLength) {
		this.statsY[slot++] = new SummaryStatistics();
	}

	// Feed every dyad's feature values into the matching accumulators.
	for (IDyadRankingInstance ranking : dataset) {
		for (IDyad dyad : ranking) {
			for (int feature = 0; feature < contextLength; feature++) {
				this.statsX[feature].addValue(dyad.getContext().getValue(feature));
			}
			for (int feature = 0; feature < alternativeLength; feature++) {
				this.statsY[feature].addValue(dyad.getAlternative().getValue(feature));
			}
		}
	}
}
 
Example #2
Source File: TestSolrCachePerf.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code doTestGetPutCompute} with its last argument set to {@code false}
 * and to {@code true}, first for a few warm-up rounds whose results are thrown
 * away and then for the measured rounds. Afterwards, for every key of the
 * "compute" ratio map, the mean is compared against the mean recorded under
 * the same key in the "get/put" ratio map.
 */
@Test
public void testGetPutCompute() throws Exception {
  final int threads = 10;
  final int warmUpRounds = 10;
  final int measuredRounds = 100;

  // warm-up: statistics are collected into throw-away maps
  for (int round = 0; round < warmUpRounds; round++) {
    doTestGetPutCompute(new HashMap<String, SummaryStatistics>(), new HashMap<String, SummaryStatistics>(), threads, false);
    doTestGetPutCompute(new HashMap<String, SummaryStatistics>(), new HashMap<String, SummaryStatistics>(), threads, true);
  }

  // measured runs: one ratio map and one time map per mode
  Map<String, SummaryStatistics> getPutRatio = new HashMap<>();
  Map<String, SummaryStatistics> computeRatio = new HashMap<>();
  Map<String, SummaryStatistics> getPutTime = new HashMap<>();
  Map<String, SummaryStatistics> computeTime = new HashMap<>();
  for (int round = 0; round < measuredRounds; round++) {
    doTestGetPutCompute(getPutRatio, getPutTime, threads, false);
    doTestGetPutCompute(computeRatio, computeTime, threads, true);
  }

  for (Map.Entry<String, SummaryStatistics> computeEntry : computeRatio.entrySet()) {
    SummaryStatistics computeStats = computeEntry.getValue();
    SummaryStatistics getPutStats = getPutRatio.get(computeEntry.getKey());
    assertGreaterThanOrEqual( "Compute ratio should be higher or equal to get/put ratio", computeStats.getMean(), getPutStats.getMean(), 0.0001);
  }
}
 
Example #3
Source File: RandomDataTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Tests the failure mode and the distribution of nextGaussian(): a zero sigma
 * must be rejected, and the mean of a large sample drawn with mean 0 and
 * sigma 1 must be statistically compatible with zero.
 */
@Test
public void testNextGaussian() {
    try {
        randomData.nextGaussian(0, 0);
        Assert.fail("zero sigma -- MathIllegalArgumentException expected");
    } catch (MathIllegalArgumentException ex) {
        // ignored
    }

    final SummaryStatistics sample = new SummaryStatistics();
    int drawn = 0;
    while (drawn < largeSampleSize) {
        sample.addValue(randomData.nextGaussian(0, 1));
        drawn++;
    }

    /*
     * t-test at .001-level TODO: replace with externalized t-test, with
     * test statistic defined in TestStatistic
     */
    final double sampleSize = sample.getN();
    final double standardError = sample.getStandardDeviation() / FastMath.sqrt(sampleSize);
    final double tStatistic = FastMath.abs(sample.getMean()) / standardError;
    Assert.assertTrue(tStatistic < 3.29);
}
 
Example #4
Source File: PerfTestUtils.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Timing.
 *
 * @param repeatChunk Each timing measurement will be done for that
 * number of repeats of the code.
 * @param repeatStat Timing will be averaged over that number of runs.
 * @param runGC Call {@code System.gc()} between each timed block. When
 * set to {@code true}, the test will run much slower.
 * @param methods Codes being timed.
 * @return for each of the given {@code methods}, a
 * {@link StatisticalSummary} of the average times (in milliseconds)
 * taken by a single call to the {@code call} method (i.e. the time
 * taken by each timed block divided by {@code repeatChunk}).
 */
public static StatisticalSummary[] time(int repeatChunk,
                                        int repeatStat,
                                        boolean runGC,
                                        Callable<Double> ... methods) {
    // Indexed as [method][run][...]; element 0 of each run is the value summarised here.
    final double[][][] measurements = timesAndResults(repeatChunk,
                                                      repeatStat,
                                                      runGC,
                                                      methods);

    final StatisticalSummary[] summaries = new StatisticalSummary[methods.length];
    int method = 0;
    while (method < summaries.length) {
        final SummaryStatistics accumulator = new SummaryStatistics();
        for (int run = 0; run < repeatStat; run++) {
            accumulator.addValue(measurements[method][run][0]);
        }
        summaries[method] = accumulator.getSummary();
        method++;
    }

    return summaries;
}
 
Example #5
Source File: PerfTestUtils.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Timing.
 *
 * @param repeatChunk Each timing measurement will be done for that
 * number of repeats of the code.
 * @param repeatStat Timing will be averaged over that number of runs.
 * @param runGC Call {@code System.gc()} between each timed block. When
 * set to {@code true}, the test will run much slower.
 * @param methods Codes being timed.
 * @return for each of the given {@code methods}, a
 * {@link StatisticalSummary} of the average times (in milliseconds)
 * taken by a single call to the {@code call} method (i.e. the time
 * taken by each timed block divided by {@code repeatChunk}).
 */
public static StatisticalSummary[] time(int repeatChunk,
                                        int repeatStat,
                                        boolean runGC,
                                        Callable<Double> ... methods) {
    // Indexed as [method][run][...]; element 0 of each run is the value summarised here.
    final double[][][] measurements = timesAndResults(repeatChunk,
                                                      repeatStat,
                                                      runGC,
                                                      methods);

    final StatisticalSummary[] summaries = new StatisticalSummary[methods.length];
    int method = 0;
    while (method < summaries.length) {
        final SummaryStatistics accumulator = new SummaryStatistics();
        for (int run = 0; run < repeatStat; run++) {
            accumulator.addValue(measurements[method][run][0]);
        }
        summaries[method] = accumulator.getSummary();
        method++;
    }

    return summaries;
}
 
Example #6
Source File: EmpiricalDistribution.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Generates a random value from this distribution.
 * <p>
 * A uniform deviate selects the first non-empty bin whose upper bound is not
 * below it. The value is then drawn from a Gaussian with that bin's mean and
 * standard deviation, or is the bin mean itself when the standard deviation
 * is not positive.
 * <strong>Preconditions:</strong><ul>
 * <li>the distribution must be loaded before invoking this method</li></ul>
 * @return the random value.
 * @throws MathIllegalStateException if the distribution has not been loaded
 * or if no bin could be selected
 */
public double getNextValue() throws MathIllegalStateException {

    if (!loaded) {
        throw new MathIllegalStateException(LocalizedFormats.DISTRIBUTION_NOT_LOADED);
    }

    // Uniformly distributed random number in (0,1) used to pick the bin
    final double uniform = randomData.nextUniform(0,1);

    for (int bin = 0; bin < binCount; bin++) {
        final boolean withinBin = uniform <= upperBounds[bin];
        if (!withinBin) {
            continue;
        }
        final SummaryStatistics binSummary = binStats.get(bin);
        if (binSummary.getN() <= 0) {
            continue; // empty bin: keep looking in the following bins
        }
        final double sigma = binSummary.getStandardDeviation();
        if (sigma > 0) {  // more than one obs
            return randomData.nextGaussian(binSummary.getMean(), sigma);
        }
        return binSummary.getMean(); // only one obs in bin
    }
    throw new MathIllegalStateException(LocalizedFormats.NO_BIN_SELECTED);
}
 
Example #7
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Converts every category array into a {@link SummaryStatistics} and hands
 * the resulting collection to the overload that actually does the
 * calculations (except P-value), passing {@code false} as its second
 * argument.
 *
 * @param categoryData
 *            <code>Collection</code> of <code>double[]</code> arrays each
 *            containing data for one category
 * @return computed AnovaStats
 * @throws NullArgumentException
 *             if <code>categoryData</code> is <code>null</code>
 * @throws DimensionMismatchException
 *             if the length of the <code>categoryData</code> array is less
 *             than 2 or a contained <code>double[]</code> array does not
 *             contain at least two values
 */
private AnovaStats anovaStats(final Collection<double[]> categoryData)
    throws NullArgumentException, DimensionMismatchException {

    MathUtils.checkNotNull(categoryData);

    final Collection<SummaryStatistics> summaries =
            new ArrayList<SummaryStatistics>(categoryData.size());

    // one summary per category, kept in the iteration order of the input
    for (final double[] category : categoryData) {
        final SummaryStatistics summary = new SummaryStatistics();
        for (int i = 0; i < category.length; i++) {
            summary.addValue(category[i]);
        }
        summaries.add(summary);
    }

    return anovaStats(summaries, false);

}
 
Example #8
Source File: Stats.java    From tablesaw with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@code Stats} object named after the given column and copies every
 * statistic exposed by the supplied summary into it.
 *
 * @param values the column the statistics belong to; only its name is read here
 * @param summaryStatistics the already populated summary to copy from
 * @return the filled {@code Stats} instance
 */
private static Stats getStats(NumericColumn<?> values, SummaryStatistics summaryStatistics) {
  final Stats result = new Stats("Column: " + values.name());

  // counts, sums and extremes
  result.n = summaryStatistics.getN();
  result.sum = summaryStatistics.getSum();
  result.min = summaryStatistics.getMin();
  result.max = summaryStatistics.getMax();
  result.sumOfLogs = summaryStatistics.getSumOfLogs();
  result.sumOfSquares = summaryStatistics.getSumsq();

  // means
  result.mean = summaryStatistics.getMean();
  result.geometricMean = summaryStatistics.getGeometricMean();
  result.quadraticMean = summaryStatistics.getQuadraticMean();

  // dispersion
  result.variance = summaryStatistics.getVariance();
  result.populationVariance = summaryStatistics.getPopulationVariance();
  result.standardDeviation = summaryStatistics.getStandardDeviation();
  result.secondMoment = summaryStatistics.getSecondMoment();

  return result;
}
 
Example #9
Source File: CertifiedDataTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Test SummaryStatistics - implementations that do not store the data
 * and use single pass algorithms to compute statistics.
 * <p>
 * Each data file is loaded into the same statistics object, after which the
 * standard deviation and the mean are compared with the {@code std} and
 * {@code mean} reference values (presumably refreshed by {@code loadStats})
 * using a per-file tolerance.
 */
@Test
public void testSummaryStatistics() throws Exception {
    final String[] dataSets = { "PiDigits", "Mavro", "Michelso", "NumAcc1", "NumAcc2" };
    final double[] tolerances = { 1E-13, 1E-14, 1E-13, 1E-14, 1E-14 };

    SummaryStatistics u = new SummaryStatistics();
    for (int i = 0; i < dataSets.length; i++) {
        final String name = dataSets[i];
        final double tolerance = tolerances[i];

        loadStats("data/" + name + ".txt", u);
        Assert.assertEquals(name + ": std", std, u.getStandardDeviation(), tolerance);
        Assert.assertEquals(name + ": mean", mean, u.getMean(), tolerance);
    }
}
 
Example #10
Source File: RandomDataTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Tests the failure mode and the distribution of nextGaussian(): a zero sigma
 * must be rejected, and the mean of a large sample drawn with mean 0 and
 * sigma 1 must be statistically compatible with zero.
 */
@Test
public void testNextGaussian() {
    try {
        randomData.nextGaussian(0, 0);
        Assert.fail("zero sigma -- MathIllegalArgumentException expected");
    } catch (MathIllegalArgumentException ex) {
        // ignored
    }

    final SummaryStatistics sample = new SummaryStatistics();
    int drawn = 0;
    while (drawn < largeSampleSize) {
        sample.addValue(randomData.nextGaussian(0, 1));
        drawn++;
    }

    /*
     * t-test at .001-level TODO: replace with externalized t-test, with
     * test statistic defined in TestStatistic
     */
    final double sampleSize = sample.getN();
    final double standardError = sample.getStandardDeviation() / FastMath.sqrt(sampleSize);
    final double tStatistic = FastMath.abs(sample.getMean()) / standardError;
    Assert.assertTrue(tStatistic < 3.29);
}
 
Example #11
Source File: AbstractOwlSim.java    From owltools with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Scores how well the given attribute set is annotated relative to the
 * overall per-individual statistics.
 * <p>
 * The score is the average of three ratios, each capped at 1.0:
 * mean(atts)/mean(overall), max(atts)/max(overall) and
 * sum(atts)/mean(sum(overall)). If any of the attribute-set statistics is NaN
 * the score stays at 0.0. System statistics are computed first when they are
 * missing or their mean is NaN.
 *
 * @param atts the attribute classes to score
 * @return the combined score, or 0.0 when it cannot be computed
 * @throws UnknownOWLClassException declared by this method; presumably raised
 *         while computing the system statistics
 */
public double calculateOverallAnnotationSufficiencyForAttributeSet(Set<OWLClass> atts) throws UnknownOWLClassException {
	SummaryStatistics attributeStats = computeAttributeSetSimilarityStats(atts);

	boolean statsMissing = this.getSummaryStatistics() == null;
	if (statsMissing || Double.isNaN(this.getSummaryStatistics().mean.getMean())) {
		LOG.info("Stats have not been computed yet - doing this now");
		this.computeSystemStats();
	}

	// score = mean(atts)/mean(overall) + max(atts)/max(overall) + sum(atts)/mean(sum(overall))
	double overallScore = 0.0;
	double meanScore = attributeStats.getMean();
	double maxScore = attributeStats.getMax();
	double sumScore = attributeStats.getSum();
	if (!Double.isNaN(meanScore) && !Double.isNaN(maxScore) && !Double.isNaN(sumScore)) {
		// each ratio is capped at 1.0 before averaging
		double meanRatio = meanScore / this.overallSummaryStatsPerIndividual.mean.getMean();
		double maxRatio = maxScore / this.overallSummaryStatsPerIndividual.max.getMax();
		double sumRatio = sumScore / this.overallSummaryStatsPerIndividual.sum.getMean();
		meanScore = StatUtils.min(new double[]{meanRatio, 1.0});
		maxScore = StatUtils.min(new double[]{maxRatio, 1.0});
		sumScore = StatUtils.min(new double[]{sumRatio, 1.0});
		overallScore = (meanScore + maxScore + sumScore) / 3;
	}
	LOG.info("Overall mean: "+meanScore + " max: "+maxScore + " sum:"+sumScore + " combined:"+overallScore);
	return overallScore;
}
 
Example #12
Source File: FeedUtils.java    From commafeed with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the mean absolute difference between consecutive timestamps of the
 * given entries, as ordered by {@code getSortedTimestamps}.
 *
 * @param entries the feed entries to inspect
 * @return the mean gap truncated to a {@code long}, or {@code null} when
 *         fewer than two entries are supplied
 */
public static Long averageTimeBetweenEntries(List<FeedEntry> entries) {
	if (entries.size() < 2) {
		// a gap needs at least two entries
		return null;
	}

	List<Long> timestamps = getSortedTimestamps(entries);

	SummaryStatistics gaps = new SummaryStatistics();
	for (int next = 1; next < timestamps.size(); next++) {
		long gap = Math.abs(timestamps.get(next - 1) - timestamps.get(next));
		gaps.addValue(gap);
	}
	return (long) gaps.getMean();
}
 
Example #13
Source File: EffectSize.java    From rival with Apache License 2.0 5 votes vote down vote up
/**
 *
 * Estimation of effect size based on the distribution of score differences
 * (from paired samples). Only keys present in both maps contribute; for each
 * of them the difference "test minus baseline" is accumulated, and the mean
 * and the square root of the variance of those differences are passed to the
 * two-argument overload.
 *
 * @param <V> type of the keys of each map.
 * @param baselineMetricPerDimension map for the baseline method, one value
 * for each user (dimension)
 * @param testMetricPerDimension map for the test method, one value for each
 * user (dimension)
 * @return the effect size.
 */
public static <V> double getEffectSizePairedT(final Map<V, Double> baselineMetricPerDimension, final Map<V, Double> testMetricPerDimension) {
    // keys for which both methods produced a value
    final Set<V> sharedKeys = new HashSet<V>(baselineMetricPerDimension.keySet());
    sharedKeys.retainAll(testMetricPerDimension.keySet());

    final SummaryStatistics deltas = new SummaryStatistics();
    for (V key : sharedKeys) {
        final double testValue = testMetricPerDimension.get(key);
        final double baselineValue = baselineMetricPerDimension.get(key);
        deltas.addValue(testValue - baselineValue);
    }

    final double meanDelta = deltas.getMean();
    final double deviationOfDelta = Math.sqrt(deltas.getVariance());
    return getEffectSizePairedT(meanDelta, deviationOfDelta);
}
 
Example #14
Source File: EmpiricalDistribution.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * The within-bin smoothing kernel: a normal distribution built from the bin's
 * mean and standard deviation, using the generator of {@code randomData} and
 * the default inverse cumulative accuracy.
 *
 * @param bStats summary statistics for the bin
 * @return within-bin kernel parameterized by bStats
 */
protected RealDistribution getKernel(SummaryStatistics bStats) {
    // Default to Gaussian
    final double binMean = bStats.getMean();
    final double binSigma = bStats.getStandardDeviation();
    return new NormalDistribution(randomData.getRandomGenerator(),
                                  binMean,
                                  binSigma,
                                  NormalDistribution.DEFAULT_INVERSE_ABSOLUTE_ACCURACY);
}
 
Example #15
Source File: EmpiricalDistribution.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * The within-bin smoothing kernel. A bin holding exactly one observation
 * yields a constant distribution at the bin mean; every other bin yields a
 * Gaussian distribution parameterized by the mean and standard deviation of
 * {@code bStats}.
 *
 * @param bStats summary statistics for the bin
 * @return within-bin kernel parameterized by bStats
 */
protected RealDistribution getKernel(SummaryStatistics bStats) {
    final double binMean = bStats.getMean();
    if (bStats.getN() == 1) {
        // a single observation has no spread to model
        return new ConstantRealDistribution(binMean);
    }
    return new NormalDistribution(randomData.getRandomGenerator(),
                                  binMean,
                                  bStats.getStandardDeviation(),
                                  NormalDistribution.DEFAULT_INVERSE_ABSOLUTE_ACCURACY);
}
 
Example #16
Source File: NumberMapFunctions.java    From tablesaw with Apache License 2.0 5 votes vote down vote up
/**
 * Bins this column's values with an {@link EmpiricalDistribution} and returns
 * the number of observations per bin.
 *
 * @param binCount the number of bins requested from the distribution
 * @return a column named {@code name() + "[binned]"} holding one count per bin
 */
default DoubleColumn bin(int binCount) {
  double[] counts = new double[binCount];
  EmpiricalDistribution empirical = new EmpiricalDistribution(binCount);
  empirical.load(asDoubleArray());

  // copy the observation count of each bin, in the order the bins are reported
  int next = 0;
  for (SummaryStatistics binSummary : empirical.getBinStats()) {
    counts[next] = binSummary.getN();
    next++;
  }
  return DoubleColumn.create(name() + "[binned]", counts);
}
 
Example #17
Source File: StellarStatisticsFunctionsTest.java    From metron with Apache License 2.0 5 votes vote down vote up
/**
 * Builds ten statistics providers from Gaussian samples, merges them with
 * STATS_MERGE and validates the merged provider against commons-math
 * reference statistics fed with exactly the same values.
 */
@Test
public void testMergeProviders() {
  /*
  Create 10 providers, each with a sample drawn from a gaussian distribution.
  Every drawn value is also added to the commons math reference statistics so
  that the merged provider can be validated against them.
   */
  GaussianRandomGenerator gaussian = new GaussianRandomGenerator(new MersenneTwister(1L));
  SummaryStatistics referenceSummary = new SummaryStatistics();
  DescriptiveStatistics referenceDescriptive = new DescriptiveStatistics();
  List<StatisticsProvider> providers = new ArrayList<>();
  for (int batch = 0; batch < 10; batch++) {
    List<Double> sample = new ArrayList<>();
    for (int draw = 0; draw < 100; draw++) {
      double value = gaussian.nextNormalizedDouble();
      sample.add(value);
      referenceSummary.addValue(value);
      referenceDescriptive.addValue(value);
    }
    String addExpression = "STATS_ADD(STATS_INIT(), " + Joiner.on(",").join(sample) + ")";
    providers.add((StatisticsProvider) run(addExpression, new HashMap<>()));
  }

  /*
  Merge the providers and validate
   */
  Map<String, Object> providerVariables = new HashMap<>();
  int index = 0;
  for (StatisticsProvider provider : providers) {
    providerVariables.put("provider_" + index, provider);
    index++;
  }
  String mergeExpression = "STATS_MERGE([" + Joiner.on(",").join(providerVariables.keySet()) + "])";
  StatisticsProvider mergedProvider = (StatisticsProvider) run(mergeExpression, providerVariables);
  OnlineStatisticsProviderTest.validateStatisticsProvider(mergedProvider, referenceSummary , referenceDescriptive);

}
 
Example #18
Source File: RandomGeneratorAbstractTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks that the sample mean and standard deviation of {@code nextDouble()}
 * match those of a uniform distribution on the unit interval (mean 0.5,
 * standard deviation 1 / (2 * sqrt(3))).
 */
@Test
public void testDoubleDirect() {
    SummaryStatistics sample = new SummaryStatistics();
    final int N = 10000;
    for (int i = 0; i < N; ++i) {
        sample.addValue(generator.nextDouble());
    }
    // The standard error of the mean of N uniform(0,1) deviates is
    // sqrt(1 / (12 N)); 2.576 is the two-sided 99% normal quantile, which is
    // what makes the "1 in 100" statement in the message true. The former
    // tolerance, sqrt(N / 12) * 2.576, was about 74 and could never be exceeded
    // by a mean that lies in [0, 1].
    final double meanStandardError = FastMath.sqrt(1.0 / (12.0 * N));
    Assert.assertEquals("Note: This test will fail randomly about 1 in 100 times.",
            0.5, sample.getMean(), meanStandardError * 2.576);
    Assert.assertEquals(1.0 / (2.0 * FastMath.sqrt(3.0)),
                 sample.getStandardDeviation(), 0.01);
}
 
Example #19
Source File: EdgeGrid16Full.java    From cineast with MIT License 5 votes vote down vote up
/**
 * Computes the edge-grid feature of a segment and persists it, unless the
 * segment has no representative frame or a feature is already stored for its
 * id.
 * <p>
 * For every video frame the edge pixels are partitioned into a 16 x 16 grid;
 * each grid cell accumulates, over all frames, the fraction of pixels that
 * are edge pixels. The means of the leading cells form the persisted vector.
 *
 * @param shot the segment to process
 */
@Override
public void processSegment(SegmentContainer shot) {
  if (shot.getMostRepresentativeFrame() == VideoFrame.EMPTY_VIDEO_FRAME) {
    return;
  }
  if (phandler.idExists(shot.getId())) {
    // already persisted for this segment
    return;
  }

  // one accumulator per cell of the 16 x 16 grid
  SummaryStatistics[] cellStats = new SummaryStatistics[256];
  for (int cell = 0; cell < 256; ++cell) {
    cellStats[cell] = new SummaryStatistics();
  }

  List<Boolean> edgePixels = new ArrayList<>();
  for (VideoFrame frame : shot.getVideoFrames()) {
    MultiImage img = frame.getImage();
    edgePixels = EdgeImg.getEdgePixels(img, edgePixels);
    ArrayList<LinkedList<Boolean>> partition = GridPartitioner.partition(edgePixels,
        img.getWidth(), img.getHeight(), 16, 16);
    int cell = 0;
    for (LinkedList<Boolean> cellPixels : partition) {
      SummaryStatistics cellStat = cellStats[cell++];
      for (boolean isEdge : cellPixels) {
        cellStat.addValue(isEdge ? 1 : 0);
      }
    }
  }

  // NOTE(review): 256 cells are accumulated above but only the first 64 means
  // are persisted - confirm whether the stored vector is meant to have 64 or
  // 256 components before changing either number.
  float[] result = new float[64];
  for (int cell = 0; cell < 64; ++cell) {
    result[cell] = (float) cellStats[cell].getMean();
  }
  persist(shot.getId(), new FloatVectorImpl(result));
}
 
Example #20
Source File: RandomGeneratorAbstractTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks that the sample mean and standard deviation of {@code nextDouble()}
 * match those of a uniform distribution on the unit interval (mean 0.5,
 * standard deviation 1 / (2 * sqrt(3))).
 */
@Test
public void testDoubleDirect() {
    SummaryStatistics sample = new SummaryStatistics();
    final int N = 10000;
    for (int i = 0; i < N; ++i) {
        sample.addValue(generator.nextDouble());
    }
    // The standard error of the mean of N uniform(0,1) deviates is
    // sqrt(1 / (12 N)); 2.576 is the two-sided 99% normal quantile, which is
    // what makes the "1 in 100" statement in the message true. The former
    // tolerance, sqrt(N / 12) * 2.576, was about 74 and could never be exceeded
    // by a mean that lies in [0, 1].
    final double meanStandardError = FastMath.sqrt(1.0 / (12.0 * N));
    Assert.assertEquals("Note: This test will fail randomly about 1 in 100 times.",
            0.5, sample.getMean(), meanStandardError * 2.576);
    Assert.assertEquals(1.0 / (2.0 * FastMath.sqrt(3.0)),
                 sample.getStandardDeviation(), 0.01);
}
 
Example #21
Source File: AverageHPCP.java    From cineast with MIT License 5 votes vote down vote up
/**
 * Returns a list of feature vectors given a SegmentContainer.
 * <p>
 * The HPCP frames of the segment are grouped into consecutive windows of
 * {@code average} frames. For each window and each HPCP bin, the mean and
 * the standard deviation over the window are stored at positions
 * {@code 2*k} and {@code 2*k+1} of the feature, which is then L2-normalized.
 *
 * @param segment SegmentContainer for which to calculate the feature vectors.
 * @return List of HPCP Shingle feature vectors; empty if no STFT is available.
 */
private List<float[]> getFeatures(SegmentContainer segment) {
    /* Create STFT. If this fails, return empty list. */
    final Pair<Integer,Integer> parameters = FFTUtil.parametersForDuration(segment.getSamplingrate(), WINDOW_SIZE);
    final STFT stft = segment.getSTFT(parameters.first, (parameters.first-2*parameters.second)/2,parameters.second, new HanningWindow());
    if (stft == null) {
        return new ArrayList<>();
    }

    final HPCP hpcps = new HPCP(this.resolution, this.min_frequency, this.max_frequency);
    hpcps.addContribution(stft);

    /* Determine number of vectors that will result from the data. */
    final int vectors = hpcps.size()/this.average;
    final int bins = this.resolution.bins;

    final List<float[]> features = new ArrayList<>(vectors);
    for (int window = 0; window < vectors; window++) {
        /* Fresh accumulators for every window, one per HPCP bin. */
        final SummaryStatistics[] perBin = new SummaryStatistics[bins];
        for (int k = 0; k < bins; k++) {
            perBin[k] = new SummaryStatistics();
        }

        final int firstFrame = window*this.average;
        for (int offset = 0; offset < this.average; offset++) {
            for (int k = 0; k < bins; k++) {
                perBin[k].addValue(hpcps.getHpcp(firstFrame + offset)[k]);
            }
        }

        /* Interleave mean and standard deviation per bin. */
        final float[] feature = new float[2*bins];
        for (int k = 0; k < bins; k++) {
            feature[2*k] = (float)perBin[k].getMean();
            feature[2*k+1] = (float)perBin[k].getStandardDeviation();
        }
        features.add(MathHelper.normalizeL2(feature));
    }
    return features;
}
 
Example #22
Source File: RandomGeneratorAbstractTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks that the sample mean and standard deviation of {@code nextDouble()}
 * match those of a uniform distribution on the unit interval (mean 0.5,
 * standard deviation 1 / (2 * sqrt(3))).
 */
@Test
public void testDoubleDirect() {
    SummaryStatistics sample = new SummaryStatistics();
    final int N = 10000;
    for (int i = 0; i < N; ++i) {
        sample.addValue(generator.nextDouble());
    }
    // The standard error of the mean of N uniform(0,1) deviates is
    // sqrt(1 / (12 N)); 2.576 is the two-sided 99% normal quantile, which is
    // what makes the "1 in 100" statement in the message true. The former
    // tolerance, sqrt(N / 12) * 2.576, was about 74 and could never be exceeded
    // by a mean that lies in [0, 1].
    final double meanStandardError = FastMath.sqrt(1.0 / (12.0 * N));
    Assert.assertEquals("Note: This test will fail randomly about 1 in 100 times.",
            0.5, sample.getMean(), meanStandardError * 2.576);
    Assert.assertEquals(1.0 / (2.0 * FastMath.sqrt(3.0)),
                 sample.getStandardDeviation(), 0.01);
}
 
Example #23
Source File: StatisticsMetadataImpl.java    From january with Eclipse Public License 1.0 5 votes vote down vote up
/**
 * Copy constructor. Scalar fields are copied, the dataset is replaced by a
 * view obtained with {@code getView(false)}, and the axis statistics map and
 * the max/min and summary arrays are copied into new containers. The
 * elements held by those containers are shared with the source, not cloned.
 *
 * @param statsMetadata the instance to copy from
 */
@SuppressWarnings("unchecked")
private StatisticsMetadataImpl(StatisticsMetadataImpl<T> statsMetadata) {
	hash = statsMetadata.hash;
	isize = statsMetadata.isize;
	clazz = statsMetadata.clazz;
	isFloat = statsMetadata.isFloat;
	dataset = statsMetadata.dataset.getView(false);
	axisStats = new HashMap<>(statsMetadata.axisStats);

	// new array, same MaxMin elements
	if (statsMetadata.mms != null) {
		mms = new MaxMin[COMBOS];
		System.arraycopy(statsMetadata.mms, 0, mms, 0, mms.length);
	}

	// new outer and inner arrays, same SummaryStatistics elements
	summaries = new SummaryStatistics[COMBOS][];
	for (int combo = 0; combo < summaries.length; combo++) {
		SummaryStatistics[] source = statsMetadata.summaries[combo];
		if (source == null) {
			continue;
		}
		SummaryStatistics[] copy = new SummaryStatistics[isize];
		System.arraycopy(source, 0, copy, 0, isize);
		summaries[combo] = copy;
	}

	isDirty = statsMetadata.isDirty;
}
 
Example #24
Source File: AbstractOwlSim.java    From owltools with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Collects the information content (IC) of every class in the given set into
 * one {@link SummaryStatistics}.
 * <p>
 * A class without an IC value, or with an infinite or NaN one, is assumed to
 * be very rare and is given the maximum IC from the overall summary
 * statistics. A class that is reported as unknown is logged and left out of
 * the calculation.
 *
 * @param atts the classes whose IC values are summarised
 * @return the statistics over the IC values of the accepted classes
 */
public SummaryStatistics computeAttributeSetSimilarityStats(Set<OWLClass> atts)  {

	SummaryStatistics attributeSetStats = new SummaryStatistics();
	OWLDataFactory dataFactory = getSourceOntology().getOWLOntologyManager().getOWLDataFactory();

	for (OWLClass attribute : atts) {
		try {
			Double ic = this.getInformationContentForAttribute(attribute);
			if (ic == null) {
				if (dataFactory.getOWLClass(attribute.getIRI()) == null) {
					throw new UnknownOWLClassException(attribute);
				}
				// The class has not been annotated in the loaded corpus:
				// assume it is very rare and assign MaxIC.
				ic = this.getSummaryStatistics().max.getMax();
			}
			if (ic.isInfinite() || ic.isNaN()) {
				// Same assumption for unusable values. Skipping the value
				// would be the alternative, but it is unclear that is wise.
				ic = this.getSummaryStatistics().max.getMax();
			}
			attributeSetStats.addValue(ic);
		} catch (UnknownOWLClassException e) {
			// This is an extra catch here, but really it should be caught upstream.
			LOG.info("Unknown class "+attribute.toStringID()+" submitted for summary stats. Removed from calculation.");
		}
	}
	return attributeSetStats;
}
 
Example #25
Source File: EmpiricalDistribution.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Private constructor that takes the random data generator held in the
 * {@link #randomData} instance variable. The generator returned by
 * {@code randomData.getRandomGenerator()} is handed to the superclass, and
 * the list of bin statistics starts out empty.
 *
 * @param binCount number of bins
 * @param randomData Random data generator.
 */
private EmpiricalDistribution(int binCount,
                              RandomDataGenerator randomData) {
    super(randomData.getRandomGenerator());
    this.randomData = randomData;
    this.binCount = binCount;
    this.binStats = new ArrayList<SummaryStatistics>();
}
 
Example #26
Source File: EmpiricalDistribution.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Private constructor to allow lazy initialisation of the RNG contained
 * in the {@link #randomData} instance variable: the superclass receives
 * {@code null} instead of a generator, and the list of bin statistics starts
 * out empty.
 *
 * @param binCount number of bins
 * @param randomData Random data generator.
 */
private EmpiricalDistribution(int binCount,
                              RandomDataGenerator randomData) {
    super(null);
    this.randomData = randomData;
    this.binCount = binCount;
    this.binStats = new ArrayList<SummaryStatistics>();
}
 
Example #27
Source File: EmpiricalDistribution.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * {@inheritDoc}
 * <p>
 * Reads the input line by line, parses each line as a {@code double} and adds
 * it to the statistics of the bin selected by {@code findBin}. The input is
 * closed and the reference to it cleared when reading ends, including when a
 * line cannot be read or parsed.
 *
 * @throws IOException if reading from or closing the input fails
 */
@Override
public void computeBinStats() throws IOException {
    try {
        String line;
        while ((line = inputStream.readLine()) != null) {
            final double val = Double.parseDouble(line);
            binStats.get(findBin(val)).addValue(val);
        }
    } finally {
        // Release the input on the failure path as well, so that a bad line
        // does not leave it open behind a stale reference.
        try {
            if (inputStream != null) {
                inputStream.close();
            }
        } finally {
            inputStream = null;
        }
    }
}
 
Example #28
Source File: JacobianMatricesTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Shows that recovering dy/dp by external finite differences on
 * low-accuracy integration results yields essentially noise: the residuals
 * against the model's own derivatives must be widely spread.
 */
@Test
public void testLowAccuracyExternalDifferentiation()
    throws NumberIsTooSmallException, DimensionMismatchException,
           MaxCountExceededException, NoBracketingException {
    // this test does not really test JacobianMatrices,
    // it only shows that WITHOUT this class, attempting to recover
    // the jacobians from external differentiation on simple integration
    // results with low accuracy gives very poor results. In fact,
    // the curves dy/dp = g(b) when b varies from 2.88 to 3.08 are
    // essentially noise.
    // This test is taken from Hairer, Norsett and Wanner book
    // Solving Ordinary Differential Equations I (Nonstiff problems),
    // the curves dy/dp = g(b) are in figure 6.5
    final double[] absoluteTolerance = new double[] { 1.0e-4, 1.0e-4 };
    final double[] relativeTolerance = new double[] { 1.0e-4, 1.0e-4 };
    final FirstOrderIntegrator integrator =
        new DormandPrince54Integrator(1.0e-8, 100.0, absoluteTolerance, relativeTolerance);
    final double parameterStep = 1.0e-12;

    final SummaryStatistics residualsP0 = new SummaryStatistics();
    final SummaryStatistics residualsP1 = new SummaryStatistics();
    for (double b = 2.88; b < 3.08; b += 0.001) {
        final Brusselator model = new Brusselator(b);

        // nominal and perturbed initial states, each integrated in place
        final double[] nominal = { 1.3, b };
        integrator.integrate(model, 0, nominal, 20.0, nominal);
        final double[] perturbed = { 1.3, b + parameterStep };
        integrator.integrate(model, 0, perturbed, 20.0, perturbed);

        final double slope0 = (perturbed[0] - nominal[0]) / parameterStep;
        final double slope1 = (perturbed[1] - nominal[1]) / parameterStep;
        residualsP0.addValue(slope0 - model.dYdP0());
        residualsP1.addValue(slope1 - model.dYdP1());
    }

    final double spreadP0 = residualsP0.getMax() - residualsP0.getMin();
    final double spreadP1 = residualsP1.getMax() - residualsP1.getMin();
    Assert.assertTrue(spreadP0 > 500);
    Assert.assertTrue(residualsP0.getStandardDeviation() > 30);
    Assert.assertTrue(spreadP1 > 700);
    Assert.assertTrue(residualsP1.getStandardDeviation() > 40);
}
 
Example #29
Source File: StatisticsMetadataImpl.java    From january with Eclipse Public License 1.0 5 votes vote down vote up
/**
 * Returns the variance taken from the summaries selected by
 * {@code refresh(false, ignoreInvalids)}. With a single element per item the
 * variance of that element is returned; otherwise the per-element variances
 * are added up.
 *
 * @param isWholePopulation if true, use the population variance of each
 *        summary, otherwise its sample variance
 * @param ignoreInvalids forwarded unchanged to {@code refresh}
 * @return the variance, or the sum of the per-element variances
 */
@Override
public double getVariance(boolean isWholePopulation, boolean... ignoreInvalids) { // TODO
	final SummaryStatistics[] perElement = summaries[refresh(false, ignoreInvalids)];

	if (isize == 1) {
		final SummaryStatistics only = perElement[0];
		if (isWholePopulation) {
			return only.getPopulationVariance();
		}
		return only.getVariance();
	}

	double total = 0;
	for (int element = 0; element < isize; element++) {
		if (isWholePopulation) {
			total += perElement[element].getPopulationVariance();
		} else {
			total += perElement[element].getVariance();
		}
	}
	return total;
}
 
Example #30
Source File: TabletStatistic.java    From timely with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a statistic for one key. The supplied map is stored as given (no
 * copy is made). The TIME and SIZE entries are looked up once, and an empty
 * {@link SummaryStatistics} stands in for either entry that is absent.
 *
 * @param keyName name of the key
 * @param keyCount count associated with the key
 * @param stats statistics by type
 */
TabletStatistic(String keyName, int keyCount, Map<TabletStatisticType, SummaryStatistics> stats) {
    this.timeStat = stats.getOrDefault(TabletStatisticType.TIME, new SummaryStatistics());
    this.sizeStat = stats.getOrDefault(TabletStatisticType.SIZE, new SummaryStatistics());

    this.stats = stats;
    this.keyCount = keyCount;
    this.keyName = keyName;
}