org.apache.commons.math3.stat.descriptive.moment.Variance Java Examples

The following examples show how to use org.apache.commons.math3.stat.descriptive.moment.Variance. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SliceSamplerUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Tests slice sampling of a monotonic beta distribution (alpha = 10, beta = 1) as an example of
 * sampling of a bounded random variable.
 * Draws 10000 samples and checks that the distribution's mean and variance are recovered to a
 * relative error of 0.5% and 2%, respectively.
 */
@Test
public void testSliceSamplingOfMonotonicBetaDistribution() {
    rng.setSeed(RANDOM_SEED);

    // reference distribution; its log-density is what the sampler is given
    final BetaDistribution reference = new BetaDistribution(10., 1.);
    final Function<Double, Double> logDensity = reference::logDensity;
    final double expectedMean = reference.getNumericalMean();
    final double expectedVariance = reference.getNumericalVariance();

    // sampler configuration
    final double lowerBound = 0.;
    final double upperBound = 1.;
    final double samplerWidth = 0.1;
    final double startingPoint = 0.5;
    final int sampleCount = 10000;
    final SliceSampler sampler = new SliceSampler(rng, logDensity, lowerBound, upperBound, samplerWidth);
    final double[] draws = Doubles.toArray(sampler.sample(startingPoint, sampleCount));

    final double observedMean = new Mean().evaluate(draws);
    final double observedVariance = new Variance().evaluate(draws);
    Assert.assertEquals(relativeError(observedMean, expectedMean), 0., 0.005);
    Assert.assertEquals(relativeError(observedVariance, expectedVariance), 0., 0.02);
}
 
Example #2
Source File: SumOfClusterVariances.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Scores a clustering as the sum, over all non-empty clusters, of the variance of the
 * distances between each point of a cluster and the center obtained from
 * {@code centroidOf(cluster)}.
 *
 * @param clusters the clusters to evaluate
 * @return the accumulated distance variance; {@code 0.0} if no cluster contains any point
 */
@Override
public double score(final List<? extends Cluster<T>> clusters) {
    double total = 0.0;
    for (final Cluster<T> current : clusters) {
        if (current.getPoints().isEmpty()) {
            // empty clusters contribute nothing to the score
            continue;
        }

        final Clusterable centroid = centroidOf(current);

        // feed every point-to-centroid distance of this cluster into one Variance
        final Variance distanceVariance = new Variance();
        for (final T member : current.getPoints()) {
            distanceVariance.increment(distance(member, centroid));
        }
        total += distanceVariance.getResult();
    }
    return total;
}
 
Example #3
Source File: Covariance.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Compute a covariance matrix from a matrix whose columns represent
 * covariates.
 * <p>Off-diagonal entries are filled symmetrically from the pairwise column
 * covariances; each diagonal entry is the variance of the corresponding column.</p>
 * @param matrix input matrix (must have at least one column and two rows)
 * @param biasCorrected determines whether or not covariance estimates are bias-corrected
 * @return covariance matrix
 * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
 */
protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected)
throws MathIllegalArgumentException {
    final int dimension = matrix.getColumnDimension();
    final Variance variance = new Variance(biasCorrected);
    final RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension);
    for (int i = 0; i < dimension; i++) {
        // Column i is invariant for the whole inner loop and the diagonal entry:
        // fetch it once instead of once per (i, j) pair.
        // NOTE(review): relies on covariance(...) and Variance.evaluate(...) not
        // modifying their array arguments - confirm.
        final double[] columnI = matrix.getColumn(i);
        for (int j = 0; j < i; j++) {
            final double cov = covariance(columnI, matrix.getColumn(j), biasCorrected);
            // the covariance matrix is symmetric, so mirror the value
            outMatrix.setEntry(i, j, cov);
            outMatrix.setEntry(j, i, cov);
        }
        outMatrix.setEntry(i, i, variance.evaluate(columnI));
    }
    return outMatrix;
}
 
Example #4
Source File: SliceSamplerUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Tests slice sampling of a peaked beta distribution (alpha = 10, beta = 4) as an example of
 * sampling of a bounded random variable.
 * Draws 10000 samples and checks that the distribution's mean and variance are recovered to a
 * relative error of 0.5% and 2%, respectively.
 */
@Test
public void testSliceSamplingOfPeakedBetaDistribution() {
    rng.setSeed(RANDOM_SEED);

    // reference distribution; its log-density is what the sampler is given
    final BetaDistribution reference = new BetaDistribution(10., 4.);
    final Function<Double, Double> logDensity = reference::logDensity;
    final double expectedMean = reference.getNumericalMean();
    final double expectedVariance = reference.getNumericalVariance();

    // sampler configuration
    final double lowerBound = 0.;
    final double upperBound = 1.;
    final double samplerWidth = 0.1;
    final double startingPoint = 0.5;
    final int sampleCount = 10000;
    final SliceSampler sampler = new SliceSampler(rng, logDensity, lowerBound, upperBound, samplerWidth);
    final double[] draws = Doubles.toArray(sampler.sample(startingPoint, sampleCount));

    final double observedMean = new Mean().evaluate(draws);
    final double observedVariance = new Variance().evaluate(draws);
    Assert.assertEquals(relativeError(observedMean, expectedMean), 0., 0.005);
    Assert.assertEquals(relativeError(observedVariance, expectedVariance), 0., 0.02);
}
 
Example #5
Source File: Covariance.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Compute a covariance matrix from a matrix whose columns represent
 * covariates.
 * <p>Off-diagonal entries are filled symmetrically from the pairwise column
 * covariances; each diagonal entry is the variance of the corresponding column.</p>
 * @param matrix input matrix (must have at least one column and two rows)
 * @param biasCorrected determines whether or not covariance estimates are bias-corrected
 * @return covariance matrix
 * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
 */
protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected)
throws MathIllegalArgumentException {
    final int dimension = matrix.getColumnDimension();
    final Variance variance = new Variance(biasCorrected);
    final RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension);
    for (int i = 0; i < dimension; i++) {
        // Column i is invariant for the whole inner loop and the diagonal entry:
        // fetch it once instead of once per (i, j) pair.
        // NOTE(review): relies on covariance(...) and Variance.evaluate(...) not
        // modifying their array arguments - confirm.
        final double[] columnI = matrix.getColumn(i);
        for (int j = 0; j < i; j++) {
            final double cov = covariance(columnI, matrix.getColumn(j), biasCorrected);
            // the covariance matrix is symmetric, so mirror the value
            outMatrix.setEntry(i, j, cov);
            outMatrix.setEntry(j, i, cov);
        }
        outMatrix.setEntry(i, i, variance.evaluate(columnI));
    }
    return outMatrix;
}
 
Example #6
Source File: Covariance.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Compute a covariance matrix from a matrix whose columns represent
 * covariates.
 * <p>Off-diagonal entries are filled symmetrically from the pairwise column
 * covariances; each diagonal entry is the variance of the corresponding column.</p>
 * @param matrix input matrix (must have at least one column and two rows)
 * @param biasCorrected determines whether or not covariance estimates are bias-corrected
 * @return covariance matrix
 * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
 */
protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected)
throws MathIllegalArgumentException {
    final int dimension = matrix.getColumnDimension();
    final Variance variance = new Variance(biasCorrected);
    final RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension);
    for (int i = 0; i < dimension; i++) {
        // Column i is invariant for the whole inner loop and the diagonal entry:
        // fetch it once instead of once per (i, j) pair.
        // NOTE(review): relies on covariance(...) and Variance.evaluate(...) not
        // modifying their array arguments - confirm.
        final double[] columnI = matrix.getColumn(i);
        for (int j = 0; j < i; j++) {
            final double cov = covariance(columnI, matrix.getColumn(j), biasCorrected);
            // the covariance matrix is symmetric, so mirror the value
            outMatrix.setEntry(i, j, cov);
            outMatrix.setEntry(j, i, cov);
        }
        outMatrix.setEntry(i, i, variance.evaluate(columnI));
    }
    return outMatrix;
}
 
Example #7
Source File: SliceSamplerUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Test slice sampling of a peaked beta distribution (alpha = 10, beta = 4) as an example of
 * sampling of a bounded random variable.
 * Draws 10000 samples and checks that the distribution's mean and variance are recovered to a
 * relative error of 0.5% and 2%, respectively.
 */
@Test
public void testSliceSamplingOfPeakedBetaDistribution() {
    rng.setSeed(RANDOM_SEED);

    // reference distribution; its log-density is what the sampler is given
    final BetaDistribution reference = new BetaDistribution(10., 4.);
    final Function<Double, Double> logDensity = reference::logDensity;

    // sampler configuration
    final double lowerBound = 0.;
    final double upperBound = 1.;
    final double samplerWidth = 0.1;
    final double startingPoint = 0.5;
    final int sampleCount = 10000;
    final SliceSampler sampler = new SliceSampler(rng, logDensity, lowerBound, upperBound, samplerWidth);
    final double[] draws = Doubles.toArray(sampler.sample(startingPoint, sampleCount));

    // compare the sample moments with the distribution's numerical moments
    final double expectedMean = reference.getNumericalMean();
    final double expectedVariance = reference.getNumericalVariance();
    final double observedMean = new Mean().evaluate(draws);
    final double observedVariance = new Variance().evaluate(draws);
    Assert.assertEquals(relativeError(observedMean, expectedMean), 0., 0.005);
    Assert.assertEquals(relativeError(observedVariance, expectedVariance), 0., 0.02);
}
 
Example #8
Source File: SliceSamplerUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Test slice sampling of a monotonic beta distribution (alpha = 10, beta = 1) as an example of
 * sampling of a bounded random variable.
 * Draws 10000 samples and checks that the distribution's mean and variance are recovered to a
 * relative error of 0.5% and 2%, respectively.
 */
@Test
public void testSliceSamplingOfMonotonicBetaDistribution() {
    rng.setSeed(RANDOM_SEED);

    // reference distribution; its log-density is what the sampler is given
    final BetaDistribution reference = new BetaDistribution(10., 1.);
    final Function<Double, Double> logDensity = reference::logDensity;

    // sampler configuration
    final double lowerBound = 0.;
    final double upperBound = 1.;
    final double samplerWidth = 0.1;
    final double startingPoint = 0.5;
    final int sampleCount = 10000;
    final SliceSampler sampler = new SliceSampler(rng, logDensity, lowerBound, upperBound, samplerWidth);
    final double[] draws = Doubles.toArray(sampler.sample(startingPoint, sampleCount));

    // compare the sample moments with the distribution's numerical moments
    final double expectedMean = reference.getNumericalMean();
    final double expectedVariance = reference.getNumericalVariance();
    final double observedMean = new Mean().evaluate(draws);
    final double observedVariance = new Variance().evaluate(draws);
    Assert.assertEquals(relativeError(observedMean, expectedMean), 0., 0.005);
    Assert.assertEquals(relativeError(observedVariance, expectedVariance), 0., 0.02);
}
 
Example #9
Source File: Covariance.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Compute a covariance matrix from a matrix whose columns represent
 * covariates.
 * <p>Off-diagonal entries are filled symmetrically from the pairwise column
 * covariances; each diagonal entry is the variance of the corresponding column.</p>
 * @param matrix input matrix (must have at least one column and two rows)
 * @param biasCorrected determines whether or not covariance estimates are bias-corrected
 * @return covariance matrix
 * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
 */
protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected)
throws MathIllegalArgumentException {
    final int dimension = matrix.getColumnDimension();
    final Variance variance = new Variance(biasCorrected);
    final RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension);
    for (int i = 0; i < dimension; i++) {
        // Column i is invariant for the whole inner loop and the diagonal entry:
        // fetch it once instead of once per (i, j) pair.
        // NOTE(review): relies on covariance(...) and Variance.evaluate(...) not
        // modifying their array arguments - confirm.
        final double[] columnI = matrix.getColumn(i);
        for (int j = 0; j < i; j++) {
            final double cov = covariance(columnI, matrix.getColumn(j), biasCorrected);
            // the covariance matrix is symmetric, so mirror the value
            outMatrix.setEntry(i, j, cov);
            outMatrix.setEntry(j, i, cov);
        }
        outMatrix.setEntry(i, i, variance.evaluate(columnI));
    }
    return outMatrix;
}
 
Example #10
Source File: SummaryStatisticsTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * JIRA: MATH-691
 * <p>Checks that a {@code SummaryStatistics} whose variance implementation was replaced by
 * {@code new Variance(false)} reports exactly the value that {@code new Variance(false)}
 * evaluates directly on the same data.</p>
 */
@Test
public void testOverrideVarianceWithMathClass() {
    final double[] scores = {1, 2, 3, 4};
    final SummaryStatistics stats = new SummaryStatistics();
    // use "population variance"
    stats.setVarianceImpl(new Variance(false));
    for (int k = 0; k < scores.length; k++) {
        stats.addValue(scores[k]);
    }
    final double expected = new Variance(false).evaluate(scores);
    Assert.assertEquals(expected, stats.getVariance(), 0);
}
 
Example #11
Source File: KMeansPlusPlusClusterer.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Get a random point from the {@link Cluster} with the largest distance variance.
 * <p>The returned point is removed from the list obtained from the selected
 * cluster's {@code getPoints()}.</p>
 *
 * @param clusters the {@link Cluster}s to search
 * @return a random point from the selected cluster
 * @throws ConvergenceException if no cluster could be selected (e.g. all clusters are empty)
 */
private T getPointFromLargestVarianceCluster(final Collection<Cluster<T>> clusters) {

    Cluster<T> widest = null;
    double widestVariance = Double.NEGATIVE_INFINITY;
    for (final Cluster<T> candidate : clusters) {
        final List<T> members = candidate.getPoints();
        if (members.isEmpty()) {
            // empty clusters cannot provide a point
            continue;
        }

        // variance of the distances between the cluster's points and its center
        final T center = candidate.getCenter();
        final Variance spread = new Variance();
        for (final T member : members) {
            spread.increment(member.distanceFrom(center));
        }
        final double candidateVariance = spread.getResult();

        // strict comparison: on ties the earlier cluster is kept
        if (candidateVariance > widestVariance) {
            widestVariance = candidateVariance;
            widest = candidate;
        }
    }

    // no cluster was selected
    if (widest == null) {
        throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
    }

    // pick a random index and take that point out of the cluster's point list
    final List<T> pool = widest.getPoints();
    final int pick = random.nextInt(pool.size());
    return pool.remove(pick);

}
 
Example #12
Source File: DescriptiveStatisticsTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Adds the values 0 to 19 to a {@code DescriptiveStatistics} with window size 5 and, after
 * each addition, checks that a {@code SummaryStatistics} refilled from
 * {@code getValues()} and the stand-alone statistics evaluated on the same array agree with
 * it to within 1E-12.
 */
@Test
public void testSummaryConsistency() {
    final double tol = 1E-12;
    final int windowSize = 5;
    final DescriptiveStatistics windowed = new DescriptiveStatistics();
    windowed.setWindowSize(windowSize);
    final SummaryStatistics summary = new SummaryStatistics();
    for (int added = 0; added < 20; added++) {
        windowed.addValue(added);

        // refill the summary from scratch with windowed.getValues()
        summary.clear();
        final double[] current = windowed.getValues();
        for (final double value : current) {
            summary.addValue(value);
        }

        // mean
        TestUtils.assertEquals(windowed.getMean(), summary.getMean(), tol);
        TestUtils.assertEquals(new Mean().evaluate(current), windowed.getMean(), tol);
        // maximum
        TestUtils.assertEquals(windowed.getMax(), summary.getMax(), tol);
        TestUtils.assertEquals(new Max().evaluate(current), windowed.getMax(), tol);
        // geometric mean
        TestUtils.assertEquals(windowed.getGeometricMean(), summary.getGeometricMean(), tol);
        TestUtils.assertEquals(new GeometricMean().evaluate(current), windowed.getGeometricMean(), tol);
        // minimum
        TestUtils.assertEquals(windowed.getMin(), summary.getMin(), tol);
        TestUtils.assertEquals(new Min().evaluate(current), windowed.getMin(), tol);
        // standard deviation and variance
        TestUtils.assertEquals(windowed.getStandardDeviation(), summary.getStandardDeviation(), tol);
        TestUtils.assertEquals(windowed.getVariance(), summary.getVariance(), tol);
        TestUtils.assertEquals(new Variance().evaluate(current), windowed.getVariance(), tol);
        // sum and sum of squares
        TestUtils.assertEquals(windowed.getSum(), summary.getSum(), tol);
        TestUtils.assertEquals(new Sum().evaluate(current), windowed.getSum(), tol);
        TestUtils.assertEquals(windowed.getSumsq(), summary.getSumsq(), tol);
        TestUtils.assertEquals(new SumOfSquares().evaluate(current), windowed.getSumsq(), tol);
        // population variance
        TestUtils.assertEquals(windowed.getPopulationVariance(), summary.getPopulationVariance(), tol);
        TestUtils.assertEquals(new Variance(false).evaluate(current), windowed.getPopulationVariance(), tol);
    }
}
 
Example #13
Source File: CNLOHCaller.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Attempt to get an idea of segment mean variance near copy neutral.
 *
 * @param segments Never {@code null}
 * @param meanBiasInCR offset added to 1 to obtain the copy ratio that is treated as copy neutral
 * @return variance of segment mean (in CR space) of segments that are "close enough" to copy neutral.
 *   Zero if no segments are "close enough"
 */
private double calculateVarianceOfCopyNeutralSegmentMeans(final List<ACNVModeledSegment> segments, final double meanBiasInCR) {
    Utils.nonNull(segments);

    // Only consider values "close enough" to copy neutral (CR == 1, shifted by the mean bias).
    final double neutralCR = 1 + meanBiasInCR;
    final double[] neutralSegmentMeans = segments.stream()
            .mapToDouble(ACNVModeledSegment::getSegmentMeanInCRSpace)
            .filter(m -> Math.abs(m - neutralCR) < CLOSE_ENOUGH_TO_COPY_NEUTRAL_IN_CR).toArray();

    // Variance.evaluate yields NaN for an empty array; honor the documented
    // "zero if no segments are close enough" contract explicitly.
    if (neutralSegmentMeans.length == 0) {
        return 0.0;
    }
    return new Variance().evaluate(neutralSegmentMeans);
}
 
Example #14
Source File: KMeansPlusPlusClusterer.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Get a random point from the {@link Cluster} with the largest distance variance.
 * <p>The returned point is removed from the list obtained from the selected
 * cluster's {@code getPoints()}.</p>
 *
 * @param clusters the {@link Cluster}s to search
 * @return a random point from the selected cluster
 * @throws ConvergenceException if clusters are all empty
 */
private T getPointFromLargestVarianceCluster(final Collection<Cluster<T>> clusters)
throws ConvergenceException {

    Cluster<T> widest = null;
    double widestVariance = Double.NEGATIVE_INFINITY;
    for (final Cluster<T> candidate : clusters) {
        final List<T> members = candidate.getPoints();
        if (members.isEmpty()) {
            // empty clusters cannot provide a point
            continue;
        }

        // variance of the distances between the cluster's points and its center
        final T center = candidate.getCenter();
        final Variance spread = new Variance();
        for (final T member : members) {
            spread.increment(member.distanceFrom(center));
        }
        final double candidateVariance = spread.getResult();

        // strict comparison: on ties the earlier cluster is kept
        if (candidateVariance > widestVariance) {
            widestVariance = candidateVariance;
            widest = candidate;
        }
    }

    // no cluster was selected
    if (widest == null) {
        throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
    }

    // pick a random index and take that point out of the cluster's point list
    final List<T> pool = widest.getPoints();
    final int pick = random.nextInt(pool.size());
    return pool.remove(pick);

}
 
Example #15
Source File: SummaryStatisticsTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * JIRA: MATH-691
 * <p>Checks that a {@code SummaryStatistics} whose variance implementation was replaced by
 * {@code new Variance(false)} reports exactly the value that {@code new Variance(false)}
 * evaluates directly on the same data.</p>
 */
@Test
public void testOverrideVarianceWithMathClass() throws Exception {
    final double[] scores = {1, 2, 3, 4};
    final SummaryStatistics stats = new SummaryStatistics();
    // use "population variance"
    stats.setVarianceImpl(new Variance(false));
    for (int k = 0; k < scores.length; k++) {
        stats.addValue(scores[k]);
    }
    final double expected = new Variance(false).evaluate(scores);
    Assert.assertEquals(expected, stats.getVariance(), 0);
}
 
Example #16
Source File: CorrelationExample.java    From Java-Data-Analysis with MIT License 5 votes vote down vote up
/**
 * Computes the correlation coefficient of the samples {@code data[0]} and {@code data[1]}:
 * their covariance divided by the product of their standard deviations.
 *
 * @param data array whose first two rows hold the x and the y sample
 * @return {@code cov(x, y) / (sigma(x) * sigma(y))}
 */
static double rho(double[][] data) {
    final double[] x = data[0];
    final double[] y = data[1];
    Variance v = new Variance();
    double sigX = Math.sqrt(v.evaluate(x));
    double sigY = Math.sqrt(v.evaluate(y));
    // covariance(double[], double[]) works on its arguments only, so the no-arg
    // constructor is sufficient. new Covariance(data) would additionally build a
    // covariance matrix from data (reading its columns as the variables) that is
    // never used here.
    double sigXY = new Covariance().covariance(x, y);
    return sigXY / (sigX * sigY);
}
 
Example #17
Source File: TestDoubleVarianceAggregation.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the value of {@code new Variance().evaluate(...)} for the sequence
 * {@code start, start + 1, ..., start + length - 1}, or {@code null} when the
 * sequence has fewer than two elements.
 */
@Override
protected Number getExpectedValue(int start, int length)
{
    if (length < 2) {
        return null;
    }

    final double[] sequence = new double[length];
    for (int offset = 0; offset < sequence.length; offset++) {
        sequence[offset] = start + offset;
    }

    return new Variance().evaluate(sequence);
}
 
Example #18
Source File: OwlSimVariance.java    From owltools with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Evaluates, for every key of {@code refBasedStats.getReferenceStats()}, the variance of the
 * candidates' IC data with respect to that reference's mean, and reports the entries that
 * belong to the {@code n} smallest variance values.
 * <p>This implementation can be heavily optimized.</p>
 *
 * @param candidates classes whose IC data is retrieved from {@code refBasedStats}
 * @param n maximum number of variance values (taken in ascending order) to report
 * @return map, in insertion order, from the strings returned by {@code findRefURIs} to the
 *         corresponding variance value
 */
public Map<String, Double> getTopNVarianceValues(Set<OWLClass> candidates, int n) {
	double[] candidateIC = refBasedStats.retrieveCandidatesIC(candidates);

	// Variance of the candidates against each cached reference entity
	Map<IRI, Double> varianceByReference = new LinkedHashMap<IRI, Double>();
	List<Double> ascendingVariances = new ArrayList<Double>();
	for (IRI reference : refBasedStats.getReferenceStats().keySet()) {
		double variance = new Variance().evaluate(candidateIC, refBasedStats.getReferenceStats().get(reference).getMean());
		ascendingVariances.add(variance);
		varianceByReference.put(reference, variance);
	}
	// Smallest variances first
	Collections.sort(ascendingVariances);

	// Walk over at most n of the sorted values and collect the references that produced them
	int limit = Math.min(n, ascendingVariances.size());
	Map<String, Double> topN = new LinkedHashMap<String, Double>();
	for (int rank = 0; rank < limit; rank++) {
		double varValue = ascendingVariances.get(rank);
		for (String uri : findRefURIs(varValue, varianceByReference)) {
			topN.put(uri, varValue);
		}
	}

	return topN;
}
 
Example #19
Source File: OwlSimVariance.java    From owltools with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Returns the variance of the candidates' IC data, evaluated with respect to the mean stored
 * for the given reference entity.
 *
 * @param candidates classes whose IC data is retrieved from {@code refBasedStats}
 * @param referenceEntity key looked up in {@code refBasedStats.getReferenceStats()}
 * @return the variance of the IC data against the reference's mean
 * @throws OwlSimVarianceEntityReferenceNotFoundException if the reference statistics have no
 *         entry for {@code referenceEntity}
 */
public double getVarianceValue(Set<OWLClass> candidates, IRI referenceEntity) throws OwlSimVarianceEntityReferenceNotFoundException {
	boolean referenceKnown = refBasedStats.getReferenceStats().containsKey(referenceEntity);
	if (!referenceKnown) {
		// Reference entity does not exist - this should probably not happen.
		throw new OwlSimVarianceEntityReferenceNotFoundException(referenceEntity);
	}

	// IC data for the candidates provided
	double[] candidateIC = refBasedStats.retrieveCandidatesIC(candidates);

	// Variance against the given reference concept
	Variance varianceStat = new Variance();
	return varianceStat.evaluate(candidateIC, refBasedStats.getReferenceStats().get(referenceEntity).getMean());
}
 
Example #20
Source File: DescriptiveStatisticsTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Adds the values 0 to 19 to a {@code DescriptiveStatistics} with window size 5 and, after
 * each addition, checks that a {@code SummaryStatistics} refilled from
 * {@code getValues()} and the stand-alone statistics evaluated on the same array agree with
 * it to within 1E-12.
 */
@Test
public void testSummaryConsistency() {
    final double tol = 1E-12;
    final int windowSize = 5;
    final DescriptiveStatistics windowed = new DescriptiveStatistics();
    windowed.setWindowSize(windowSize);
    final SummaryStatistics summary = new SummaryStatistics();
    for (int added = 0; added < 20; added++) {
        windowed.addValue(added);

        // refill the summary from scratch with windowed.getValues()
        summary.clear();
        final double[] current = windowed.getValues();
        for (final double value : current) {
            summary.addValue(value);
        }

        // mean
        TestUtils.assertEquals(windowed.getMean(), summary.getMean(), tol);
        TestUtils.assertEquals(new Mean().evaluate(current), windowed.getMean(), tol);
        // maximum
        TestUtils.assertEquals(windowed.getMax(), summary.getMax(), tol);
        TestUtils.assertEquals(new Max().evaluate(current), windowed.getMax(), tol);
        // geometric mean
        TestUtils.assertEquals(windowed.getGeometricMean(), summary.getGeometricMean(), tol);
        TestUtils.assertEquals(new GeometricMean().evaluate(current), windowed.getGeometricMean(), tol);
        // minimum
        TestUtils.assertEquals(windowed.getMin(), summary.getMin(), tol);
        TestUtils.assertEquals(new Min().evaluate(current), windowed.getMin(), tol);
        // standard deviation and variance
        TestUtils.assertEquals(windowed.getStandardDeviation(), summary.getStandardDeviation(), tol);
        TestUtils.assertEquals(windowed.getVariance(), summary.getVariance(), tol);
        TestUtils.assertEquals(new Variance().evaluate(current), windowed.getVariance(), tol);
        // sum and sum of squares
        TestUtils.assertEquals(windowed.getSum(), summary.getSum(), tol);
        TestUtils.assertEquals(new Sum().evaluate(current), windowed.getSum(), tol);
        TestUtils.assertEquals(windowed.getSumsq(), summary.getSumsq(), tol);
        TestUtils.assertEquals(new SumOfSquares().evaluate(current), windowed.getSumsq(), tol);
        // population variance
        TestUtils.assertEquals(windowed.getPopulationVariance(), summary.getPopulationVariance(), tol);
        TestUtils.assertEquals(new Variance(false).evaluate(current), windowed.getPopulationVariance(), tol);
    }
}
 
Example #21
Source File: TestLongVariancePopAggregation.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the value of {@code new Variance(false).evaluate(...)} for the sequence
 * {@code start, start + 1, ..., start + length - 1}, or {@code null} when the
 * sequence is empty.
 */
@Override
protected Number getExpectedValue(int start, int length)
{
    if (length == 0) {
        return null;
    }

    final double[] sequence = new double[length];
    for (int offset = 0; offset < sequence.length; offset++) {
        sequence[offset] = start + offset;
    }

    return new Variance(false).evaluate(sequence);
}
 
Example #22
Source File: DescriptiveStatisticsTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Adds the values 0 to 19 to a {@code DescriptiveStatistics} with window size 5 and, after
 * each addition, checks that a {@code SummaryStatistics} refilled from
 * {@code getValues()} and the stand-alone statistics evaluated on the same array agree with
 * it to within 1E-12.
 */
@Test
public void testSummaryConsistency() {
    final double tol = 1E-12;
    final int windowSize = 5;
    final DescriptiveStatistics windowed = new DescriptiveStatistics();
    windowed.setWindowSize(windowSize);
    final SummaryStatistics summary = new SummaryStatistics();
    for (int added = 0; added < 20; added++) {
        windowed.addValue(added);

        // refill the summary from scratch with windowed.getValues()
        summary.clear();
        final double[] current = windowed.getValues();
        for (final double value : current) {
            summary.addValue(value);
        }

        // mean
        TestUtils.assertEquals(windowed.getMean(), summary.getMean(), tol);
        TestUtils.assertEquals(new Mean().evaluate(current), windowed.getMean(), tol);
        // maximum
        TestUtils.assertEquals(windowed.getMax(), summary.getMax(), tol);
        TestUtils.assertEquals(new Max().evaluate(current), windowed.getMax(), tol);
        // geometric mean
        TestUtils.assertEquals(windowed.getGeometricMean(), summary.getGeometricMean(), tol);
        TestUtils.assertEquals(new GeometricMean().evaluate(current), windowed.getGeometricMean(), tol);
        // minimum
        TestUtils.assertEquals(windowed.getMin(), summary.getMin(), tol);
        TestUtils.assertEquals(new Min().evaluate(current), windowed.getMin(), tol);
        // standard deviation and variance
        TestUtils.assertEquals(windowed.getStandardDeviation(), summary.getStandardDeviation(), tol);
        TestUtils.assertEquals(windowed.getVariance(), summary.getVariance(), tol);
        TestUtils.assertEquals(new Variance().evaluate(current), windowed.getVariance(), tol);
        // sum and sum of squares
        TestUtils.assertEquals(windowed.getSum(), summary.getSum(), tol);
        TestUtils.assertEquals(new Sum().evaluate(current), windowed.getSum(), tol);
        TestUtils.assertEquals(windowed.getSumsq(), summary.getSumsq(), tol);
        TestUtils.assertEquals(new SumOfSquares().evaluate(current), windowed.getSumsq(), tol);
        // population variance
        TestUtils.assertEquals(windowed.getPopulationVariance(), summary.getPopulationVariance(), tol);
        TestUtils.assertEquals(new Variance(false).evaluate(current), windowed.getPopulationVariance(), tol);
    }
}
 
Example #23
Source File: SummaryStatisticsTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * JIRA: MATH-691
 * <p>Checks that a {@code SummaryStatistics} whose variance implementation was replaced by
 * {@code new Variance(false)} reports exactly the value that {@code new Variance(false)}
 * evaluates directly on the same data.</p>
 */
@Test
public void testOverrideVarianceWithMathClass() {
    final double[] scores = {1, 2, 3, 4};
    final SummaryStatistics stats = new SummaryStatistics();
    // use "population variance"
    stats.setVarianceImpl(new Variance(false));
    for (int k = 0; k < scores.length; k++) {
        stats.addValue(scores[k]);
    }
    final double expected = new Variance(false).evaluate(scores);
    Assert.assertEquals(expected, stats.getVariance(), 0);
}
 
Example #24
Source File: DescriptiveStatisticsTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Adds the values 0 to 19 to a {@code DescriptiveStatistics} with window size 5 and, after
 * each addition, checks that a {@code SummaryStatistics} refilled from
 * {@code getValues()} and the stand-alone statistics evaluated on the same array agree with
 * it to within 1E-12.
 */
@Test
public void testSummaryConsistency() {
    final double tol = 1E-12;
    final int windowSize = 5;
    final DescriptiveStatistics windowed = new DescriptiveStatistics();
    windowed.setWindowSize(windowSize);
    final SummaryStatistics summary = new SummaryStatistics();
    for (int added = 0; added < 20; added++) {
        windowed.addValue(added);

        // refill the summary from scratch with windowed.getValues()
        summary.clear();
        final double[] current = windowed.getValues();
        for (final double value : current) {
            summary.addValue(value);
        }

        // mean
        TestUtils.assertEquals(windowed.getMean(), summary.getMean(), tol);
        TestUtils.assertEquals(new Mean().evaluate(current), windowed.getMean(), tol);
        // maximum
        TestUtils.assertEquals(windowed.getMax(), summary.getMax(), tol);
        TestUtils.assertEquals(new Max().evaluate(current), windowed.getMax(), tol);
        // geometric mean
        TestUtils.assertEquals(windowed.getGeometricMean(), summary.getGeometricMean(), tol);
        TestUtils.assertEquals(new GeometricMean().evaluate(current), windowed.getGeometricMean(), tol);
        // minimum
        TestUtils.assertEquals(windowed.getMin(), summary.getMin(), tol);
        TestUtils.assertEquals(new Min().evaluate(current), windowed.getMin(), tol);
        // standard deviation and variance
        TestUtils.assertEquals(windowed.getStandardDeviation(), summary.getStandardDeviation(), tol);
        TestUtils.assertEquals(windowed.getVariance(), summary.getVariance(), tol);
        TestUtils.assertEquals(new Variance().evaluate(current), windowed.getVariance(), tol);
        // sum and sum of squares
        TestUtils.assertEquals(windowed.getSum(), summary.getSum(), tol);
        TestUtils.assertEquals(new Sum().evaluate(current), windowed.getSum(), tol);
        TestUtils.assertEquals(windowed.getSumsq(), summary.getSumsq(), tol);
        TestUtils.assertEquals(new SumOfSquares().evaluate(current), windowed.getSumsq(), tol);
        // population variance
        TestUtils.assertEquals(windowed.getPopulationVariance(), summary.getPopulationVariance(), tol);
        TestUtils.assertEquals(new Variance(false).evaluate(current), windowed.getPopulationVariance(), tol);
    }
}
 
Example #25
Source File: DescriptiveStatisticsTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Adds the values 0 to 19 to a {@code DescriptiveStatistics} with window size 5 and, after
 * each addition, checks that a {@code SummaryStatistics} refilled from
 * {@code getValues()} and the stand-alone statistics evaluated on the same array agree with
 * it to within 1E-12.
 */
@Test
public void testSummaryConsistency() {
    final double tol = 1E-12;
    final int windowSize = 5;
    final DescriptiveStatistics windowed = new DescriptiveStatistics();
    windowed.setWindowSize(windowSize);
    final SummaryStatistics summary = new SummaryStatistics();
    for (int added = 0; added < 20; added++) {
        windowed.addValue(added);

        // refill the summary from scratch with windowed.getValues()
        summary.clear();
        final double[] current = windowed.getValues();
        for (final double value : current) {
            summary.addValue(value);
        }

        // mean
        TestUtils.assertEquals(windowed.getMean(), summary.getMean(), tol);
        TestUtils.assertEquals(new Mean().evaluate(current), windowed.getMean(), tol);
        // maximum
        TestUtils.assertEquals(windowed.getMax(), summary.getMax(), tol);
        TestUtils.assertEquals(new Max().evaluate(current), windowed.getMax(), tol);
        // geometric mean
        TestUtils.assertEquals(windowed.getGeometricMean(), summary.getGeometricMean(), tol);
        TestUtils.assertEquals(new GeometricMean().evaluate(current), windowed.getGeometricMean(), tol);
        // minimum
        TestUtils.assertEquals(windowed.getMin(), summary.getMin(), tol);
        TestUtils.assertEquals(new Min().evaluate(current), windowed.getMin(), tol);
        // standard deviation and variance
        TestUtils.assertEquals(windowed.getStandardDeviation(), summary.getStandardDeviation(), tol);
        TestUtils.assertEquals(windowed.getVariance(), summary.getVariance(), tol);
        TestUtils.assertEquals(new Variance().evaluate(current), windowed.getVariance(), tol);
        // sum and sum of squares
        TestUtils.assertEquals(windowed.getSum(), summary.getSum(), tol);
        TestUtils.assertEquals(new Sum().evaluate(current), windowed.getSum(), tol);
        TestUtils.assertEquals(windowed.getSumsq(), summary.getSumsq(), tol);
        TestUtils.assertEquals(new SumOfSquares().evaluate(current), windowed.getSumsq(), tol);
        // population variance
        TestUtils.assertEquals(windowed.getPopulationVariance(), summary.getPopulationVariance(), tol);
        TestUtils.assertEquals(new Variance(false).evaluate(current), windowed.getPopulationVariance(), tol);
    }
}
 
Example #26
Source File: KMeansPlusPlusClusterer.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Get a random point from the {@link Cluster} with the largest distance variance.
 * <p>The returned point is removed from the list obtained from the selected
 * cluster's {@code getPoints()}.</p>
 *
 * @param clusters the {@link Cluster}s to search
 * @return a random point from the selected cluster
 * @throws ConvergenceException if clusters are all empty
 */
private T getPointFromLargestVarianceCluster(final Collection<CentroidCluster<T>> clusters)
        throws ConvergenceException {

    Cluster<T> widest = null;
    double widestVariance = Double.NEGATIVE_INFINITY;
    for (final CentroidCluster<T> candidate : clusters) {
        final List<T> members = candidate.getPoints();
        if (members.isEmpty()) {
            // empty clusters cannot provide a point
            continue;
        }

        // variance of the distances between the cluster's points and its center
        final Clusterable center = candidate.getCenter();
        final Variance spread = new Variance();
        for (final T member : members) {
            spread.increment(distance(member, center));
        }
        final double candidateVariance = spread.getResult();

        // strict comparison: on ties the earlier cluster is kept
        if (candidateVariance > widestVariance) {
            widestVariance = candidateVariance;
            widest = candidate;
        }
    }

    // no cluster was selected
    if (widest == null) {
        throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
    }

    // pick a random index and take that point out of the cluster's point list
    final List<T> pool = widest.getPoints();
    final int pick = random.nextInt(pool.size());
    return pool.remove(pick);

}
 
Example #27
Source File: KMeansPlusPlusClusterer.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Selects, among the non-empty {@link Cluster}s, the one whose points show
 * the largest variance in distance from the cluster center, and extracts a
 * randomly chosen point from it.
 *
 * @param clusters the {@link Cluster}s to search
 * @return a random point removed from the selected cluster
 * @throws ConvergenceException if clusters are all empty
 */
private T getPointFromLargestVarianceCluster(final Collection<Cluster<T>> clusters)
        throws ConvergenceException {

    Cluster<T> best = null;
    double bestVariance = Double.NEGATIVE_INFINITY;

    for (final Cluster<T> current : clusters) {
        final boolean hasPoints = !cluster_isEmpty(current);
        if (hasPoints) {

            // feed every point-to-center distance into a running variance
            final T pivot = current.getCenter();
            final Variance distanceVariance = new Variance();
            for (final T p : current.getPoints()) {
                final double d = p.distanceFrom(pivot);
                distanceVariance.increment(d);
            }
            final double currentVariance = distanceVariance.getResult();

            // remember this cluster only if it strictly beats the best so far
            if (currentVariance > bestVariance) {
                best = current;
                bestVariance = currentVariance;
            }

        }
    }

    // no cluster was retained: every one of them was empty
    if (best == null) {
        throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
    }

    // remove and return a point at a random position in the chosen cluster
    final List<T> candidates = best.getPoints();
    final int position = random.nextInt(candidates.size());
    return candidates.remove(position);

}

/**
 * Tells whether a cluster currently holds no point.
 *
 * @param cluster the cluster to inspect
 * @return {@code true} if the cluster's point list is empty
 */
private boolean cluster_isEmpty(final Cluster<T> cluster) {
    return cluster.getPoints().isEmpty();
}
 
Example #28
Source File: SummaryStatisticsTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * JIRA: MATH-691
 * <p>
 * Installs a non-bias-corrected {@link Variance} as the variance
 * implementation of a {@code SummaryStatistics} instance and checks that
 * {@code getVariance()} then matches a direct evaluation of
 * {@code new Variance(false)} on the same data.
 */
@Test
public void testOverrideVarianceWithMathClass() {
    final double[] scores = {1, 2, 3, 4};

    final SummaryStatistics stats = new SummaryStatistics();
    stats.setVarianceImpl(new Variance(false)); // use "population variance"
    for (int k = 0; k < scores.length; k++) {
        stats.addValue(scores[k]);
    }

    // reference value computed directly on the array, then the streamed one
    final double expected = new Variance(false).evaluate(scores);
    final double actual = stats.getVariance();

    // zero tolerance: both values must be exactly equal
    Assert.assertEquals(expected, actual, 0);
}
 
Example #29
Source File: KMeansPlusPlusClusterer.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Finds the non-empty {@link Cluster} with the largest variance of the
 * distances between its points and its center, and returns a point drawn
 * at random from that cluster. The returned point is removed from the
 * cluster's point list.
 *
 * @param clusters the {@link Cluster}s to search
 * @return a random point from the selected cluster
 * @throws ConvergenceException if clusters are all empty
 */
private T getPointFromLargestVarianceCluster(final Collection<Cluster<T>> clusters)
        throws ConvergenceException {

    double topVariance = Double.NEGATIVE_INFINITY;
    Cluster<T> topCluster = null;

    for (final Cluster<T> each : clusters) {
        // skip clusters that have no points at all
        if (each.getPoints().isEmpty()) {
            continue;
        }

        // variance of the distances from each point to the cluster center
        final T middle = each.getCenter();
        final Variance accumulator = new Variance();
        for (final T element : each.getPoints()) {
            accumulator.increment(element.distanceFrom(middle));
        }
        final double observed = accumulator.getResult();

        // keep the first cluster reaching the strictly largest variance
        if (observed > topVariance) {
            topVariance = observed;
            topCluster = each;
        }
    }

    // every cluster was empty
    if (topCluster == null) {
        throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
    }

    // pull one randomly indexed point out of the winning cluster
    final List<T> members = topCluster.getPoints();
    final int chosenIndex = random.nextInt(members.size());
    return members.remove(chosenIndex);

}
 
Example #30
Source File: Covariance.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Compute a covariance matrix from a matrix whose columns represent
 * covariates.
 * <p>
 * Entry {@code (i, j)} with {@code i != j} holds the covariance of columns
 * {@code i} and {@code j}; it is computed once and written to both
 * {@code (i, j)} and {@code (j, i)}. Diagonal entry {@code (i, i)} holds
 * the variance of column {@code i}.
 *
 * @param matrix input matrix (must have at least two columns and two rows)
 * @param biasCorrected determines whether or not covariance estimates are bias-corrected
 * @return covariance matrix
 */
protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected) {
    final int dimension = matrix.getColumnDimension();
    final Variance variance = new Variance(biasCorrected);
    final RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension);
    for (int i = 0; i < dimension; i++) {
        // Column i does not depend on j: fetch it once per outer iteration
        // instead of once per (i, j) pair plus once more for the diagonal.
        // NOTE(review): assumes covariance(...) does not modify its array
        // arguments, so the same array can be reused -- confirm.
        final double[] columnI = matrix.getColumn(i);
        for (int j = 0; j < i; j++) {
            final double cov = covariance(columnI, matrix.getColumn(j), biasCorrected);
            // fill both triangles with the same value
            outMatrix.setEntry(i, j, cov);
            outMatrix.setEntry(j, i, cov);
        }
        outMatrix.setEntry(i, i, variance.evaluate(columnI));
    }
    return outMatrix;
}