org.apache.commons.math3.stat.descriptive.moment.StandardDeviation Java Examples

The following examples show how to use org.apache.commons.math3.stat.descriptive.moment.StandardDeviation. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: UserProfileEigenModeler.java    From Eagle with Apache License 2.0 6 votes vote down vote up
private void computeStats(RealMatrix m){

        if(m.getColumnDimension() != this.cmdTypes.length){
            LOG.error("Please fix the commands list in config file");
            return;
        }
        statistics = new UserCommandStatistics[m.getColumnDimension()];
        for(int i=0; i<m.getColumnDimension(); i++){
            UserCommandStatistics stats = new UserCommandStatistics();
            stats.setCommandName(this.cmdTypes[i]);
            RealVector colData = m.getColumnVector(i);
            StandardDeviation deviation = new StandardDeviation();
            double stddev = deviation.evaluate(colData.toArray());
            //LOG.info("stddev is nan?" + (stddev == Double.NaN? "yes":"no"));
            if(stddev <= lowVarianceVal)
                stats.setLowVariant(true);
            else
                stats.setLowVariant(false);
            stats.setStddev(stddev);
            Mean mean = new Mean();
            double mu = mean.evaluate(colData.toArray());
            //LOG.info("mu is nan?" + (mu == Double.NaN? "yes":"no"));
            stats.setMean(mu);
            statistics[i] = stats;
        }
    }
 
Example #2
Source File: GCBiasCorrectorUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testGCCorrection() {
    final int numSamples = 5;
    final int numIntervals = 10000;

    final Pair<RealMatrix, double[]> data = simulateData(numSamples, numIntervals);
    final RealMatrix readCounts = data.getLeft();
    final double[] intervalGCContent = data.getRight();

    final RealMatrix correctedCoverage = readCounts.copy();
    GCBiasCorrector.correctGCBias(correctedCoverage, intervalGCContent);
    Utils.nonNull(correctedCoverage);
    final StandardDeviation stdevEvaluator = new StandardDeviation();
    final double[] stdDevsBySample = IntStream.range(0, correctedCoverage.getRowDimension())
            .mapToDouble(r -> stdevEvaluator.evaluate(correctedCoverage.getRow(r))).toArray();
    Arrays.stream(stdDevsBySample).forEach(x -> Assert.assertTrue(x < NON_GC_BIAS_NOISE_LEVEL * MEAN_READ_DEPTH));

    //check that GC-bias correction is approximately idempotent -- if you correct again, very little should happen
    final RealMatrix recorrectedCoverage = correctedCoverage.copy();
    GCBiasCorrector.correctGCBias(recorrectedCoverage, intervalGCContent);
    final double correctedChange = correctedCoverage.subtract(readCounts).getFrobeniusNorm();
    final double recorrectedChange = recorrectedCoverage.subtract(correctedCoverage).getFrobeniusNorm();
    Assert.assertTrue(recorrectedChange < correctedChange / 10.);
}
 
Example #3
Source File: SliceSamplerUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Test slice sampling of a normal distribution.  Checks that input mean and standard deviation are recovered
 * by 10000 samples to a relative error of 0.5% and 2%, respectively.
 */
@Test
public void testSliceSamplingOfNormalDistribution() {
    rng.setSeed(RANDOM_SEED);

    final double mean = 5.;
    final double standardDeviation = 0.75;
    final NormalDistribution normalDistribution = new NormalDistribution(mean, standardDeviation);
    final Function<Double, Double> normalLogPDF = normalDistribution::logDensity;

    final double xInitial = 1.;
    final double xMin = Double.NEGATIVE_INFINITY;
    final double xMax = Double.POSITIVE_INFINITY;
    final double width = 0.5;
    final int numSamples = 10000;
    final SliceSampler normalSampler = new SliceSampler(rng, normalLogPDF, xMin, xMax, width);
    final double[] samples = Doubles.toArray(normalSampler.sample(xInitial, numSamples));

    final double sampleMean = new Mean().evaluate(samples);
    final double sampleStandardDeviation = new StandardDeviation().evaluate(samples);
    Assert.assertEquals(relativeError(sampleMean, mean), 0., 0.005);
    Assert.assertEquals(relativeError(sampleStandardDeviation, standardDeviation), 0., 0.02);
}
 
Example #4
Source File: QueryManager.java    From bullet-core with Apache License 2.0 6 votes vote down vote up
/**
 * Gets some statistics about the current state of partitioning and queries in this manager.
 *
 * @return A {@link Map} of {@link PartitionStat} to their values for the current state of the manager.
 */
public Map<PartitionStat, Object> getStats() {
    Map<PartitionStat, Object> stats = new HashMap<>();
    List<Partition> sorted = partitioning.entrySet().stream().map(Partition::new).sorted().collect(Collectors.toList());
    int size = sorted.size();
    stats.put(PartitionStat.QUERY_COUNT, queries.size());
    stats.put(PartitionStat.PARTITION_COUNT, size);
    stats.put(PartitionStat.ACTUAL_QUERIES_SEEN, queriesSeen);
    stats.put(PartitionStat.EXPECTED_QUERIES_SEEN, expectedQueriesSeen);
    if (size > 0) {
        stats.put(PartitionStat.LARGEST_PARTITION, sorted.get(size - 1).toString());
        stats.put(PartitionStat.SMALLEST_PARTITION, sorted.get(0).toString());
        double[] sizes = sorted.stream().mapToDouble(p -> (double) p.count).toArray();
        stats.put(PartitionStat.STDDEV_PARTITION_SIZE, new StandardDeviation().evaluate(sizes));
        stats.put(PartitionStat.DISTRIBUTION_PARTITION_SIZE, getDistributions(sorted));
    }
    return stats;
}
 
Example #5
Source File: MinibatchSliceSamplerUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Tests slice sampling of a uniform prior with no data points.
 * Checks that mean and standard deviation are recovered from the samples
 * by 500 burn-in samples + 1000 samples to a relative error of 1% and 5%, respectively.
 */
@Test
public void testSliceSamplingOfUniformPriorWithNoData() {
    rng.setSeed(RANDOM_SEED);

    final double mean = 0.5;
    final double standardDeviation = 1. / Math.sqrt(12.);
    final List<Double> data = Collections.emptyList();

    final double xInitial = 0.5;
    final double xMin = 0.;
    final double xMax = 1.;
    final double width = 0.1;
    final int numBurnInSamples = 500;
    final int numSamples = 1500;
    final MinibatchSliceSampler<Double> uniformSampler = new MinibatchSliceSampler<>(
            rng, data, UNIFORM_LOG_PRIOR, (d, x) -> 0.,
            xMin, xMax, width, MINIBATCH_SIZE, APPROX_THRESHOLD);
    final double[] samples = Doubles.toArray(uniformSampler.sample(xInitial, numSamples).subList(numBurnInSamples, numSamples));

    final double sampleMean = new Mean().evaluate(samples);
    final double sampleStandardDeviation = new StandardDeviation().evaluate(samples);
    Assert.assertEquals(relativeError(sampleMean, mean), 0., 0.01);
    Assert.assertEquals(relativeError(sampleStandardDeviation, standardDeviation), 0., 0.05);
}
 
Example #6
Source File: ReCapSegCaller.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private static double calculateT(final ReadCountCollection tangentNormalizedCoverage, final List<ModeledSegment> segments) {

        //Get the segments that are likely copy neutral.
        // Math.abs removed to mimic python...
        final List<ModeledSegment> copyNeutralSegments = segments.stream().filter(s -> s.getSegmentMean() < COPY_NEUTRAL_CUTOFF).collect(Collectors.toList());

        // Get the targets that correspond to the copyNeutralSegments... note that individual targets, due to noise,
        //  can be far away from copy neutral
        final TargetCollection<ReadCountRecord.SingleSampleRecord> targetsWithCoverage =
                new HashedListTargetCollection<>(tangentNormalizedCoverage.records().stream().map(ReadCountRecord::asSingleSampleRecord).collect(Collectors.toList()));
        final double[] copyNeutralTargetsCopyRatio = copyNeutralSegments.stream()
                .flatMap(s -> targetsWithCoverage.targets(s).stream())
                .mapToDouble(ReadCountRecord.SingleSampleRecord::getCount).toArray();

        final double meanCopyNeutralTargets = new Mean().evaluate(copyNeutralTargetsCopyRatio);
        final double sigmaCopyNeutralTargets = new StandardDeviation().evaluate(copyNeutralTargetsCopyRatio);

        // Now we filter outliers by only including those w/in 2 standard deviations.
        final double [] filteredCopyNeutralTargetsCopyRatio = Arrays.stream(copyNeutralTargetsCopyRatio)
                .filter(c -> Math.abs(c - meanCopyNeutralTargets) < sigmaCopyNeutralTargets * Z_THRESHOLD).toArray();

        return new StandardDeviation().evaluate(filteredCopyNeutralTargetsCopyRatio);
    }
 
Example #7
Source File: ThirdOctaveBandsFilteringTest.java    From NoiseCapture with GNU General Public License v3.0 6 votes vote down vote up
@Test
public void testPinkNoise() {
    short[] pinkNoise = SOSSignalProcessing.makePinkNoise(441000, (short)2500, 0);
    FFTSignalProcessing fftSignalProcessing = new FFTSignalProcessing(44100,
            ThirdOctaveBandsFiltering.STANDARD_FREQUENCIES_REDUCED, pinkNoise.length);
    fftSignalProcessing.addSample(pinkNoise);
    FFTSignalProcessing.ProcessingResult result = fftSignalProcessing.processSample(FFTSignalProcessing.WINDOW_TYPE.RECTANGULAR,
            false,
            false);

    // Compute
    StandardDeviation standardDeviation = new StandardDeviation();
    double[] dArray = new double[result.dBaLevels.length];
    for(int i = 0; i < result.dBaLevels.length; i++) {
        dArray[i] = result.dBaLevels[i];
    }
    assertEquals(0, standardDeviation.evaluate(dArray), 0.25);
}
 
Example #8
Source File: SliceSamplerUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Tests slice sampling of a normal distribution.  Checks that input mean and standard deviation are recovered
 * by 10000 samples to a relative error of 0.5% and 2%, respectively.
 */
@Test
public void testSliceSamplingOfNormalDistribution() {
    rng.setSeed(RANDOM_SEED);

    final double mean = 5.;
    final double standardDeviation = 0.75;
    final NormalDistribution normalDistribution = new NormalDistribution(mean, standardDeviation);
    final Function<Double, Double> normalLogPDF = normalDistribution::logDensity;

    final double xInitial = 1.;
    final double xMin = Double.NEGATIVE_INFINITY;
    final double xMax = Double.POSITIVE_INFINITY;
    final double width = 0.5;
    final int numSamples = 10000;
    final SliceSampler normalSampler = new SliceSampler(rng, normalLogPDF, xMin, xMax, width);
    final double[] samples = Doubles.toArray(normalSampler.sample(xInitial, numSamples));

    final double sampleMean = new Mean().evaluate(samples);
    final double sampleStandardDeviation = new StandardDeviation().evaluate(samples);
    Assert.assertEquals(relativeError(sampleMean, mean), 0., 0.005);
    Assert.assertEquals(relativeError(sampleStandardDeviation, standardDeviation), 0., 0.02);
}
 
Example #9
Source File: PosteriorSummaryUtils.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Given a list of posterior samples, returns an estimate of the posterior mode (using
 * mllib kernel density estimation in {@link KernelDensity} and {@link BrentOptimizer}).
 * Note that estimate may be poor if number of samples is small (resulting in poor kernel density estimation),
 * or if posterior is not unimodal (or is sufficiently pathological otherwise). If the samples contain
 * {@link Double#NaN}, {@link Double#NaN} will be returned.
 * @param samples   posterior samples, cannot be {@code null} and number of samples must be greater than 0
 * @param ctx       {@link JavaSparkContext} used by {@link KernelDensity} for mllib kernel density estimation
 */
public static double calculatePosteriorMode(final List<Double> samples, final JavaSparkContext ctx) {
    Utils.nonNull(samples);
    Utils.validateArg(samples.size() > 0, "Number of samples must be greater than zero.");

    //calculate sample min, max, mean, and standard deviation
    final double sampleMin = Collections.min(samples);
    final double sampleMax = Collections.max(samples);
    final double sampleMean = new Mean().evaluate(Doubles.toArray(samples));
    final double sampleStandardDeviation = new StandardDeviation().evaluate(Doubles.toArray(samples));

    //if samples are all the same or contain NaN, can simply return mean
    if (sampleStandardDeviation == 0. || Double.isNaN(sampleMean)) {
        return sampleMean;
    }

    //use Silverman's rule to set bandwidth for kernel density estimation from sample standard deviation
    //see https://en.wikipedia.org/wiki/Kernel_density_estimation#Practical_estimation_of_the_bandwidth
    final double bandwidth =
            SILVERMANS_RULE_CONSTANT * sampleStandardDeviation * Math.pow(samples.size(), SILVERMANS_RULE_EXPONENT);

    //use kernel density estimation to approximate posterior from samples
    final KernelDensity pdf = new KernelDensity().setSample(ctx.parallelize(samples, 1)).setBandwidth(bandwidth);

    //use Brent optimization to find mode (i.e., maximum) of kernel-density-estimated posterior
    final BrentOptimizer optimizer =
            new BrentOptimizer(RELATIVE_TOLERANCE, RELATIVE_TOLERANCE * (sampleMax - sampleMin));
    final UnivariateObjectiveFunction objective =
            new UnivariateObjectiveFunction(f -> pdf.estimate(new double[] {f})[0]);
    //search for mode within sample range, start near sample mean
    final SearchInterval searchInterval = new SearchInterval(sampleMin, sampleMax, sampleMean);
    return optimizer.optimize(objective, GoalType.MAXIMIZE, searchInterval, BRENT_MAX_EVAL).getPoint();
}
 
Example #10
Source File: MathUtilities.java    From StatsAgg with Apache License 2.0 6 votes vote down vote up
public static BigDecimal computePopulationStandardDeviationOfBigDecimals(List<BigDecimal> numbers) {
    
    if ((numbers == null) || numbers.isEmpty()) {
        return null;
    }
    
    try {
        double[] doublesArray = new double[numbers.size()];

        for (int i = 0; i < doublesArray.length; i++) {
            doublesArray[i] = numbers.get(i).doubleValue();
        }

        StandardDeviation standardDeviation = new StandardDeviation();
        standardDeviation.setBiasCorrected(false);
        BigDecimal standardDeviationResult = new BigDecimal(standardDeviation.evaluate(doublesArray));

        return standardDeviationResult;
    }
    catch (Exception e) {
        logger.error(e.toString() + System.lineSeparator() + StackTrace.getStringFromStackTrace(e));
        return null;
    }
}
 
Example #11
Source File: DescriptiveStatisticsHistogramStatistics.java    From flink with Apache License 2.0 6 votes vote down vote up
@Override
public double evaluate(double[] values, int begin, int length)
		throws MathIllegalArgumentException {
	this.count = length;
	percentilesImpl.setData(values, begin, length);

	SimpleStats secondMoment = new SimpleStats();
	secondMoment.evaluate(values, begin, length);
	this.mean = secondMoment.getMean();
	this.min = secondMoment.getMin();
	this.max = secondMoment.getMax();

	this.stddev = new StandardDeviation(secondMoment).getResult();

	return Double.NaN;
}
 
Example #12
Source File: bedGraphMath.java    From HMMRATAC with GNU General Public License v3.0 6 votes vote down vote up
private void setMeanAndStd(){
	Mean mu = new Mean();
	StandardDeviation dev = new StandardDeviation();
	for (String chr : bedgraph.keySet()){
		
		ArrayList<TagNode> inTemp = bedgraph.get(chr);
		Collections.sort(inTemp,  TagNode.basepairComparator);
		for (int i = 0; i < inTemp.size();i++){
			int length = inTemp.get(i).getLength();
			double value = inTemp.get(i).getScore2();
			for (int a = 0; a < length;a++){
				mu.increment(value);
				dev.increment(value);
			}
		}
	}
	mean = mu.getResult();
	std = dev.getResult();
}
 
Example #13
Source File: MeanAndStd.java    From HMMRATAC with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Set the data across a specific region
 * @param node a TagNode representing a specific region for calculation
 * @throws IOException
 */
private void SetMeanAndStd2(TagNode node) throws IOException{
	BBFileReader wigReader = new BBFileReader(wigFile);
	String chrom = node.getChrom();
	int begin = node.getStart();
	int end = node.getStop();
	BigWigIterator iter = wigReader.getBigWigIterator(chrom, begin, chrom, end, false);
	Mean mu = new Mean();
	StandardDeviation dev = new StandardDeviation();
	while(iter.hasNext()){
		WigItem item = iter.next();
		int start = item.getStartBase();
		int stop = item.getEndBase();
		double value = item.getWigValue();
		for (int i = start; i < stop;i++){
			mu.increment(value);
			dev.increment(value);
		}
		
	}
	mean = mu.getResult();
	std = dev.getResult();
	wigReader=null;
}
 
Example #14
Source File: MeanAndStd.java    From HMMRATAC with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Set the data across the entire genome
 * @throws IOException
 */
private void SetMeanAndStd() throws IOException{
	BBFileReader wigReader = new BBFileReader(wigFile);
	BigWigIterator iter = wigReader.getBigWigIterator();
	Mean mu = new Mean();
	StandardDeviation dev = new StandardDeviation();
	while(iter.hasNext()){
		WigItem item = iter.next();
		int start = item.getStartBase();
		int stop = item.getEndBase();
		double value = item.getWigValue();
		for (int i = start; i < stop;i++){
			mu.increment(value);
			dev.increment(value);
		}
		
	}
	mean = mu.getResult();
	std = dev.getResult();
}
 
Example #15
Source File: PosteriorSummaryUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Given a list of posterior samples, returns an estimate of the posterior mode (using
 * mllib kernel density estimation in {@link KernelDensity} and {@link BrentOptimizer}).
 * Note that estimate may be poor if number of samples is small (resulting in poor kernel density estimation),
 * or if posterior is not unimodal (or is sufficiently pathological otherwise). If the samples contain
 * {@link Double#NaN}, {@link Double#NaN} will be returned.
 * @param samples   posterior samples, cannot be {@code null} and number of samples must be greater than 0
 * @param ctx       {@link JavaSparkContext} used by {@link KernelDensity} for mllib kernel density estimation
 */
public static double calculatePosteriorMode(final List<Double> samples, final JavaSparkContext ctx) {
    Utils.nonNull(samples);
    Utils.validateArg(samples.size() > 0, "Number of samples must be greater than zero.");

    //calculate sample min, max, mean, and standard deviation
    final double sampleMin = Collections.min(samples);
    final double sampleMax = Collections.max(samples);
    final double sampleMean = new Mean().evaluate(Doubles.toArray(samples));
    final double sampleStandardDeviation = new StandardDeviation().evaluate(Doubles.toArray(samples));

    //if samples are all the same or contain NaN, can simply return mean
    if (sampleStandardDeviation == 0. || Double.isNaN(sampleMean)) {
        return sampleMean;
    }

    //use Silverman's rule to set bandwidth for kernel density estimation from sample standard deviation
    //see https://en.wikipedia.org/wiki/Kernel_density_estimation#Practical_estimation_of_the_bandwidth
    final double bandwidth =
            SILVERMANS_RULE_CONSTANT * sampleStandardDeviation * Math.pow(samples.size(), SILVERMANS_RULE_EXPONENT);

    //use kernel density estimation to approximate posterior from samples
    final KernelDensity pdf = new KernelDensity().setSample(ctx.parallelize(samples, 1)).setBandwidth(bandwidth);

    //use Brent optimization to find mode (i.e., maximum) of kernel-density-estimated posterior
    final BrentOptimizer optimizer =
            new BrentOptimizer(RELATIVE_TOLERANCE, RELATIVE_TOLERANCE * (sampleMax - sampleMin));
    final UnivariateObjectiveFunction objective =
            new UnivariateObjectiveFunction(f -> pdf.estimate(new double[] {f})[0]);
    //search for mode within sample range, start near sample mean
    final SearchInterval searchInterval = new SearchInterval(sampleMin, sampleMax, sampleMean);
    return optimizer.optimize(objective, GoalType.MAXIMIZE, searchInterval, BRENT_MAX_EVAL).getPoint();
}
 
Example #16
Source File: UserProfileKDEModeler.java    From Eagle with Apache License 2.0 5 votes vote down vote up
private void computeStats(RealMatrix m){
    if(m.getColumnDimension() !=  this.cmdTypes.length){
        LOG.error("Please fix the commands list in config file");
    }

    statistics = new UserCommandStatistics[m.getColumnDimension()];

    for(int i=0; i<m.getColumnDimension(); i++){
        UserCommandStatistics stats = new UserCommandStatistics();
        stats.setCommandName(this.cmdTypes[i]);
        RealVector colData = m.getColumnVector(i);
        StandardDeviation deviation = new StandardDeviation();
        double stddev = deviation.evaluate(colData.toArray());

        if(LOG.isDebugEnabled()) LOG.debug("Stddev is NAN ? " + (Double.isNaN(stddev) ? "yes" : "no"));
        if(stddev <= lowVarianceVal)
            stats.setLowVariant(true);
        else
            stats.setLowVariant(false);

        stats.setStddev(stddev);
        Mean mean = new Mean();
        double mu = mean.evaluate(colData.toArray());
        if(LOG.isDebugEnabled()) LOG.debug("mu is NAN ? " + (Double.isNaN(mu)? "yes":"no"));

        stats.setMean(mu);
        statistics[i]=stats;
    }
}
 
Example #17
Source File: MessagePackDataformatTestBase.java    From jackson-dataformat-msgpack with Apache License 2.0 5 votes vote down vote up
protected void printStat(String label, double[] values) {
    StandardDeviation standardDeviation = new StandardDeviation();
    System.out.println(label + ":");
    System.out.println(String.format("  mean : %.2f", StatUtils.mean(values)));
    System.out.println(String.format("  min  : %.2f", StatUtils.min(values)));
    System.out.println(String.format("  max  : %.2f", StatUtils.max(values)));
    System.out.println(String.format("  stdev: %.2f", standardDeviation.evaluate(values)));
    System.out.println("");
}
 
Example #18
Source File: TestDoubleStdDevPopAggregation.java    From presto with Apache License 2.0 5 votes vote down vote up
@Override
protected Number getExpectedValue(int start, int length)
{
    if (length == 0) {
        return null;
    }

    double[] values = new double[length];
    for (int i = 0; i < length; i++) {
        values[i] = start + i;
    }

    StandardDeviation stdDev = new StandardDeviation(false);
    return stdDev.evaluate(values);
}
 
Example #19
Source File: MatrixSummaryUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Return an array containing the variance for each row in the given matrix.
 * @param m Not {@code null}.  Size MxN.    If any entry is NaN, the corresponding rows will have a
 *          variance of NaN.
 * @return array of size M.  Never {@code null}  IF there is only one column (or only one entry
 */
public static double[] getRowVariances(final RealMatrix m) {
    Utils.nonNull(m, "Cannot calculate medians on a null matrix.");
    final StandardDeviation std = new StandardDeviation();
    return IntStream.range(0, m.getRowDimension()).boxed()
            .mapToDouble(i -> Math.pow(std.evaluate(m.getRow(i)), 2)).toArray();
}
 
Example #20
Source File: RandomDNAUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
public void checkResults(final int[] results, final int n, final int m) {
    final double mean = Arrays.stream(results).average().getAsDouble();
    final double std = new StandardDeviation().evaluate(Arrays.stream(results).asDoubleStream().toArray());
    final double expectedMean = (n*m)/4.0;
    final double s = std; // not really because it's the population not the sample dtd but it'll do
    Assert.assertTrue(mean < expectedMean + 2 * s / Math.sqrt(n * m), "unexpected mean:" + mean);
    Assert.assertTrue(mean > expectedMean-2*s/Math.sqrt(n*m), "unexpected mean:" +mean);
}
 
Example #21
Source File: MinibatchSliceSamplerUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Tests slice sampling of a normal posterior = uniform prior x normal likelihood from 1000 data points.
 * Checks that input mean and standard deviation are recovered from the posterior of the mean parameter
 * by 500 burn-in samples + 1000 samples to a relative error of 1% and 5%, respectively.
 */
@Test
public void testSliceSamplingOfNormalPosterior() {
    rng.setSeed(RANDOM_SEED);

    final double mean = 5.;
    final double standardDeviation = 0.75;
    final NormalDistribution normalDistribution = new NormalDistribution(rng, mean, standardDeviation);
    final BiFunction<Double, Double, Double> normalLogLikelihood =
            (d, x) -> new NormalDistribution(null, x, standardDeviation).logDensity(d);
    final List<Double> data = Doubles.asList(normalDistribution.sample(NUM_DATA_POINTS));

    final double xInitial = 1.;
    final double xMin = Double.NEGATIVE_INFINITY;
    final double xMax = Double.POSITIVE_INFINITY;
    final double width = 0.5;
    final int numBurnInSamples = 500;
    final int numSamples = 1500;
    final MinibatchSliceSampler<Double> normalSampler = new MinibatchSliceSampler<>(
            rng, data, UNIFORM_LOG_PRIOR, normalLogLikelihood,
            xMin, xMax, width, MINIBATCH_SIZE, APPROX_THRESHOLD);
    final double[] samples = Doubles.toArray(normalSampler.sample(xInitial, numSamples).subList(numBurnInSamples, numSamples));

    final double sampleMean = new Mean().evaluate(samples);
    final double sampleStandardDeviation = new StandardDeviation().evaluate(samples);
    Assert.assertEquals(relativeError(sampleMean, mean), 0., 0.01);
    Assert.assertEquals(relativeError(sampleStandardDeviation, standardDeviation / Math.sqrt(NUM_DATA_POINTS)), 0., 0.05);
}
 
Example #22
Source File: MinibatchSliceSamplerUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Tests slice sampling of a beta posterior = uniform prior x binomial likelihood from 1000 data points.
 * Checks that input mean and standard deviation are recovered from the posterior of the mean parameter
 * by 500 burn-in samples + 1000 samples to a relative error of 1% and 5%, respectively.
 */
@Test
public void testSliceSamplingOfBetaPosterior() {
    rng.setSeed(RANDOM_SEED);

    final int numTrials = 100;
    final double p = 0.9;
    final BinomialDistribution binomialDistribution = new BinomialDistribution(rng, numTrials, p);
    final BiFunction<Integer, Double, Double> binomialLogLikelihood =
            (d, x) -> new BinomialDistribution(null, numTrials, x).logProbability(d);
    final List<Integer> data = Ints.asList(binomialDistribution.sample(NUM_DATA_POINTS));
    final double numSuccessesTotal = data.stream().mapToDouble(x -> x).sum();
    final double alpha = 1. + numSuccessesTotal;
    final double beta = 1. + (numTrials * NUM_DATA_POINTS - numSuccessesTotal);
    final double variance = new BetaDistribution(null, alpha, beta).getNumericalVariance();

    final double xInitial = 0.5;
    final double xMin = 0.;
    final double xMax = 1.;
    final double width = 0.1;
    final int numBurnInSamples = 500;
    final int numSamples = 1500;
    final MinibatchSliceSampler<Integer> betaSampler = new MinibatchSliceSampler<>(
            rng, data, UNIFORM_LOG_PRIOR, binomialLogLikelihood,
            xMin, xMax, width, MINIBATCH_SIZE, APPROX_THRESHOLD);
    final double[] samples = Doubles.toArray(betaSampler.sample(xInitial, numSamples).subList(numBurnInSamples, numSamples));

    final double sampleMean = new Mean().evaluate(samples);
    final double sampleStandardDeviation = new StandardDeviation().evaluate(samples);
    Assert.assertEquals(relativeError(sampleMean, p), 0., 0.01);
    Assert.assertEquals(relativeError(sampleStandardDeviation, Math.sqrt(variance)), 0., 0.05);
}
 
Example #23
Source File: GibbsSamplerSingleGaussianUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Tests Bayesian inference of a Gaussian model via MCMC.  Recovery of input values for the variance and mean
 * global parameters is checked.  In particular, the mean and standard deviation of the posteriors for
 * both parameters must be recovered to within a relative error of 1% and 10%, respectively, in 250 samples
 * (after 250 burn-in samples have been discarded).
 */
@Test
public void testRunMCMCOnSingleGaussianModel() {
    //Create new instance of the Modeller helper class, passing all quantities needed to initialize state and data.
    final GaussianModeller modeller = new GaussianModeller(VARIANCE_INITIAL, MEAN_INITIAL, datapointsList);
    //Create a GibbsSampler, passing the total number of samples (including burn-in samples)
    //and the model held by the Modeller.
    final GibbsSampler<GaussianParameter, ParameterizedState<GaussianParameter>, GaussianDataCollection> gibbsSampler =
            new GibbsSampler<>(NUM_SAMPLES, modeller.model);
    //Run the MCMC.
    gibbsSampler.runMCMC();

    //Get the samples of each of the parameter posteriors (discarding burn-in samples) by passing the
    //parameter name, type, and burn-in number to the getSamples method.
    final double[] varianceSamples = Doubles.toArray(gibbsSampler.getSamples(GaussianParameter.VARIANCE, Double.class, NUM_BURN_IN));
    final double[] meanSamples = Doubles.toArray(gibbsSampler.getSamples(GaussianParameter.MEAN, Double.class, NUM_BURN_IN));

    //Check that the statistics---i.e., the means and standard deviations---of the posteriors
    //agree with those found by emcee/analytically to a relative error of 1% and 10%, respectively.
    final double variancePosteriorCenter = new Mean().evaluate(varianceSamples);
    final double variancePosteriorStandardDeviation = new StandardDeviation().evaluate(varianceSamples);
    Assert.assertEquals(relativeError(variancePosteriorCenter, VARIANCE_TRUTH),
            0., RELATIVE_ERROR_THRESHOLD_FOR_CENTERS);
    Assert.assertEquals(
            relativeError(variancePosteriorStandardDeviation, VARIANCE_POSTERIOR_STANDARD_DEVIATION_TRUTH),
            0., RELATIVE_ERROR_THRESHOLD_FOR_STANDARD_DEVIATIONS);
    final double meanPosteriorCenter = new Mean().evaluate(meanSamples);
    final double meanPosteriorStandardDeviation = new StandardDeviation().evaluate(meanSamples);
    Assert.assertEquals(relativeError(meanPosteriorCenter, MEAN_TRUTH),
            0., RELATIVE_ERROR_THRESHOLD_FOR_CENTERS);
    Assert.assertEquals(
            relativeError(meanPosteriorStandardDeviation, MEAN_POSTERIOR_STANDARD_DEVIATION_TRUTH),
            0., RELATIVE_ERROR_THRESHOLD_FOR_STANDARD_DEVIATIONS);
}
 
Example #24
Source File: Endpoint.java    From oryx with Apache License 2.0 5 votes vote down vote up
protected Endpoint(String path, double relativeProb) {
  Preconditions.checkArgument(relativeProb > 0.0);
  this.path = path;
  this.relativeProb = relativeProb;
  meanTimeNanos = new Mean();
  stdevTimeNanos = new StandardDeviation();
}
 
Example #25
Source File: StandardDeviationFunction.java    From jpmml-evaluator with GNU Affero General Public License v3.0 5 votes vote down vote up
public Double evaluate(Collection<?> values, boolean biasCorrected){
	StandardDeviation statistic = new StandardDeviation();
	statistic.setBiasCorrected(biasCorrected);

	for(Object value : values){
		Number number = (Number)TypeUtil.parseOrCast(DataType.DOUBLE, value);

		statistic.increment(number.doubleValue());
	}

	return statistic.getResult();
}
 
Example #26
Source File: TestLongStdDevPopAggregation.java    From presto with Apache License 2.0 5 votes vote down vote up
@Override
protected Number getExpectedValue(int start, int length)
{
    if (length == 0) {
        return null;
    }

    double[] values = new double[length];
    for (int i = 0; i < length; i++) {
        values[i] = start + i;
    }

    StandardDeviation stdDev = new StandardDeviation(false);
    return stdDev.evaluate(values);
}
 
Example #27
Source File: MatrixSummaryUtils.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Return an array containing the variance for each row in the given matrix.
 * @param m Not {@code null}.  Size MxN.    If any entry is NaN, the corresponding rows will have a
 *          variance of NaN.
 * @return array of size M.  Never {@code null}  IF there is only one column (or only one entry
 */
public static double[] getRowVariances(final RealMatrix m) {
    Utils.nonNull(m, "Cannot calculate medians on a null matrix.");
    final StandardDeviation std = new StandardDeviation();
    return IntStream.range(0, m.getRowDimension()).boxed()
            .mapToDouble(i -> Math.pow(std.evaluate(m.getRow(i)), 2)).toArray();
}
 
Example #28
Source File: HMMR_EM.java    From HMMRATAC with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Perform one iteration of the EM algorithm
 */
public void iterate(){
	
	double[] temp = new double[mu.length];
	double[] counter = new double[mu.length];
	double total = 0.0;
	Mean[] means = new Mean[mu.length];
	StandardDeviation[] std = new StandardDeviation[mu.length];
	for (int i = 0; i < means.length;i++){
		means[i] = new Mean();
		std[i] = new StandardDeviation();
	}
	for (int i = 0;i < data.length;i++){
		
		for (int l = 0;l < mu.length;l++){
			temp[l] = getWeightedDensity(data[i],mu[l],lamda[l],weights[l]);
			
		}
		int index = returnGreater(temp);
		if (index != -1){
			means[index].increment(data[i]);
			std[index].increment(data[i]);
			counter[index]+=1.0;
			total+=1.0;
		}
	}
	for (int i = 0;i < mu.length;i++){
		mu[i] = mu[i] + (jump *(means[i].getResult()-mu[i]));
		lamda[i] = lamda[i] + (jump *(std[i].getResult() - lamda[i]));
		weights[i] = counter[i] / total;
	}
		
	
	
}
 
Example #29
Source File: TestMfsBlockLoader.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
private static double stdDev(List<Long> data) {
  double[] asDouble = new double[data.size()];
  int i = 0;
  for (Long l : data) {
    asDouble[i++] = (double) l;
  }
  return new StandardDeviation().evaluate(asDouble);
}
 
Example #30
Source File: DBIDRouterTest.java    From SearchServices with GNU Lesser General Public License v3.0 5 votes vote down vote up
@Test
public void multipleShardsInTheCluster_shouldBalanceNodes()
{
    int [] shardIdentifiers = range(0,15).toArray();
    int shardCount = shardIdentifiers.length;
    int howManyDocumentsPerShard = 10000;

    Map<Integer, Integer> nodeDistributionMap = new HashMap<>();

    range(0, shardCount * howManyDocumentsPerShard)
            .mapToLong(Long::valueOf)
            .forEach(id -> {
                Node node = new Node();
                node.setId(id);
                stream(shardIdentifiers)
                        .forEach(shardId -> {
                            if (router.routeNode(shardCount, shardId, node))
                            {
                                nodeDistributionMap.merge(shardId, 1, Integer::sum);
                            }
                        });
            });

    StandardDeviation sd = new StandardDeviation();
    double deviation = sd.evaluate(nodeDistributionMap.values().stream().mapToDouble(Number::doubleValue).toArray());
    double norm = deviation/(howManyDocumentsPerShard) * 100;

    assertEquals(shardIdentifiers.length, nodeDistributionMap.size());

    // Asserts the standard deviation of the distribution map is in percentage lesser than 30%
    assertTrue(
            nodeDistributionMap.values().toString() + ", SD = " + deviation + ", SD_NORM = " + norm + "%",
            norm < 30);
}