Java Code Examples for org.apache.commons.math3.stat.descriptive.rank.Percentile#evaluate()

The following examples show how to use org.apache.commons.math3.stat.descriptive.rank.Percentile#evaluate(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: MomentSolverTest.java    From momentsketch with Apache License 2.0 6 votes vote down vote up
/**
 * Feeds the raw values 0..999 into a moment sketch and checks that the quantiles
 * estimated by {@link MomentSolver} stay within an absolute tolerance of 1.0 of the
 * reference values computed by Commons Math {@link Percentile}.
 */
@Test
public void testFromRaw() {
    final int sampleCount = 1000;
    final double[] samples = new double[sampleCount];
    for (int idx = 0; idx < samples.length; idx++) {
        samples[idx] = idx;
    }

    // Build the sketch from the raw samples and solve it with a grid size of 1024.
    final MomentStruct sketch = new MomentStruct(10);
    sketch.add(samples);
    final MomentSolver solver = new MomentSolver(sketch);
    solver.setGridSize(1024);
    solver.solve();
    final double estimatedP90 = solver.getQuantile(.9);

    // Reference 90th percentile of the same samples.
    final Percentile reference = new Percentile();
    reference.setData(samples);
    final double expectedP90 = reference.evaluate(90.0);
    assertEquals(expectedP90, estimatedP90, 1.0);

    // The batch API must agree with the reference as well.
    final double[] fractions = {0, .1, .5, .9, 1.0};
    final double[] estimates = solver.getQuantiles(fractions);
    assertEquals(0.0, estimates[0], 1.0);
    assertEquals(expectedP90, estimates[3], 1.0);
}
 
Example 2
Source File: MovingMedianEvaluator.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the moving median of a list of numbers over a fixed-size sliding window.
 *
 * <p>Each element of {@code first} is pushed into a {@link DescriptiveStatistics} window whose
 * size is {@code second}. Whenever the window holds at least that many values, the 50th
 * percentile of the current window contents is appended to the result.</p>
 *
 * @param first  the values to slide over; must be a {@link List} whose elements are cast to {@link Number}
 * @param second the window size; must be a {@link Number} (its {@code intValue()} is used)
 * @return a {@code List<Number>} holding one median per position at which the window was full
 * @throws IOException if either argument is {@code null} or is not of the expected type
 */
@Override
public Object doWork(Object first, Object second) throws IOException{
  if(null == first){
    throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - null found for the first value",toExpression(constructingFactory)));
  }
  if(null == second){
    throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - null found for the second value",toExpression(constructingFactory)));
  }
  if(!(first instanceof List<?>)){
    throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - found type %s for the first value, expecting a List",toExpression(constructingFactory), first.getClass().getSimpleName()));
  }
  if(!(second instanceof Number)){
    // Report the type of the offending (second) argument, not the first one.
    throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - found type %s for the second value, expecting a Number",toExpression(constructingFactory), second.getClass().getSimpleName()));
  }

  List<?> values = (List<?>)first;
  int window = ((Number)second).intValue();

  List<Number> moving = new ArrayList<>();
  DescriptiveStatistics slider = new DescriptiveStatistics(window);
  Percentile percentile = new Percentile();
  for(Object value : values){
    slider.addValue(((Number)value).doubleValue());
    // Only emit a median once the window has filled up.
    if(slider.getN() >= window){
      double median = percentile.evaluate(slider.getValues(), 50);
      moving.add(median);
    }
  }

  return moving;
}
 
Example 3
Source File: DecileCollection.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Builds the nine deciles (10th, 20th, ..., 90th percentile) of the given samples using
 * Apache Commons {@link Percentile}.
 *
 * @param samples   non-null, non-empty list of samples (take care if it contains NaN or infinite values)
 */
public DecileCollection(final List<Double> samples) {
    Utils.nonNull(samples);
    Utils.validateArg(!samples.isEmpty(), "Cannot construct deciles for empty list of samples.");

    final double[] data = Doubles.toArray(samples);
    final Percentile calculator = new Percentile();
    calculator.setData(data);

    // The key at index k receives the (k + 1) * 10-th percentile.
    final Decile[] keys = Decile.values();
    for (int keyIndex = 0; keyIndex < 9; keyIndex++) {
        final int percent = 10 * (keyIndex + 1);
        deciles.put(keys[keyIndex], calculator.evaluate(percent));
    }
}
 
Example 4
Source File: Winsorizer.java    From macrobase with Apache License 2.0 5 votes vote down vote up
/**
 * Clamps every dimension of the input rows to its [trimPct, 100 - trimPct] percentile range.
 *
 * <p>For each dimension the two percentile values are stored in {@code bounds[dim][0]} (lower)
 * and {@code bounds[dim][1]} (upper). Values above the upper bound are replaced by it, values
 * below the lower bound are replaced by it, and everything else is copied unchanged. The input
 * arrays are not written to.</p>
 *
 * @param metrics rows of equal length; the dimension count is taken from the first row
 * @return a new list of new arrays with the clamped values
 */
public List<double[]> process(List<double[]> metrics) {
    final int rowCount = metrics.size();
    final int dimCount = metrics.get(0).length;
    bounds = new double[dimCount][2];

    List<double[]> clipped = new ArrayList<>(rowCount);
    for (int row = 0; row < rowCount; row++) {
        clipped.add(new double[dimCount]);
    }

    Percentile calculator = new Percentile();
    double[] column = new double[rowCount];
    for (int dim = 0; dim < dimCount; dim++) {
        // Gather the current dimension across all rows.
        for (int row = 0; row < rowCount; row++) {
            column[row] = metrics.get(row)[dim];
        }
        calculator.setData(column);

        final double lower = calculator.evaluate(trimPct);
        bounds[dim][0] = lower;
        final double upper = calculator.evaluate(100 - trimPct);
        bounds[dim][1] = upper;

        for (int row = 0; row < rowCount; row++) {
            final double value = column[row];
            clipped.get(row)[dim] = value > upper ? upper : (value < lower ? lower : value);
        }
    }

    return clipped;
}
 
Example 5
Source File: QuantileClassifierTest.java    From macrobase with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a {@link QuantileClassifier} with its default cutoffs and checks the output shape,
 * the low/high cutoffs against the exact 1st/99th percentiles of the raw data, and the
 * per-group outlier counts (each within an absolute tolerance of 5.0).
 */
@Test
public void testClassify() throws Exception {
    assertEquals(length, df.getNumRows());

    QuantileClassifier classifier = new QuantileClassifier("count", quantileColumnsMap);
    classifier.process(df);
    DataFrame result = classifier.getResults();

    // Classification adds exactly one column and keeps the row count.
    assertEquals(df.getNumRows(), result.getNumRows());
    assertEquals(7, df.getSchema().getNumColumns());
    assertEquals(8, result.getSchema().getNumColumns());

    Percentile exact = new Percentile();
    exact.setData(rawData);
    final double expectedLow = exact.evaluate(1);
    final double expectedHigh = exact.evaluate(99);
    assertEquals(expectedLow, classifier.getLowCutoff(), 5.0);
    assertEquals(expectedHigh, classifier.getHighCutoff(), 5.0);

    double[] reported = result.getDoubleColumnByName("_OUTLIER");
    for (int group = 0; group < reported.length; group++) {
        int expectedOutliers = 0;
        for (double value : rawGroups.get(group)) {
            if (value < expectedLow || value > expectedHigh) {
                expectedOutliers++;
            }
        }
        assertEquals(expectedOutliers, reported[group], 5.0);
    }
}
 
Example 6
Source File: QuantileClassifierTest.java    From macrobase with Apache License 2.0 5 votes vote down vote up
/**
 * Configures a {@link QuantileClassifier} through its setters (low side only, 5th percentile,
 * custom output column) and checks the low cutoff and per-group outlier counts against the
 * exact 5th percentile of the raw data (each within an absolute tolerance of 5.0).
 */
@Test
public void testConfigure() throws Exception {
    QuantileClassifier classifier = new QuantileClassifier("col1", new LinkedHashMap<>());
    classifier.setCountColumnName("count");
    classifier.setQuantileColumnNames(quantileColumnNames);
    classifier.setQuantiles(quantiles);
    classifier.setIncludeHigh(false);
    classifier.setIncludeLow(true);
    classifier.setOutputColumnName("_OUT");
    classifier.setPercentile(5.0);

    classifier.process(df);
    DataFrame result = classifier.getResults();
    assertEquals(df.getNumRows(), result.getNumRows());

    Percentile exact = new Percentile();
    exact.setData(rawData);
    final double expectedLow = exact.evaluate(5);
    assertEquals(expectedLow, classifier.getLowCutoff(), 5.0);

    double[] reported = result.getDoubleColumnByName("_OUT");
    for (int group = 0; group < reported.length; group++) {
        int expectedOutliers = 0;
        for (double value : rawGroups.get(group)) {
            if (value < expectedLow) {
                expectedOutliers++;
            }
        }
        assertEquals(expectedOutliers, reported[group], 5.0);
    }
}
 
Example 7
Source File: DecileCollection.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Creates a DecileCollection holding the 10th through 90th percentiles (in steps of 10) of the
 * supplied samples, as computed by Apache Commons {@link Percentile}.
 *
 * @param samples   non-null, non-empty list of samples (be careful if it contains NaN or infinite values)
 */
public DecileCollection(final List<Double> samples) {
    Utils.nonNull(samples);
    Utils.validateArg(!samples.isEmpty(), "Cannot construct deciles for empty list of samples.");

    final Percentile evaluator = new Percentile();
    evaluator.setData(Doubles.toArray(samples));

    final Decile[] keys = Decile.values();
    // Step through 10, 20, ..., 90; percent / 10 - 1 is the index of the matching key.
    for (int percent = 10; percent < 100; percent += 10) {
        final double value = evaluator.evaluate(percent);
        deciles.put(keys[percent / 10 - 1], value);
    }
}
 
Example 8
Source File: PercentileAggregator.java    From rapidminer-studio with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Evaluates the configured {@code percentile} over all values currently held in {@code elements}.
 *
 * @return the percentile value as computed by Commons Math {@link Percentile}
 */
@Override
protected double getValue() {
	// Unbox the collected values into a primitive array for Commons Math.
	final double[] data = ArrayUtils.toPrimitive(elements.toArray(new Double[0]));
	final Percentile calculator = new Percentile();
	calculator.setData(data);
	return calculator.evaluate(percentile);
}
 
Example 9
Source File: ReadCountCollectionUtils.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Clamps the extreme count values of a read-count collection to percentile-derived thresholds.
 *
 * <p>Two thresholds are computed over all counts taken together: the value at {@code percentile}
 * and the value at the complementary {@code 100 - percentile}. Every count below the lower
 * threshold is set to it and every count above the upper threshold is set to it. A one-line
 * summary is written to {@code logger} at info level.</p>
 *
 * <p>Entries are overwritten directly on the matrix returned by {@code readCounts.counts()},
 * so the input is modified as a result of this call.</p>
 *
 * @param readCounts the input and output read-count matrix.
 * @param percentile the lower percentile on a 0-100 scale; expected to be in the range [0 .. 50.0].
 * @param logger receives the summary of how many counts were truncated.
 */
public static void truncateExtremeCounts(final ReadCountCollection readCounts, final double percentile, final Logger logger) {

    final RealMatrix matrix = readCounts.counts();
    final int rows = matrix.getRowDimension();
    final int columns = matrix.getColumnDimension();

    // Flatten the matrix (row major) so the thresholds are computed over all counts at once.
    final double[] flattened = Doubles.concat(matrix.getData());

    final Percentile lowerEvaluator = new Percentile(percentile);
    final Percentile upperEvaluator = new Percentile(100.0 - percentile);
    final double lowerThreshold = lowerEvaluator.evaluate(flattened);
    final double upperThreshold = upperEvaluator.evaluate(flattened);

    long seen = 0;
    long raisedToLower = 0;
    long loweredToUpper = 0;
    for (int row = 0; row < rows; row++) {
        final double[] rowValues = matrix.getRow(row);
        for (int column = 0; column < columns; column++) {
            seen++;
            final double value = rowValues[column];
            if (value < lowerThreshold) {
                matrix.setEntry(row, column, lowerThreshold);
                raisedToLower++;
            } else if (value > upperThreshold) {
                matrix.setEntry(row, column, upperThreshold);
                loweredToUpper++;
            }
        }
    }

    final long truncated = loweredToUpper + raisedToLower;
    if (truncated != 0) {
        final double truncatedPercentage = ((double) truncated / seen) * 100;
        logger.info(String.format("Some counts (%d out of %d, %.2f%%) were truncated as they fall out of the " +
                "non-extreme range [%.2f, %.2f]", truncated, seen,
                truncatedPercentage, lowerThreshold, upperThreshold));
    } else {
        logger.info(String.format("None of the %d counts were truncated as they all fall in the non-extreme range " +
                "[%.2f, %.2f]", seen, lowerThreshold, upperThreshold));
    }
}
 
Example 10
Source File: RobustEmpiricalCovarianceTest.java    From macrobase with Apache License 2.0 4 votes vote down vote up
/**
 * Trains {@link MinCovDet} and {@link RobustEmpiricalCovariance} on the same seeded synthetic
 * data (every 50th sample is uniform noise, the rest is Gaussian), scores all samples with
 * both, and checks that the two disagree on fewer than 50 samples about which scores exceed
 * their respective 99th-percentile threshold.
 */
@Test
public void testFindOutliers() {
    final int dims = 5;
    final int sampleCount = 50000;
    final int noisePeriod = 50;
    final double outlierFraction = 0.01;

    // Seeded generator: the draw order below determines the data set.
    Random rng = new Random(0);
    List<Datum> data = new ArrayList<>();
    for (int row = 0; row < sampleCount; ++row) {
        final boolean noisy = row % noisePeriod == 0;
        double[] point = new double[dims];
        for (int d = 0; d < dims; d++) {
            point[d] = noisy ? 100*rng.nextDouble() : (d+1)*rng.nextGaussian();
        }
        data.add(new Datum(new ArrayList<>(), new ArrayRealVector(point)));
    }

    MacroBaseConf conf = new MacroBaseConf()
            .set(MacroBaseConf.MCD_STOPPING_DELTA, 0.0001)
            .set(MacroBaseConf.RANDOM_SEED, 0)
            .set(MacroBaseConf.METRICS, Arrays.asList(new String[dims]));

    MinCovDet mcd = new MinCovDet(conf);
    long begin = System.currentTimeMillis();
    mcd.train(data);
    long finish = System.currentTimeMillis();
    log.debug("MCD Trained on {} in {}", sampleCount, finish-begin);

    RobustEmpiricalCovariance rcov = new RobustEmpiricalCovariance(conf);
    begin = System.currentTimeMillis();
    rcov.train(data);
    finish = System.currentTimeMillis();
    log.debug("RCOV Trained on {} in {}", sampleCount, finish-begin);

    double[] mcdScores = new double[sampleCount];
    begin = System.currentTimeMillis();
    for (int row = 0; row < sampleCount; row++) {
        mcdScores[row] = mcd.score(data.get(row));
    }
    finish = System.currentTimeMillis();
    log.debug("MCD Scored on {} in {}", sampleCount, finish-begin);

    double[] rcovScores = new double[sampleCount];
    begin = System.currentTimeMillis();
    for (int row = 0; row < sampleCount; row++) {
        rcovScores[row] = rcov.score(data.get(row));
    }
    finish = System.currentTimeMillis();
    log.debug("RCOV Scored on {} in {}", sampleCount, finish-begin);

    // Both estimators flag the top outlierFraction of their own scores.
    final double thresholdPercentile = (1-outlierFraction) * 100;
    Percentile calculator = new Percentile();
    final double mcdThreshold = calculator.evaluate(mcdScores, thresholdPercentile);
    final double rcovThreshold = calculator.evaluate(rcovScores, thresholdPercentile);

    int disagreements = 0;
    for (int row = 0; row < sampleCount; row++) {
        final boolean mcdFlags = mcdScores[row] > mcdThreshold;
        final boolean rcovFlags = rcovScores[row] > rcovThreshold;
        if (mcdFlags != rcovFlags) {
            disagreements++;
        }
    }
    assertTrue(disagreements < 50);
}
 
Example 11
Source File: MetricBucketTransformer.java    From macrobase with Apache License 2.0 4 votes vote down vote up
/**
 * Replaces each configured metric column by a string column naming the percentile bucket
 * every value falls into.
 *
 * <p>The input is copied into {@code transformedDF}. For each metric column the boundary values
 * are the {@code boundaryPercentiles} of that column; with {@code k} boundaries there are
 * {@code k + 1} buckets. Bucket names are either {@code "<column>:<index>"} (when
 * {@code simpleBucketValues} is set) or {@code "<column>:[lo,hi]"} with open ends for the first
 * and last bucket. The resulting column is added under the matching entry of
 * {@code transformedColumnNames}.</p>
 *
 * @param input the frame whose metric columns are bucketed
 * @throws Exception declared by the overridden method
 */
@Override
public void process(DataFrame input) throws Exception {
    transformedDF = input.copy();

    final int columnCount = metricColumns.size();
    for (int column = 0; column < columnCount; column++) {
        final String columnName = metricColumns.get(column);
        final double[] data = input.getDoubleColumnByName(columnName);
        final int boundaryCount = boundaryPercentiles.length;

        // Boundary values of this column at the configured percentiles.
        Percentile calculator = new Percentile();
        calculator.setData(data);
        double[] boundaries = new double[boundaryCount];
        for (int b = 0; b < boundaryCount; b++) {
            boundaries[b] = calculator.evaluate(boundaryPercentiles[b]);
        }

        String[] labels = new String[boundaryCount + 1];
        if (!simpleBucketValues) {
            labels[0] = String.format("%s:[,%g]", columnName, boundaries[0]);
            for (int b = 1; b < boundaryCount; b++) {
                labels[b] = String.format("%s:[%g,%g]", columnName, boundaries[b - 1], boundaries[b]);
            }
            labels[boundaryCount] = String.format("%s:[%g,]", columnName, boundaries[boundaryCount - 1]);
        } else {
            for (int b = 0; b <= boundaryCount; b++) {
                labels[b] = String.format("%s:%d", columnName, b);
            }
        }

        String[] bucketed = new String[data.length];
        for (int row = 0; row < data.length; row++) {
            final int found = Arrays.binarySearch(boundaries, data[row]);
            // A negative result encodes the insertion point as -(insertionPoint) - 1.
            final int bucket = found >= 0 ? found : -found - 1;
            bucketed[row] = labels[bucket];
        }
        transformedDF.addColumn(transformedColumnNames.get(column), bucketed);
    }
}
 
Example 12
Source File: LeqStats.java    From NoiseCapture with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Compute Leq occurrence statistics: the levels read at the 90th, 50th and 10th percentile of
 * the recorded class values (passed on as la10, la50 and la90 respectively) and, for each
 * supplied range, the fraction of occurrences whose level lies in [min, max).
 * @param laOccurrencesRanges Min-Max range ex: new double[][]{{Double.MIN_VALUE, 45}, {45, 55}, {55, 65}, {65, 75},{75, Double.MAX_VALUE}};
 *                            may be null, in which case the list of range fractions is empty
 * @return LeqOccurrences instance
 */
public LeqOccurrences computeLeqOccurrences(double[][] laOccurrencesRanges) {
    // Compute invert sum of class occurrences
    List<Double> classList = new ArrayList<>(leqClass.size());
    List<Integer> classValue = new ArrayList<>(leqClass.size());
    long sum = 0;
    // One slot per recorded occurrence; assumes rmsSumCount equals the total of all class counts - TODO confirm
    double[] values = new double[rmsSumCount];
    int valCounter = 0;
    for(Map.Entry<Integer, AtomicInteger> entry : leqClass.entrySet()) {
        // Level of this class: class key scaled by the class step
        double leq = entry.getKey() * classStep;
        // classList keeps the iteration order of leqClass ...
        classList.add(leq);
        // ... whereas classValue is filled from the head, so it ends up in reverse iteration order
        classValue.add(0, entry.getValue().get());
        sum += entry.getValue().get();
        // Expand the class into one entry per occurrence so that Percentile can work on raw values
        for(int classValCount = 0; classValCount < entry.getValue().get(); classValCount++) {
            values[valCounter++] = leq;
        }
    }
    // Running cumulative fraction over classValue, inserted at the head; only its size is used below
    List<Double> sumClassValuePerc = new ArrayList<>(classValue.size());
    double invSum = 0.;
    for(int classVal : classValue) {
        invSum += classVal / (double)sum;
        sumClassValuePerc.add(0, invSum);
    }
    Percentile percentile = new Percentile();
    percentile.setData(values);
    // Fetch the level at each LA indicator: laN is read at percentile (100 - N)
    double la10 = percentile.evaluate(100 - 10);
    double la50 = percentile.evaluate(50);
    double la90 = percentile.evaluate(100 - 90);

    // Sum percentage between provided laOccurrencesRanges
    List<Double> laOccurrencesRangesValue = new ArrayList<>();
    if(laOccurrencesRanges != null) {
        for(double[] range : laOccurrencesRanges) {
            double min = range[0];
            double max = range[1];
            double sumClass = 0;
            for(int idClass = 0; idClass < sumClassValuePerc.size(); idClass++) {
                if(classList.get(idClass) >= min) {
                    if(classList.get(idClass) < max) {
                        // classValue is reversed relative to classList, hence the mirrored index
                        sumClass += classValue.get(sumClassValuePerc.size() - 1 - idClass) / (double)sum;
                    } else {
                        // Stops at the first class at or above max; presumably leqClass iterates in ascending key order - TODO confirm
                        break;
                    }
                }
            }
            laOccurrencesRangesValue.add(sumClass);
        }
    }

    return new LeqOccurrences(la10, la50, la90, laOccurrencesRangesValue);
}
 
Example 13
Source File: ComplexDoubleVector.java    From jpmml-evaluator with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Returns the requested percentile of the first {@code size} entries of {@code values}.
 *
 * @param percentile the percentile to evaluate, passed unchanged to Commons Math {@link Percentile}
 * @return the percentile value
 * @throws IllegalStateException if this vector holds no values
 */
@Override
public double doublePercentile(int percentile){
	final int count = this.size;

	if(count == 0){
		throw new IllegalStateException();
	}

	// Work on a sorted private copy so the backing array is left untouched.
	final double[] sorted = new double[count];
	System.arraycopy(this.values, 0, sorted, 0, sorted.length);
	Arrays.sort(sorted);

	final Percentile calculator = new Percentile();
	calculator.setData(sorted);

	return calculator.evaluate(percentile);
}