Java Code Examples for org.apache.commons.math3.stat.descriptive.rank.Percentile#setData()

The following examples show how to use org.apache.commons.math3.stat.descriptive.rank.Percentile#setData(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out related API usage in the sidebar.
Example 1
Source File: MomentSolverTest.java    From momentsketch with Apache License 2.0 6 votes vote down vote up
/**
 * Feeds the values 0..999 into a {@link MomentStruct}, solves it, and checks that
 * the quantiles reported by {@link MomentSolver} stay within 1.0 of the reference
 * values (the 90th percentile from Commons Math {@link Percentile}, and 0.0 for
 * the minimum).
 */
@Test
public void testFromRaw() {
    final int sampleCount = 1000;
    final double tolerance = 1.0;

    // Raw input: the integers 0, 1, ..., sampleCount - 1 stored as doubles.
    final double[] samples = new double[sampleCount];
    for (int idx = 0; idx < samples.length; idx++) {
        samples[idx] = idx;
    }

    final MomentStruct moments = new MomentStruct(10);
    moments.add(samples);

    final MomentSolver solver = new MomentSolver(moments);
    solver.setGridSize(1024);
    solver.solve();
    final double estimatedP90 = solver.getQuantile(.9);

    // Reference 90th percentile computed directly from the raw samples.
    final Percentile reference = new Percentile();
    reference.setData(samples);
    final double expectedP90 = reference.evaluate(90.0);
    assertEquals(expectedP90, estimatedP90, tolerance);

    // The batch API must agree with the single-quantile call and report ~0 for the minimum.
    final double[] fractions = new double[] {0, .1, .5, .9, 1.0};
    final double[] estimates = solver.getQuantiles(fractions);
    assertEquals(0.0, estimates[0], tolerance);
    assertEquals(expectedP90, estimates[3], tolerance);
}
 
Example 2
Source File: DecileCollection.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Builds a DecileCollection by evaluating the 10th, 20th, ..., 90th percentiles of
 * the given samples with Apache Commons {@link Percentile}.
 *
 * @param samples   non-null, non-empty list of samples (caution should be used if
 *                  this contains NaN or infinite values)
 */
public DecileCollection(final List<Double> samples) {
    Utils.nonNull(samples);
    Utils.validateArg(!samples.isEmpty(), "Cannot construct deciles for empty list of samples.");

    final Percentile calculator = new Percentile();
    final double[] sampleValues = Doubles.toArray(samples);
    calculator.setData(sampleValues);

    final Decile[] keys = Decile.values();
    // keys[0] receives the 10th percentile, keys[8] the 90th.
    for (int keyIndex = 0; keyIndex < 9; keyIndex++) {
        final int percent = 10 * (keyIndex + 1);
        deciles.put(keys[keyIndex], calculator.evaluate(percent));
    }
}
 
Example 3
Source File: Winsorizer.java    From macrobase with Apache License 2.0 5 votes vote down vote up
/**
 * Clamps every dimension of the input rows to the range spanned by that dimension's
 * {@code trimPct} and {@code 100 - trimPct} percentiles. The computed limits are
 * stored in {@code bounds} as {@code [lower, upper]} per dimension.
 *
 * @param metrics rows of metric values; the first row's length determines the number
 *                of dimensions processed
 * @return a new list of freshly allocated rows holding the clamped values
 */
public List<double[]> process(List<double[]> metrics) {
    final int rowCount = metrics.size();
    final int dimCount = metrics.get(0).length;
    final Percentile calculator = new Percentile();
    bounds = new double[dimCount][2];

    final List<double[]> clamped = new ArrayList<>(rowCount);
    for (int row = 0; row < rowCount; row++) {
        clamped.add(new double[dimCount]);
    }

    // Scratch buffer reused for each dimension's column of values.
    final double[] column = new double[rowCount];
    for (int dim = 0; dim < dimCount; dim++) {
        for (int row = 0; row < rowCount; row++) {
            column[row] = metrics.get(row)[dim];
        }
        calculator.setData(column);

        final double lower = calculator.evaluate(trimPct);
        bounds[dim][0] = lower;
        final double upper = calculator.evaluate(100 - trimPct);
        bounds[dim][1] = upper;

        for (int row = 0; row < rowCount; row++) {
            final double value = column[row];
            // The upper limit is tested first; values failing both comparisons pass through.
            final double result = (value > upper) ? upper : ((value < lower) ? lower : value);
            clamped.get(row)[dim] = result;
        }
    }

    return clamped;
}
 
Example 4
Source File: QuantileClassifierTest.java    From macrobase with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a {@link QuantileClassifier} over the fixture frame and checks that the row
 * count is preserved, the output has 8 columns against the input's 7, the low/high
 * cutoffs are within 5.0 of the exact 1st/99th percentiles of the raw data, and each
 * group's outlier count is within 5.0 of the count derived from those exact cutoffs.
 */
@Test
public void testClassify() throws Exception {
    assertEquals(length, df.getNumRows());
    QuantileClassifier classifier = new QuantileClassifier("count", quantileColumnsMap);
    classifier.process(df);
    DataFrame result = classifier.getResults();
    assertEquals(df.getNumRows(), result.getNumRows());
    assertEquals(7, df.getSchema().getNumColumns());
    assertEquals(8, result.getSchema().getNumColumns());

    // Exact cutoffs computed from the raw data serve as the reference.
    Percentile exact = new Percentile();
    exact.setData(rawData);
    final double expectedLow = exact.evaluate(1);
    final double expectedHigh = exact.evaluate(99);
    assertEquals(expectedLow, classifier.getLowCutoff(), 5.0);
    assertEquals(expectedHigh, classifier.getHighCutoff(), 5.0);

    double[] reported = result.getDoubleColumnByName("_OUTLIER");

    for (int group = 0; group < reported.length; group++) {
        int expectedOutliers = 0;
        for (double value : rawGroups.get(group)) {
            if (value < expectedLow || value > expectedHigh) {
                expectedOutliers++;
            }
        }
        assertEquals(expectedOutliers, reported[group], 5.0);
    }
}
 
Example 5
Source File: QuantileClassifierTest.java    From macrobase with Apache License 2.0 5 votes vote down vote up
/**
 * Configures a {@link QuantileClassifier} entirely through its setters (low side
 * only, percentile 5.0, output column {@code "_OUT"}) and checks that the row count
 * is preserved, the low cutoff is within 5.0 of the exact 5th percentile of the raw
 * data, and each group's outlier count is within 5.0 of the count derived from it.
 */
@Test
public void testConfigure() throws Exception {
    QuantileClassifier classifier = new QuantileClassifier("col1", new LinkedHashMap<>());
    classifier.setCountColumnName("count");
    classifier.setQuantileColumnNames(quantileColumnNames);
    classifier.setQuantiles(quantiles);
    classifier.setIncludeHigh(false);
    classifier.setIncludeLow(true);
    classifier.setOutputColumnName("_OUT");
    classifier.setPercentile(5.0);

    classifier.process(df);
    DataFrame result = classifier.getResults();
    assertEquals(df.getNumRows(), result.getNumRows());

    // Exact low cutoff computed from the raw data serves as the reference.
    Percentile exact = new Percentile();
    exact.setData(rawData);
    final double expectedLow = exact.evaluate(5);
    assertEquals(expectedLow, classifier.getLowCutoff(), 5.0);

    double[] reported = result.getDoubleColumnByName("_OUT");

    for (int group = 0; group < reported.length; group++) {
        int expectedOutliers = 0;
        for (double value : rawGroups.get(group)) {
            if (value < expectedLow) {
                expectedOutliers++;
            }
        }
        assertEquals(expectedOutliers, reported[group], 5.0);
    }
}
 
Example 6
Source File: DecileCollection.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Builds a DecileCollection by evaluating the 10th, 20th, ..., 90th percentiles of
 * the given samples with Apache Commons {@link Percentile}.
 *
 * @param samples   non-null, non-empty list of samples (caution should be used if
 *                  this contains NaN or infinite values)
 */
public DecileCollection(final List<Double> samples) {
    Utils.nonNull(samples);
    Utils.validateArg(!samples.isEmpty(), "Cannot construct deciles for empty list of samples.");

    final Percentile calculator = new Percentile();
    final double[] sampleValues = Doubles.toArray(samples);
    calculator.setData(sampleValues);

    final Decile[] keys = Decile.values();
    // keys[0] receives the 10th percentile, keys[8] the 90th.
    for (int keyIndex = 0; keyIndex < 9; keyIndex++) {
        final int percent = 10 * (keyIndex + 1);
        deciles.put(keys[keyIndex], calculator.evaluate(percent));
    }
}
 
Example 7
Source File: PercentileAggregator.java    From rapidminer-studio with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Evaluates the {@code percentile} field over all values currently held in
 * {@code elements}, using a fresh Commons Math {@link Percentile} instance.
 *
 * @return the value returned by {@link Percentile#evaluate(double)} for the stored data
 */
@Override
protected double getValue() {
	final Percentile calculator = new Percentile();
	// Unbox the collected elements into the primitive array Percentile expects.
	final Double[] boxed = elements.toArray(new Double[0]);
	final double[] samples = ArrayUtils.toPrimitive(boxed);
	calculator.setData(samples);
	return calculator.evaluate(percentile);
}
 
Example 8
Source File: MetricBucketTransformer.java    From macrobase with Apache License 2.0 4 votes vote down vote up
/**
 * Copies the input frame into {@code transformedDF} and, for every configured metric
 * column, adds a string column that labels each row with the bucket its value falls
 * into. Bucket boundaries are the column's values at {@code boundaryPercentiles}.
 *
 * <p>Labels are either {@code "<column>:<bucket index>"} (when
 * {@code simpleBucketValues} is set) or {@code "<column>:[lo,hi]"} ranges with an
 * open end for the first and last bucket.
 *
 * @param input frame containing the metric columns to bucket
 * @throws Exception declared by the overridden method's signature
 */
@Override
public void process(DataFrame input) throws Exception {
    transformedDF = input.copy();

    final int metricCount = metricColumns.size();
    for (int metricIdx = 0; metricIdx < metricCount; metricIdx++) {
        final String metricName = metricColumns.get(metricIdx);
        final double[] metricValues = input.getDoubleColumnByName(metricName);

        final int rowCount = metricValues.length;
        final int boundaryCount = boundaryPercentiles.length;

        // Resolve each requested percentile to a concrete cutoff for this column.
        final double[] cutoffs = new double[boundaryCount];
        final Percentile calculator = new Percentile();
        calculator.setData(metricValues);
        for (int b = 0; b < boundaryCount; b++) {
            cutoffs[b] = calculator.evaluate(boundaryPercentiles[b]);
        }

        // boundaryCount cutoffs delimit boundaryCount + 1 buckets.
        final String[] labels = new String[boundaryCount + 1];
        if (simpleBucketValues) {
            for (int bucket = 0; bucket <= boundaryCount; bucket++) {
                labels[bucket] = String.format("%s:%d", metricName, bucket);
            }
        } else {
            labels[0] = String.format("%s:[,%g]", metricName, cutoffs[0]);
            for (int bucket = 1; bucket < boundaryCount; bucket++) {
                labels[bucket] = String.format(
                        "%s:[%g,%g]", metricName, cutoffs[bucket - 1], cutoffs[bucket]);
            }
            labels[boundaryCount] = String.format("%s:[%g,]", metricName, cutoffs[boundaryCount - 1]);
        }

        final String[] bucketed = new String[rowCount];
        for (int row = 0; row < rowCount; row++) {
            final int found = Arrays.binarySearch(cutoffs, metricValues[row]);
            // A negative result encodes the insertion point as -(insertionPoint) - 1.
            final int bucket = (found < 0) ? (-found - 1) : found;
            bucketed[row] = labels[bucket];
        }
        transformedDF.addColumn(transformedColumnNames.get(metricIdx), bucketed);
    }
}
 
Example 9
Source File: LeqStats.java    From NoiseCapture with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Compute Leq stats using the specified ranges.
 * <p>
 * Expands the {@code leqClass} histogram into one sample per counted occurrence
 * (level = class key * {@code classStep}), evaluates the 90th, 50th and 10th
 * percentiles of those samples (returned as la10, la50 and la90 respectively), and
 * sums, for each provided range, the fraction of occurrences whose level lies in
 * {@code [min, max)}.
 * @param laOccurrencesRanges Min-Max range ex: new double[][]{{Double.MIN_VALUE, 45}, {45, 55}, {55, 65}, {65, 75},{75, Double.MAX_VALUE}}.
 *                            May be null, in which case the per-range list is empty.
 * @return LeqOccurrences instance
 */
public LeqOccurrences computeLeqOccurrences(double[][] laOccurrencesRanges) {
    // Compute invert sum of class occurrences
    // classList keeps levels in map iteration order; classValue is built by
    // prepending, so it holds the counts in the REVERSE of that order.
    List<Double> classList = new ArrayList<>(leqClass.size());
    List<Integer> classValue = new ArrayList<>(leqClass.size());
    long sum = 0;
    // NOTE(review): the buffer is sized by rmsSumCount; this assumes the class counts
    // add up to exactly rmsSumCount — confirm against where both are updated.
    double[] values = new double[rmsSumCount];
    int valCounter = 0;
    for(Map.Entry<Integer, AtomicInteger> entry : leqClass.entrySet()) {
        double leq = entry.getKey() * classStep;
        classList.add(leq);
        classValue.add(0, entry.getValue().get());
        sum += entry.getValue().get();
        // Repeat the class level once per counted occurrence.
        for(int classValCount = 0; classValCount < entry.getValue().get(); classValCount++) {
            values[valCounter++] = leq;
        }
    }
    // Running cumulative fraction over the reversed counts, itself stored by
    // prepending. Within this method only the size of this list is read afterwards.
    List<Double> sumClassValuePerc = new ArrayList<>(classValue.size());
    double invSum = 0.;
    for(int classVal : classValue) {
        invSum += classVal / (double)sum;
        sumClassValuePerc.add(0, invSum);
    }
    Percentile percentile = new Percentile();
    percentile.setData(values);
    // Fetch level at each lae
    // laN is read at percentile (100 - N): la10 at the 90th, la90 at the 10th.
    double la10 = percentile.evaluate(100 - 10);
    double la50 = percentile.evaluate(50);
    double la90 = percentile.evaluate(100 - 90);

    // Sum percentage between provided laOccurrancesRanges
    List<Double> laOccurrencesRangesValue = new ArrayList<>();
    if(laOccurrencesRanges != null) {
        for(double[] range : laOccurrencesRanges) {
            double min = range[0];
            double max = range[1];
            double sumClass = 0;
            for(int idClass = 0; idClass < sumClassValuePerc.size(); idClass++) {
                if(classList.get(idClass) >= min) {
                    if(classList.get(idClass) < max) {
                        // classValue is reversed relative to classList, so flip the index
                        // to read the count belonging to this level.
                        sumClass += classValue.get(sumClassValuePerc.size() - 1 - idClass) / (double)sum;
                    } else {
                        // NOTE(review): stopping at the first level >= max presumably relies
                        // on leqClass iterating in ascending key order — confirm.
                        break;
                    }
                }
            }
            laOccurrencesRangesValue.add(sumClass);
        }
    }

    return new LeqOccurrences(la10, la50, la90, laOccurrencesRangesValue);
}
 
Example 10
Source File: ComplexDoubleVector.java    From jpmml-evaluator with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Evaluates the requested percentile over the first {@code size} entries of
 * {@code values}, working on a sorted private copy so the backing array is untouched.
 *
 * @param percentile the percentile to evaluate, passed through to {@link Percentile#evaluate(double)}
 * @return the percentile value computed by Commons Math
 * @throws IllegalStateException if {@code size} is zero
 */
@Override
public double doublePercentile(int percentile){
	final int count = this.size;

	if(count == 0){
		throw new IllegalStateException();
	}

	// Snapshot the live portion of the backing array before sorting it.
	final double[] snapshot = new double[count];
	System.arraycopy(this.values, 0, snapshot, 0, snapshot.length);
	Arrays.sort(snapshot);

	final Percentile calculator = new Percentile();
	calculator.setData(snapshot);

	return calculator.evaluate(percentile);
}