org.apache.commons.math3.stat.descriptive.rank.Percentile Java Examples

The following examples show how to use org.apache.commons.math3.stat.descriptive.rank.Percentile. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DescriptiveStatistics.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Estimates the {@code p}th percentile of the values currently stored.
 * <p>
 * The estimate follows the first estimation procedure described
 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a>
 * </p>
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li><code>0 &lt; p &le; 100</code> (otherwise an
 * <code>IllegalArgumentException</code> is thrown)</li>
 * <li>at least one value must be stored (<code>Double.NaN</code> is
 * returned otherwise)</li>
 * </ul>
 *
 * @param p the requested percentile (scaled from 0 - 100)
 * @return an estimate for the pth percentile of the stored data
 * @throws IllegalStateException if the percentile implementation has been
 *  overridden and the supplied implementation does not support setQuantile
 */
public double getPercentile(double p) {
    // Built-in implementation: the quantile can be set through the typed API.
    if (percentileImpl instanceof Percentile) {
        final Percentile builtIn = (Percentile) percentileImpl;
        builtIn.setQuantile(p);
        return apply(percentileImpl);
    }

    // Overridden implementation: its setter is only reachable by name, via reflection.
    try {
        percentileImpl.getClass()
                .getMethod(SET_QUANTILE_METHOD_NAME, Double.TYPE)
                .invoke(percentileImpl, Double.valueOf(p));
    } catch (NoSuchMethodException missingSetter) { // Setter guard should prevent
        throw new MathIllegalStateException(
              LocalizedFormats.PERCENTILE_IMPLEMENTATION_UNSUPPORTED_METHOD,
              percentileImpl.getClass().getName(), SET_QUANTILE_METHOD_NAME);
    } catch (IllegalAccessException inaccessibleSetter) {
        throw new MathIllegalStateException(
              LocalizedFormats.PERCENTILE_IMPLEMENTATION_CANNOT_ACCESS_METHOD,
              SET_QUANTILE_METHOD_NAME, percentileImpl.getClass().getName());
    } catch (InvocationTargetException setterFailure) {
        // Surface whatever the setter itself threw as the cause.
        throw new IllegalStateException(setterFailure.getCause());
    }
    return apply(percentileImpl);
}
 
Example #2
Source File: PercentileClassifier.java    From macrobase with Apache License 2.0 6 votes vote down vote up
/**
 * Classifies each row of {@code input} by comparing its {@code columnName} value with
 * two percentile cutoffs, and stores a copy of the input with the resulting flag column
 * in {@code output}.
 * <p>
 * {@code lowCutoff} is the {@code percentile}-th percentile of the column and
 * {@code highCutoff} the {@code (100 - percentile)}-th. A row is flagged with {@code 1.0}
 * when it is above the high cutoff (and {@code includeHigh} is set) or below the low
 * cutoff (and {@code includeLow} is set); every other row keeps {@code 0.0}.
 * </p>
 *
 * @param input the data frame holding the metric column to classify
 */
@Override
public void process(DataFrame input) {
    final double[] values = input.getDoubleColumnByName(columnName);
    final int rowCount = values.length;

    final Percentile lowEstimator = new Percentile();
    lowCutoff = lowEstimator.evaluate(values, percentile);
    final Percentile highEstimator = new Percentile();
    highCutoff = highEstimator.evaluate(values, 100.0 - percentile);

    output = input.copy();

    final double[] flags = new double[rowCount];
    for (int row = 0; row < rowCount; row++) {
        final double value = values[row];
        final boolean aboveHigh = value > highCutoff && includeHigh;
        final boolean belowLow = value < lowCutoff && includeLow;
        flags[row] = (aboveHigh || belowLow) ? 1.0 : 0.0;
    }
    output.addColumn(outputColumnName, flags);
}
 
Example #3
Source File: HDF5PCACoveragePoNCreationUtilsUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Verifies {@code subsetReadCountsToUsableTargets} against an independent computation:
 * a target is expected to survive when the median of its row is at or above the
 * {@code percentile}-th percentile of all row medians. Surviving targets must keep
 * their relative order, their counts, and their median as target factor.
 */
@Test(dataProvider="readCountAndPercentileData")
public void testSubsetTargetToUsableOnes(final ReadCountCollection readCount, final double percentile) {
    final Median medianCalculator = new Median();
    final RealMatrix counts = readCount.counts();

    // Expected target factors: one median per row (target).
    final int targetCount = counts.getRowDimension();
    final double[] targetMedians = new double[targetCount];
    for (int row = 0; row < targetCount; row++) {
        targetMedians[row] = medianCalculator.evaluate(counts.getRow(row));
    }
    final double threshold = new Percentile(percentile).evaluate(targetMedians);

    // Expected keep/discard decision per target.
    final boolean[] expectedKept = new boolean[targetCount];
    int expectedKeptCount = 0;
    for (int row = 0; row < targetCount; row++) {
        expectedKept[row] = targetMedians[row] >= threshold;
        if (expectedKept[row]) {
            expectedKeptCount++;
        }
    }

    final Pair<ReadCountCollection, double[]> result = HDF5PCACoveragePoNCreationUtils.subsetReadCountsToUsableTargets(readCount, percentile, NULL_LOGGER);
    Assert.assertEquals(result.getLeft().targets().size(), expectedKeptCount);
    Assert.assertEquals(result.getRight().length, expectedKeptCount);

    int expectedPosition = 0;
    for (int row = 0; row < expectedKept.length; row++) {
        final int actualPosition = result.getLeft().targets().indexOf(readCount.targets().get(row));
        if (!expectedKept[row]) {
            // Discarded targets must be absent from the result.
            Assert.assertEquals(actualPosition, -1);
            continue;
        }
        Assert.assertEquals(actualPosition, expectedPosition++);
        Assert.assertEquals(counts.getRow(row), result.getLeft().counts().getRow(actualPosition));
        Assert.assertEquals(result.getRight()[actualPosition], targetMedians[row]);
    }
}
 
Example #4
Source File: ReadCountCollectionUtilsUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Verifies {@code removeColumnsWithExtremeMedianCounts} against an independent
 * computation: a column is expected to survive when its median lies between the
 * {@code percentile}-th and the {@code (100 - percentile)}-th percentile of all column
 * medians (both inclusive). Surviving columns must keep their relative order and data.
 */
@Test(dataProvider="readCountAndPercentileData")
public void testExtremeMedianColumnsData(final ReadCountCollection readCount, final double percentile) {
    final Median medianCalculator = new Median();
    final RealMatrix counts = readCount.counts();

    final int columnCount = counts.getColumnDimension();
    final double[] columnMedians = new double[columnCount];
    for (int column = 0; column < columnCount; column++) {
        columnMedians[column] = medianCalculator.evaluate(counts.getColumn(column));
    }
    final double top = new Percentile(100 - percentile).evaluate(columnMedians);
    final double bottom = new Percentile(percentile).evaluate(columnMedians);

    // Expected keep/discard decision per column.
    final boolean[] expectedKept = new boolean[columnCount];
    int expectedKeptCount = 0;
    for (int column = 0; column < columnCount; column++) {
        final double columnMedian = columnMedians[column];
        expectedKept[column] = columnMedian <= top && columnMedian >= bottom;
        if (expectedKept[column]) {
            expectedKeptCount++;
        }
    }

    final ReadCountCollection result = ReadCountCollectionUtils.removeColumnsWithExtremeMedianCounts(readCount, percentile, NULL_LOGGER);
    Assert.assertEquals(result.columnNames().size(), expectedKeptCount);

    int expectedPosition = 0;
    for (int column = 0; column < expectedKept.length; column++) {
        final int actualPosition = result.columnNames().indexOf(readCount.columnNames().get(column));
        if (!expectedKept[column]) {
            // Removed columns must be absent from the result.
            Assert.assertEquals(actualPosition, -1);
            continue;
        }
        Assert.assertEquals(actualPosition, expectedPosition++);
        Assert.assertEquals(counts.getColumn(column), result.counts().getColumn(actualPosition));
    }
}
 
Example #5
Source File: ReadCountCollectionUtilsUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Verifies {@code truncateExtremeCounts} against an independent computation: every
 * count below the {@code percentile}-th percentile of all counts is expected to be
 * raised to that value, every count above the {@code (100 - percentile)}-th percentile
 * lowered to it, and the matrix dimensions must be unchanged.
 */
@Test(dataProvider="readCountAndPercentileData")
public void testTruncateExtremeCounts(final ReadCountCollection readCount, final double percentile) {
    final RealMatrix counts = readCount.counts();
    // Capture the dimensions BEFORE truncation so the later checks compare against an
    // independent value rather than against the truncated matrix itself.
    final int expectedRowCount = counts.getRowDimension();
    final int expectedColumnCount = counts.getColumnDimension();

    final double[] allCounts = Stream.of(counts.getData())
            .flatMapToDouble(DoubleStream::of).toArray();
    final double bottom = new Percentile(percentile).evaluate(allCounts);
    final double top = new Percentile(100 - percentile).evaluate(allCounts);

    // Expected values must be computed before the call below modifies the counts.
    final double[][] expected = new double[expectedRowCount][];
    for (int i = 0; i < expected.length; i++) {
        expected[i] = DoubleStream.of(counts.getRow(i)).map(d -> d < bottom ? bottom : (d > top) ? top : d).toArray();
    }

    ReadCountCollectionUtils.truncateExtremeCounts(readCount, percentile, NULL_LOGGER);

    final RealMatrix newCounts = readCount.counts();
    Assert.assertEquals(newCounts.getRowDimension(), expectedRowCount);
    Assert.assertEquals(newCounts.getColumnDimension(), expectedColumnCount);
    for (int i = 0; i < expected.length; i++) {
        for (int j = 0; j < expected[i].length; j++) {
            Assert.assertEquals(newCounts.getEntry(i, j), expected[i][j]);
        }
    }
}
 
Example #6
Source File: HDF5PCACoveragePoNCreationUtils.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Drops the targets whose target factor falls below a user-supplied percentile of all
 * target factors.
 *
 * <p>
 *     The returned pair holds, on the left, the read counts restricted to the usable
 *     targets and, on the right, the target factors of those targets. When no target is
 *     discarded the input read counts and the full factor array are returned as they are.
 * </p>
 *
 * @param readCounts the input read-counts.
 * @param targetFactorPercentileThreshold the minimum median count percentile under which targets are not considered useful.
 * @param logger receives an info message stating how many targets were kept or discarded.
 * @return never {@code null}.
 */
@VisibleForTesting
static Pair<ReadCountCollection, double[]> subsetReadCountsToUsableTargets(final ReadCountCollection readCounts,
                                                                           final double targetFactorPercentileThreshold, final Logger logger) {
    final double[] targetFactors = calculateTargetFactors(readCounts);
    final double threshold = new Percentile(targetFactorPercentileThreshold).evaluate(targetFactors);
    final List<Target> allTargets = readCounts.targets();

    // Insertion-ordered set, so usable targets keep their original relative order.
    final Set<Target> usableTargets = IntStream.range(0, targetFactors.length)
            .filter(index -> targetFactors[index] >= threshold)
            .mapToObj(allTargets::get)
            .collect(Collectors.toCollection(LinkedHashSet::new));

    final boolean nothingDiscarded = usableTargets.size() == allTargets.size();
    if (nothingDiscarded) {
        logger.info(String.format("All %d targets are kept", allTargets.size()));
        return new ImmutablePair<>(readCounts, targetFactors);
    }

    final int discardedCount = targetFactors.length - usableTargets.size();
    final String discardMessage = String.format(
            "Discarded %d target(s) out of %d with factors below %.2g (%.2f percentile)",
            discardedCount, targetFactors.length, threshold, targetFactorPercentileThreshold);
    logger.info(discardMessage);

    // Same predicate as above, applied to the factor values themselves.
    final double[] usableFactors = DoubleStream.of(targetFactors)
            .filter(factor -> factor >= threshold)
            .toArray();
    return new ImmutablePair<>(readCounts.subsetTargets(usableTargets), usableFactors);
}
 
Example #7
Source File: DescriptiveStatistics.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Estimates the {@code p}th percentile of the values currently stored.
 * <p>
 * The estimate follows the first estimation procedure described
 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a>
 * </p>
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li><code>0 &lt; p &le; 100</code> (otherwise a
 * <code>MathIllegalArgumentException</code> is thrown)</li>
 * <li>at least one value must be stored (<code>Double.NaN</code> is
 * returned otherwise)</li>
 * </ul>
 *
 * @param p the requested percentile (scaled from 0 - 100)
 * @return an estimate for the pth percentile of the stored data
 * @throws MathIllegalStateException if the percentile implementation has been
 *  overridden and the supplied implementation does not support setQuantile
 * @throws MathIllegalArgumentException if p is not a valid quantile
 */
public double getPercentile(double p) throws MathIllegalStateException, MathIllegalArgumentException {
    // Built-in implementation: the quantile can be set through the typed API.
    if (percentileImpl instanceof Percentile) {
        final Percentile builtIn = (Percentile) percentileImpl;
        builtIn.setQuantile(p);
        return apply(percentileImpl);
    }

    // Overridden implementation: its setter is only reachable by name, via reflection.
    try {
        percentileImpl.getClass()
                .getMethod(SET_QUANTILE_METHOD_NAME, Double.TYPE)
                .invoke(percentileImpl, Double.valueOf(p));
    } catch (NoSuchMethodException missingSetter) { // Setter guard should prevent
        throw new MathIllegalStateException(
              LocalizedFormats.PERCENTILE_IMPLEMENTATION_UNSUPPORTED_METHOD,
              percentileImpl.getClass().getName(), SET_QUANTILE_METHOD_NAME);
    } catch (IllegalAccessException inaccessibleSetter) {
        throw new MathIllegalStateException(
              LocalizedFormats.PERCENTILE_IMPLEMENTATION_CANNOT_ACCESS_METHOD,
              SET_QUANTILE_METHOD_NAME, percentileImpl.getClass().getName());
    } catch (InvocationTargetException setterFailure) {
        // Surface whatever the setter itself threw as the cause.
        throw new IllegalStateException(setterFailure.getCause());
    }
    return apply(percentileImpl);
}
 
Example #8
Source File: DescriptiveStatistics.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Estimates the {@code p}th percentile of the values currently stored.
 * <p>
 * The estimate follows the first estimation procedure described
 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a>
 * </p>
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li><code>0 &lt; p &le; 100</code> (otherwise a
 * <code>MathIllegalArgumentException</code> is thrown)</li>
 * <li>at least one value must be stored (<code>Double.NaN</code> is
 * returned otherwise)</li>
 * </ul>
 *
 * @param p the requested percentile (scaled from 0 - 100)
 * @return an estimate for the pth percentile of the stored data
 * @throws MathIllegalStateException if the percentile implementation has been
 *  overridden and the supplied implementation does not support setQuantile
 * @throws MathIllegalArgumentException if p is not a valid quantile
 */
public double getPercentile(double p) throws MathIllegalStateException, MathIllegalArgumentException {
    // Built-in implementation: the quantile can be set through the typed API.
    if (percentileImpl instanceof Percentile) {
        final Percentile builtIn = (Percentile) percentileImpl;
        builtIn.setQuantile(p);
        return apply(percentileImpl);
    }

    // Overridden implementation: its setter is only reachable by name, via reflection.
    try {
        percentileImpl.getClass()
                .getMethod(SET_QUANTILE_METHOD_NAME, Double.TYPE)
                .invoke(percentileImpl, Double.valueOf(p));
    } catch (NoSuchMethodException missingSetter) { // Setter guard should prevent
        throw new MathIllegalStateException(
              LocalizedFormats.PERCENTILE_IMPLEMENTATION_UNSUPPORTED_METHOD,
              percentileImpl.getClass().getName(), SET_QUANTILE_METHOD_NAME);
    } catch (IllegalAccessException inaccessibleSetter) {
        throw new MathIllegalStateException(
              LocalizedFormats.PERCENTILE_IMPLEMENTATION_CANNOT_ACCESS_METHOD,
              SET_QUANTILE_METHOD_NAME, percentileImpl.getClass().getName());
    } catch (InvocationTargetException setterFailure) {
        // Surface whatever the setter itself threw as the cause.
        throw new IllegalStateException(setterFailure.getCause());
    }
    return apply(percentileImpl);
}
 
Example #9
Source File: DescriptiveStatistics.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Estimates the {@code p}th percentile of the values currently stored.
 * <p>
 * The estimate follows the first estimation procedure described
 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a>
 * </p>
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li><code>0 &lt; p &le; 100</code> (otherwise a
 * <code>MathIllegalArgumentException</code> is thrown)</li>
 * <li>at least one value must be stored (<code>Double.NaN</code> is
 * returned otherwise)</li>
 * </ul>
 *
 * @param p the requested percentile (scaled from 0 - 100)
 * @return an estimate for the pth percentile of the stored data
 * @throws MathIllegalStateException if the percentile implementation has been
 *  overridden and the supplied implementation does not support setQuantile
 * @throws MathIllegalArgumentException if p is not a valid quantile
 */
public double getPercentile(double p) throws MathIllegalStateException, MathIllegalArgumentException {
    // Built-in implementation: the quantile can be set through the typed API.
    if (percentileImpl instanceof Percentile) {
        final Percentile builtIn = (Percentile) percentileImpl;
        builtIn.setQuantile(p);
        return apply(percentileImpl);
    }

    // Overridden implementation: its setter is only reachable by name, via reflection.
    try {
        percentileImpl.getClass()
                .getMethod(SET_QUANTILE_METHOD_NAME, Double.TYPE)
                .invoke(percentileImpl, Double.valueOf(p));
    } catch (NoSuchMethodException missingSetter) { // Setter guard should prevent
        throw new MathIllegalStateException(
              LocalizedFormats.PERCENTILE_IMPLEMENTATION_UNSUPPORTED_METHOD,
              percentileImpl.getClass().getName(), SET_QUANTILE_METHOD_NAME);
    } catch (IllegalAccessException inaccessibleSetter) {
        throw new MathIllegalStateException(
              LocalizedFormats.PERCENTILE_IMPLEMENTATION_CANNOT_ACCESS_METHOD,
              SET_QUANTILE_METHOD_NAME, percentileImpl.getClass().getName());
    } catch (InvocationTargetException setterFailure) {
        // Surface whatever the setter itself threw as the cause.
        throw new IllegalStateException(setterFailure.getCause());
    }
    return apply(percentileImpl);
}
 
Example #10
Source File: DescriptiveStatistics.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Estimates the {@code p}th percentile of the values currently stored.
 * <p>
 * The estimate follows the first estimation procedure described
 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a>
 * </p>
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li><code>0 &lt; p &le; 100</code> (otherwise an
 * <code>IllegalArgumentException</code> is thrown)</li>
 * <li>at least one value must be stored (<code>Double.NaN</code> is
 * returned otherwise)</li>
 * </ul>
 *
 * @param p the requested percentile (scaled from 0 - 100)
 * @return an estimate for the pth percentile of the stored data
 * @throws IllegalStateException if the percentile implementation has been
 *  overridden and the supplied implementation does not support setQuantile
 */
public double getPercentile(double p) {
    // Built-in implementation: the quantile can be set through the typed API.
    if (percentileImpl instanceof Percentile) {
        final Percentile builtIn = (Percentile) percentileImpl;
        builtIn.setQuantile(p);
        return apply(percentileImpl);
    }

    // Overridden implementation: its setter is only reachable by name, via reflection.
    try {
        percentileImpl.getClass()
                .getMethod(SET_QUANTILE_METHOD_NAME, Double.TYPE)
                .invoke(percentileImpl, Double.valueOf(p));
    } catch (NoSuchMethodException missingSetter) { // Setter guard should prevent
        throw new MathIllegalStateException(
              LocalizedFormats.PERCENTILE_IMPLEMENTATION_UNSUPPORTED_METHOD,
              percentileImpl.getClass().getName(), SET_QUANTILE_METHOD_NAME);
    } catch (IllegalAccessException inaccessibleSetter) {
        throw new MathIllegalStateException(
              LocalizedFormats.PERCENTILE_IMPLEMENTATION_CANNOT_ACCESS_METHOD,
              SET_QUANTILE_METHOD_NAME, percentileImpl.getClass().getName());
    } catch (InvocationTargetException setterFailure) {
        // Surface whatever the setter itself threw as the cause.
        throw new IllegalStateException(setterFailure.getCause());
    }
    return apply(percentileImpl);
}
 
Example #11
Source File: DescriptiveStatistics.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Estimates the {@code p}th percentile of the values currently stored.
 * <p>
 * The estimate follows the first estimation procedure described
 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a>
 * </p>
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li><code>0 &lt; p &le; 100</code> (otherwise a
 * <code>MathIllegalArgumentException</code> is thrown)</li>
 * <li>at least one value must be stored (<code>Double.NaN</code> is
 * returned otherwise)</li>
 * </ul>
 *
 * @param p the requested percentile (scaled from 0 - 100)
 * @return an estimate for the pth percentile of the stored data
 * @throws MathIllegalStateException if the percentile implementation has been
 *  overridden and the supplied implementation does not support setQuantile
 * @throws MathIllegalArgumentException if p is not a valid quantile
 */
public double getPercentile(double p) throws MathIllegalStateException, MathIllegalArgumentException {
    // Built-in implementation: the quantile can be set through the typed API.
    if (percentileImpl instanceof Percentile) {
        final Percentile builtIn = (Percentile) percentileImpl;
        builtIn.setQuantile(p);
        return apply(percentileImpl);
    }

    // Overridden implementation: its setter is only reachable by name, via reflection.
    try {
        percentileImpl.getClass()
                .getMethod(SET_QUANTILE_METHOD_NAME, Double.TYPE)
                .invoke(percentileImpl, Double.valueOf(p));
    } catch (NoSuchMethodException missingSetter) { // Setter guard should prevent
        throw new MathIllegalStateException(
              LocalizedFormats.PERCENTILE_IMPLEMENTATION_UNSUPPORTED_METHOD,
              percentileImpl.getClass().getName(), SET_QUANTILE_METHOD_NAME);
    } catch (IllegalAccessException inaccessibleSetter) {
        throw new MathIllegalStateException(
              LocalizedFormats.PERCENTILE_IMPLEMENTATION_CANNOT_ACCESS_METHOD,
              SET_QUANTILE_METHOD_NAME, percentileImpl.getClass().getName());
    } catch (InvocationTargetException setterFailure) {
        // Surface whatever the setter itself threw as the cause.
        throw new IllegalStateException(setterFailure.getCause());
    }
    return apply(percentileImpl);
}
 
Example #12
Source File: DescriptiveStatistics.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Estimates the {@code p}th percentile of the values currently stored.
 * <p>
 * The estimate follows the first estimation procedure described
 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a>
 * </p>
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li><code>0 &lt; p &le; 100</code> (otherwise a
 * <code>MathIllegalArgumentException</code> is thrown)</li>
 * <li>at least one value must be stored (<code>Double.NaN</code> is
 * returned otherwise)</li>
 * </ul>
 *
 * @param p the requested percentile (scaled from 0 - 100)
 * @return an estimate for the pth percentile of the stored data
 * @throws MathIllegalStateException if the percentile implementation has been
 *  overridden and the supplied implementation does not support setQuantile
 * @throws MathIllegalArgumentException if p is not a valid quantile
 */
public double getPercentile(double p) throws MathIllegalStateException, MathIllegalArgumentException {
    // Built-in implementation: the quantile can be set through the typed API.
    if (percentileImpl instanceof Percentile) {
        final Percentile builtIn = (Percentile) percentileImpl;
        builtIn.setQuantile(p);
        return apply(percentileImpl);
    }

    // Overridden implementation: its setter is only reachable by name, via reflection.
    try {
        percentileImpl.getClass()
                .getMethod(SET_QUANTILE_METHOD_NAME, Double.TYPE)
                .invoke(percentileImpl, Double.valueOf(p));
    } catch (NoSuchMethodException missingSetter) { // Setter guard should prevent
        throw new MathIllegalStateException(
              LocalizedFormats.PERCENTILE_IMPLEMENTATION_UNSUPPORTED_METHOD,
              percentileImpl.getClass().getName(), SET_QUANTILE_METHOD_NAME);
    } catch (IllegalAccessException inaccessibleSetter) {
        throw new MathIllegalStateException(
              LocalizedFormats.PERCENTILE_IMPLEMENTATION_CANNOT_ACCESS_METHOD,
              SET_QUANTILE_METHOD_NAME, percentileImpl.getClass().getName());
    } catch (InvocationTargetException setterFailure) {
        // Surface whatever the setter itself threw as the cause.
        throw new IllegalStateException(setterFailure.getCause());
    }
    return apply(percentileImpl);
}
 
Example #13
Source File: DescriptiveStatistics.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Estimates the {@code p}th percentile of the values currently stored.
 * <p>
 * The estimate follows the first estimation procedure described
 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a>
 * </p>
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li><code>0 &lt; p &le; 100</code> (otherwise a
 * <code>MathIllegalArgumentException</code> is thrown)</li>
 * <li>at least one value must be stored (<code>Double.NaN</code> is
 * returned otherwise)</li>
 * </ul>
 *
 * @param p the requested percentile (scaled from 0 - 100)
 * @return an estimate for the pth percentile of the stored data
 * @throws MathIllegalStateException if the percentile implementation has been
 *  overridden and the supplied implementation does not support setQuantile
 * @throws MathIllegalArgumentException if p is not a valid quantile
 */
public double getPercentile(double p) throws MathIllegalStateException, MathIllegalArgumentException {
    // Built-in implementation: the quantile can be set through the typed API.
    if (percentileImpl instanceof Percentile) {
        final Percentile builtIn = (Percentile) percentileImpl;
        builtIn.setQuantile(p);
        return apply(percentileImpl);
    }

    // Overridden implementation: its setter is only reachable by name, via reflection.
    try {
        percentileImpl.getClass()
                .getMethod(SET_QUANTILE_METHOD_NAME, Double.TYPE)
                .invoke(percentileImpl, Double.valueOf(p));
    } catch (NoSuchMethodException missingSetter) { // Setter guard should prevent
        throw new MathIllegalStateException(
              LocalizedFormats.PERCENTILE_IMPLEMENTATION_UNSUPPORTED_METHOD,
              percentileImpl.getClass().getName(), SET_QUANTILE_METHOD_NAME);
    } catch (IllegalAccessException inaccessibleSetter) {
        throw new MathIllegalStateException(
              LocalizedFormats.PERCENTILE_IMPLEMENTATION_CANNOT_ACCESS_METHOD,
              SET_QUANTILE_METHOD_NAME, percentileImpl.getClass().getName());
    } catch (InvocationTargetException setterFailure) {
        // Surface whatever the setter itself threw as the cause.
        throw new IllegalStateException(setterFailure.getCause());
    }
    return apply(percentileImpl);
}
 
Example #14
Source File: MomentSolverTest.java    From momentsketch with Apache License 2.0 6 votes vote down vote up
/**
 * Feeds the values 0..999 into a {@code MomentStruct}, solves it, and checks that the
 * solver's quantile estimates are within 1.0 of the reference values: the 90th
 * percentile computed by {@link Percentile} and 0.0 for the lowest quantile.
 */
@Test
public void testFromRaw() {
    final int sampleCount = 1000;
    final double[] samples = new double[sampleCount];
    for (int index = 0; index < sampleCount; index++) {
        samples[index] = index;
    }

    final MomentStruct moments = new MomentStruct(10);
    moments.add(samples);

    final MomentSolver solver = new MomentSolver(moments);
    solver.setGridSize(1024);
    solver.solve();
    final double estimatedP90 = solver.getQuantile(.9);

    // Reference value computed directly from the raw samples.
    final Percentile reference = new Percentile();
    reference.setData(samples);
    final double exactP90 = reference.evaluate(90.0);
    assertEquals(exactP90, estimatedP90, 1.0);

    final double[] fractions = {0, .1, .5, .9, 1.0};
    final double[] estimates = solver.getQuantiles(fractions);
    assertEquals(0.0, estimates[0], 1.0);
    assertEquals(exactP90, estimates[3], 1.0);
}
 
Example #15
Source File: QuantileClassifierTest.java    From macrobase with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a {@code QuantileClassifier} with its constructor settings over {@code df} and
 * checks, with a tolerance of 5.0, that its cutoffs match the 1st and 99th percentiles
 * of {@code rawData} and that each row's "_OUTLIER" value matches the number of raw
 * group values lying outside those percentiles.
 */
@Test
public void testClassify() throws Exception {
    assertEquals(length, df.getNumRows());
    QuantileClassifier classifier = new QuantileClassifier(
            "count",
            quantileColumnsMap
    );
    classifier.process(df);
    DataFrame classified = classifier.getResults();
    assertEquals(df.getNumRows(), classified.getNumRows());
    // The input keeps its 7 columns; the output gains one.
    assertEquals(7, df.getSchema().getNumColumns());
    assertEquals(8, classified.getSchema().getNumColumns());

    // Reference cutoffs computed from the raw, ungrouped data.
    Percentile reference = new Percentile();
    reference.setData(rawData);
    double expectedLowCutoff = reference.evaluate(1);
    double expectedHighCutoff = reference.evaluate(99);
    assertEquals(expectedLowCutoff, classifier.getLowCutoff(), 5.0);
    assertEquals(expectedHighCutoff, classifier.getHighCutoff(), 5.0);

    double[] outlierCounts = classified.getDoubleColumnByName("_OUTLIER");

    for (int group = 0; group < outlierCounts.length; group++) {
        int expectedOutliers = 0;
        for (double value : rawGroups.get(group)) {
            if (value < expectedLowCutoff || value > expectedHighCutoff) {
                expectedOutliers++;
            }
        }
        assertEquals(expectedOutliers, outlierCounts[group], 5.0);
    }
}
 
Example #16
Source File: QuantileClassifierTest.java    From macrobase with Apache License 2.0 5 votes vote down vote up
/**
 * Configures a {@code QuantileClassifier} entirely through its setters (low side only,
 * percentile 5.0, output column "_OUT") and checks, with a tolerance of 5.0, that its
 * low cutoff matches the 5th percentile of {@code rawData} and that each row's "_OUT"
 * value matches the number of raw group values below that percentile.
 */
@Test
public void testConfigure() throws Exception {
    QuantileClassifier classifier = new QuantileClassifier(
            "col1",
            new LinkedHashMap<>()
    );
    // Override everything passed to the constructor through the setters.
    classifier.setCountColumnName("count");
    classifier.setQuantileColumnNames(quantileColumnNames);
    classifier.setQuantiles(quantiles);
    classifier.setIncludeHigh(false);
    classifier.setIncludeLow(true);
    classifier.setOutputColumnName("_OUT");
    classifier.setPercentile(5.0);

    classifier.process(df);
    DataFrame classified = classifier.getResults();
    assertEquals(df.getNumRows(), classified.getNumRows());

    // Reference cutoff computed from the raw, ungrouped data.
    Percentile reference = new Percentile();
    reference.setData(rawData);
    double expectedLowCutoff = reference.evaluate(5);
    assertEquals(expectedLowCutoff, classifier.getLowCutoff(), 5.0);

    double[] outlierCounts = classified.getDoubleColumnByName("_OUT");

    for (int group = 0; group < outlierCounts.length; group++) {
        int expectedOutliers = 0;
        for (double value : rawGroups.get(group)) {
            if (value < expectedLowCutoff) {
                expectedOutliers++;
            }
        }
        assertEquals(expectedOutliers, outlierCounts[group], 5.0);
    }
}
 
Example #17
Source File: DecileCollection.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Builds a DecileCollection by evaluating the 10th, 20th, ..., 90th percentiles of the
 * given samples with Apache Commons {@link Percentile}.
 * @param samples   list of samples; must be non-null and non-empty
 *                  (caution should be used if this contains NaN or infinite values)
 */
public DecileCollection(final List<Double> samples) {
    Utils.nonNull(samples);
    Utils.validateArg(!samples.isEmpty(), "Cannot construct deciles for empty list of samples.");

    final Percentile estimator = new Percentile();
    estimator.setData(Doubles.toArray(samples));

    final Decile[] keys = Decile.values();
    // keys[0] receives the 10th percentile, keys[1] the 20th, ... keys[8] the 90th.
    for (int keyIndex = 0; keyIndex < 9; keyIndex++) {
        final int percent = 10 * (keyIndex + 1);
        deciles.put(keys[keyIndex], estimator.evaluate(percent));
    }
}
 
Example #18
Source File: CNLOHCaller.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Estimates the bias of the segment means in copy-ratio (CR) space as the offset from 1
 * of a {@link Percentile} estimate taken over the near-copy-neutral segment means.
 * <p>
 * Only segments whose mean lies within {@code CLOSE_ENOUGH_TO_COPY_NEUTRAL_IN_CR} of
 * CR == 1 contribute. The estimate uses a default-constructed {@link Percentile}
 * (NOTE(review): per the Commons Math docs the default quantile is 50, i.e. the median —
 * confirm).
 * </p>
 *
 * @param segments the modeled segments; must not be {@code null}
 * @return the estimate over the near-neutral segment means, minus 1
 */
private double calculateSegmentMeanBiasInCRSpace(final List<ACNVModeledSegment> segments) {
    Utils.nonNull(segments);

    final double copyNeutralCR = 1;

    // Only consider values "close enough" to copy neutral (CR == 1).
    final double[] nearNeutralMeans = segments.stream()
            .mapToDouble(segment -> segment.getSegmentMeanInCRSpace())
            .filter(mean -> Math.abs(mean - copyNeutralCR) < CLOSE_ENOUGH_TO_COPY_NEUTRAL_IN_CR)
            .toArray();

    final double nearNeutralEstimate = new Percentile().evaluate(nearNeutralMeans);
    return nearNeutralEstimate - 1;
}
 
Example #19
Source File: QuantileAggregator.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
public QuantileAggregator(final double quantile) {
	if (quantile < 0 || quantile > 1) {
		throw new IllegalArgumentException("Quantile values have to be in [0, 1]");
	}
	this.maxQuantile = new Percentile(1 - quantile);
	this.minQuantile = new Percentile(quantile);
}
 
Example #20
Source File: MovingMedianEvaluator.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the moving median of a list of numbers over a sliding window.
 * <p>
 * Values are fed one at a time into a {@code DescriptiveStatistics} constructed with the
 * window size; once it reports at least {@code window} values, the 50th percentile of
 * its current values is appended to the result after every further value added.
 * </p>
 *
 * @param first the values to slide over; must be a non-null {@code List} whose elements
 *              are {@code Number}s
 * @param second the window size; must be a non-null {@code Number} (its {@code intValue()} is used)
 * @return a {@code List<Number>} holding one median per position at which the window is full
 * @throws IOException if either argument is null or of the wrong type
 */
@Override
public Object doWork(Object first, Object second) throws IOException{
  if(null == first){
    throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - null found for the first value",toExpression(constructingFactory)));
  }
  if(null == second){
    throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - null found for the second value",toExpression(constructingFactory)));
  }
  if(!(first instanceof List<?>)){
    throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - found type %s for the first value, expecting a List",toExpression(constructingFactory), first.getClass().getSimpleName()));
  }
  if(!(second instanceof Number)){
    // Report the type of the offending (second) argument, not the first one.
    throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - found type %s for the second value, expecting a Number",toExpression(constructingFactory), second.getClass().getSimpleName()));
  }

  List<?> values = (List<?>)first;
  int window = ((Number)second).intValue();

  List<Number> moving = new ArrayList<>();
  DescriptiveStatistics slider = new DescriptiveStatistics(window);
  Percentile percentile = new Percentile();
  for(Object value : values){
    slider.addValue(((Number)value).doubleValue());
    // Emit a median only once the window has filled up.
    if(slider.getN() >= window){
      double median = percentile.evaluate(slider.getValues(), 50);
      moving.add(median);
    }
  }

  return moving;
}
 
Example #21
Source File: DecileCollection.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Builds a DecileCollection from the given samples: the 10th through 90th percentiles,
 * in steps of 10, are evaluated with Apache Commons {@link Percentile} and stored under
 * the corresponding {@code Decile} keys.
 * @param samples   list of samples; must be non-null and non-empty
 *                  (caution should be used if this contains NaN or infinite values)
 */
public DecileCollection(final List<Double> samples) {
    Utils.nonNull(samples);
    Utils.validateArg(!samples.isEmpty(), "Cannot construct deciles for empty list of samples.");

    final Percentile estimator = new Percentile();
    estimator.setData(Doubles.toArray(samples));

    final Decile[] keys = Decile.values();
    // Percent p (10, 20, ..., 90) is stored under the key at position p / 10 - 1.
    for (int percent = 10; percent < 100; percent += 10) {
        final double decileValue = estimator.evaluate(percent);
        deciles.put(keys[percent / 10 - 1], decileValue);
    }
}
 
Example #22
Source File: Winsorizer.java    From macrobase with Apache License 2.0 5 votes vote down vote up
/**
 * Clamps every dimension of the input rows to the percentile range
 * [{@code trimPct}, {@code 100 - trimPct}] computed over that dimension.
 * The per-dimension limits are stored in {@code bounds} ({@code [j][0]} lower, {@code [j][1]} upper).
 *
 * @param metrics input rows; the first row's length is taken as the number of dimensions
 * @return a new list of freshly allocated rows holding the clamped values
 */
public List<double[]> process(List<double[]> metrics) {
    final int rowCount = metrics.size();
    final int dimCount = metrics.get(0).length;
    Percentile percentileCalc = new Percentile();
    bounds = new double[dimCount][2];

    List<double[]> clamped = new ArrayList<>(rowCount);
    for (int row = 0; row < rowCount; row++) {
        clamped.add(new double[dimCount]);
    }

    double[] column = new double[rowCount];
    for (int dim = 0; dim < dimCount; dim++) {
        // Gather the current dimension across all rows.
        for (int row = 0; row < rowCount; row++) {
            column[row] = metrics.get(row)[dim];
        }

        percentileCalc.setData(column);
        bounds[dim][0] = percentileCalc.evaluate(trimPct);
        bounds[dim][1] = percentileCalc.evaluate(100 - trimPct);

        // The upper bound is checked first, then the lower bound; anything else passes through.
        for (int row = 0; row < rowCount; row++) {
            final double value = column[row];
            final double[] target = clamped.get(row);
            target[dim] = value > bounds[dim][1]
                    ? bounds[dim][1]
                    : (value < bounds[dim][0] ? bounds[dim][0] : value);
        }
    }

    return clamped;
}
 
Example #23
Source File: PercentileAggregator.java    From rapidminer-studio with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Evaluates the configured percentile over all collected elements.
 *
 * @return the result of {@link Percentile#evaluate(double)} for {@code percentile}
 */
@Override
protected double getValue() {
	// Unbox the collected elements into a primitive array for the calculator.
	double[] samples = ArrayUtils.toPrimitive(elements.toArray(new Double[0]));
	Percentile calculator = new Percentile();
	calculator.setData(samples);
	return calculator.evaluate(percentile);
}
 
Example #24
Source File: ComplexDoubleVector.java    From jpmml-evaluator with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Evaluates the requested percentile over the first {@code size} stored values.
 *
 * @param percentile the percentile to evaluate
 * @return the value computed by {@link Percentile} over a sorted copy of the stored values
 * @throws IllegalStateException if no values are stored
 */
@Override
public double doublePercentile(int percentile){

	if(this.size == 0){
		throw new IllegalStateException();
	}

	// Work on a private, sorted copy so the stored values stay untouched.
	final double[] sorted = new double[this.size];
	System.arraycopy(this.values, 0, sorted, 0, sorted.length);
	Arrays.sort(sorted);

	final Percentile calculator = new Percentile();
	calculator.setData(sorted);

	return calculator.evaluate(percentile);
}
 
Example #25
Source File: ContaminationModel.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Estimates contamination and its standard error from homozygous sites chosen by the given strategy.
 * <p>
 * For {@code HOM_ALT} and {@code HOM_REF} the genotyping sites come from {@code homAlts(minMaf)} and
 * {@code homRefs(minMaf)} respectively; for any other strategy they are the tumor sites whose alt
 * fraction is below {@code UNSCRUPULOUS_HOM_REF_ALLELE_FRACTION} and at most a percentile-based threshold.
 * </p>
 *
 * @param strategy   how the homozygous genotyping sites are selected
 * @param tumorSites pileup summaries of the tumor sample
 * @param minMaf     minimum minor allele fraction forwarded to {@code homAlts} / {@code homRefs}
 * @return pair of (contamination capped at 1.0, standard error); the standard error is 1 when no hom sites remain
 */
private Pair<Double, Double> calculateContamination(final Strategy strategy, final List<PileupSummary> tumorSites, final double minMaf) {
    final boolean useHomAlt = strategy == Strategy.HOM_ALT;

    final List<PileupSummary> genotypingHoms;
    if (useHomAlt) {
        genotypingHoms = homAlts(minMaf);
    } else if (strategy == Strategy.HOM_REF) {
        genotypingHoms = homRefs(minMaf);
    } else {
        final List<PileupSummary> candidates = tumorSites.stream()
                .filter(site -> site.getAltFraction() < UNSCRUPULOUS_HOM_REF_ALLELE_FRACTION)
                .collect(Collectors.toList());
        final Percentile percentile = new Percentile(UNSCRUPULOUS_HOM_REF_PERCENTILE);
        final double[] candidateAltFractions = candidates.stream().mapToDouble(PileupSummary::getAltFraction).toArray();
        final double altFractionThreshold = Math.max(MINIMUM_UNSCRUPULOUS_HOM_REF_ALT_FRACTION_THRESHOLD,
                percentile.evaluate(candidateAltFractions));
        genotypingHoms = candidates.stream()
                .filter(site -> site.getAltFraction() <= altFractionThreshold)
                .collect(Collectors.toList());
    }

    final List<PileupSummary> homs = subsetSites(tumorSites, genotypingHoms);
    final double tumorErrorRate = calculateErrorRate(tumorSites);

    // "Opposite" means ref in hom alt sites, or alt in hom ref sites.
    final ToIntFunction<PileupSummary> oppositeCount;
    final ToDoubleFunction<PileupSummary> oppositeAlleleFrequency;
    if (useHomAlt) {
        oppositeCount = PileupSummary::getRefCount;
        oppositeAlleleFrequency = PileupSummary::getRefFrequency;
    } else {
        oppositeCount = PileupSummary::getAltCount;
        oppositeAlleleFrequency = PileupSummary::getAlleleFrequency;
    }

    // Total depth and total opposite-allele read count over the hom sites.
    long totalDepth = 0;
    long oppositeDepth = 0;
    for (final PileupSummary hom : homs) {
        totalDepth += hom.getTotalCount();
        oppositeDepth += oppositeCount.applyAsInt(hom);
    }

    // Subtract the opposite reads attributed to error, never going below zero.
    final long errorDepth = Math.round(totalDepth * tumorErrorRate / 3);
    final long contaminationOppositeDepth = Math.max(oppositeDepth - errorDepth, 0);

    final double totalDepthWeightedByOppositeFrequency = homs.stream()
            .mapToDouble(ps -> ps.getTotalCount() * oppositeAlleleFrequency.applyAsDouble(ps))
            .sum();

    final double contamination = contaminationOppositeDepth / totalDepthWeightedByOppositeFrequency;

    final double stdError;
    if (homs.isEmpty()) {
        stdError = 1;
    } else {
        final double summedTerms = homs.stream().mapToDouble(ps -> {
            final double d = ps.getTotalCount();
            final double f = 1 - oppositeAlleleFrequency.applyAsDouble(ps);
            return (1 - f) * d * contamination * ((1 - contamination) + f * d * contamination);
        }).sum();
        stdError = Math.sqrt(summedTerms) / totalDepthWeightedByOppositeFrequency;
    }

    return Pair.of(Math.min(contamination, 1.0), stdError);
}
 
Example #26
Source File: SlowBrokerFinder.java    From cruise-control with BSD 2-Clause "Simplified" License 4 votes vote down vote up
/**
 * Creates a finder with empty broker-slowness-score and detected-slow-broker maps
 * and a {@link Percentile} built with its no-argument constructor.
 */
public SlowBrokerFinder() {
  _percentile = new Percentile();
  _detectedSlowBrokers = new HashMap<>();
  _brokerSlownessScore = new HashMap<>();
}
 
Example #27
Source File: DescriptiveStatisticsTest.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Returns a freshly constructed {@code subPercentile}; no state is carried over from this instance.
 */
@Override
public Percentile copy() {
    return new subPercentile();
}
 
Example #28
Source File: PercentileMetricAnomalyFinder.java    From cruise-control with BSD 2-Clause "Simplified" License 4 votes vote down vote up
/**
 * Creates a finder backed by a {@link Percentile} built with its no-argument constructor.
 */
public PercentileMetricAnomalyFinder() {
  this._percentile = new Percentile();
}
 
Example #29
Source File: DescriptiveStatisticsTest.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Returns a freshly constructed {@code subPercentile}; no state is carried over from this instance.
 */
@Override
public Percentile copy() {
    return new subPercentile();
}
 
Example #30
Source File: DescriptiveStatisticsTest.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Returns a freshly constructed {@code subPercentile}; no state is carried over from this instance.
 */
@Override
public Percentile copy() {
    return new subPercentile();
}