org.apache.commons.math3.distribution.ChiSquaredDistribution Java Examples

The following examples show how to use org.apache.commons.math3.distribution.ChiSquaredDistribution. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: StatsUtils.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Computes the chi-square statistic and its p value for several observed/expected
 * row pairs in one call, reusing a single distribution instance for every row
 * instead of building one per row.
 *
 * <p>The degrees of freedom are derived from the length of the first row of
 * {@code expecteds} minus one and are applied to all rows.</p>
 *
 * @param observeds non-negative matrix, one row per test
 * @param expecteds positive matrix, one row per test
 * @return entry of (chi2 value[], p value[]), both indexed by row
 */
public static Map.Entry<double[], double[]> chiSquare(@Nonnull final double[][] observeds,
        @Nonnull final double[][] expecteds) {
    Preconditions.checkArgument(observeds.length == expecteds.length);

    final int numRows = expecteds.length;
    final double degreesOfFreedom = expecteds[0].length - 1.d;
    final ChiSquaredDistribution chi2Dist = new ChiSquaredDistribution(degreesOfFreedom);

    final double[] statistics = new double[numRows];
    final double[] pValues = new double[numRows];
    int row = 0;
    while (row < numRows) {
        final double statistic = chiSquare(observeds[row], expecteds[row]);
        statistics[row] = statistic;
        // upper tail of the distribution beyond the statistic
        pValues[row] = 1.d - chi2Dist.cumulativeProbability(statistic);
        row++;
    }

    return new AbstractMap.SimpleEntry<double[], double[]>(statistics, pValues);
}
 
Example #2
Source File: ChiSquareTest.java    From Alink with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a chi-square test on one cross table.
 *
 * <p>For every cell the expected count is {@code rowSum * colSum / total}; the statistic
 * is the sum over all cells of {@code (observed - expected)^2 / expected}. When the table
 * has at most one row or at most one column, the p-value is set to 1 without consulting
 * the distribution.</p>
 *
 * @param crossTabWithId f0 is id, f1 is cross table
 * @return tuple4: f0 is the id of the cross table, f1 is pValue, f2 is chi-square value, f3 is df
 */
protected static Tuple4<Integer, Double, Double, Double> test(Tuple2<Integer, Crosstab> crossTabWithId) {
    final int id = crossTabWithId.f0;
    final Crosstab table = crossTabWithId.f1;

    final int numRows = table.rowTags.size();
    final int numCols = table.colTags.size();

    // marginal totals and grand total
    final double[] rowTotals = table.rowSum();
    final double[] colTotals = table.colSum();
    final double total = table.sum();

    // accumulate the statistic cell by cell
    double statistic = 0;
    for (int r = 0; r < numRows; r++) {
        for (int c = 0; c < numCols; c++) {
            final double expectedCount = rowTotals[r] * colTotals[c] / total;
            final double deviation = table.data[r][c] - expectedCount;
            statistic += deviation * deviation / expectedCount;
        }
    }

    // p-value: only defined through the distribution when the table is at least 2x2
    final double pValue;
    if (numRows > 1 && numCols > 1) {
        final ChiSquaredDistribution chi2Dist =
            new ChiSquaredDistribution(null, (numRows - 1) * (numCols - 1));
        pValue = 1.0 - chi2Dist.cumulativeProbability(Math.abs(statistic));
    } else {
        pValue = 1;
    }

    final double degreesOfFreedom = (double)(numRows - 1) * (numCols - 1);
    return Tuple4.of(id, pValue, statistic, degreesOfFreedom);
}
 
Example #3
Source File: AlleleFrequencyQC.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Completes the traversal: runs the parent hook, groups the "called" rows of the report
 * read from {@code outFile} by allele frequency bin, evaluates a chi-squared test on
 * those groups, saves the outcome as a metric and launches the plotting R script.
 *
 * <p>An error is logged when the p-value is below {@code threshold}.</p>
 *
 * @return always {@code null}
 */
@Override
public Object  onTraversalSuccess() {

    super.onTraversalSuccess();

    final GATKReportTable table= new GATKReport(outFile).getTable(MODULES_TO_USE.get(0));
    final List<String> columnNames = table.getColumnInfo().stream().map(c -> c.getColumnName()).collect(Collectors.toList());

    // Resolve each column position once: looking it up inside the lambdas below would
    // repeat a linear search of columnNames for every row of the table.
    final int filterColumn = columnNames.indexOf("Filter");
    final int alleleFrequencyColumn = columnNames.indexOf("AlleleFrequency");
    final int avgVarAfColumn = columnNames.indexOf("avgVarAF");

    // this is a map of allele frequency bin : length 2 list of observed allele frequencies ( one for comp, one for eval )
    final Map<Object, List<Object>> afMap = IntStream.range(0, table.getNumRows()).mapToObj(i -> table.getRow(i)).
            filter(r -> r[filterColumn].equals("called")).
            collect(Collectors.groupingBy(r -> r[alleleFrequencyColumn],
                    Collectors.mapping(r -> r[avgVarAfColumn], Collectors.toList())));

    // degrees of freedom: number of allele frequency bins minus one
    final ChiSquaredDistribution dist = new ChiSquaredDistribution(afMap.size()-1);
    final double chiSqValue = calculateChiSquaredStatistic(afMap, allowedVariance);
    final double pVal = 1- dist.cumulativeProbability(chiSqValue);
    final MetricsFile<AlleleFrequencyQCMetric, Integer> metricsFile = new MetricsFile<>();
    final AlleleFrequencyQCMetric metric = new AlleleFrequencyQCMetric();

    metric.SAMPLE = sample;
    metric.CHI_SQ_VALUE =  chiSqValue;
    metric.METRIC_TYPE = "Allele Frequency";
    metric.METRIC_VALUE = pVal;

    metricsFile.addMetric(metric);
    MetricsUtils.saveMetrics(metricsFile,   metricOutput.getAbsolutePath());

    // need the file returned from variant eval in order to run the plotting stuff
    final RScriptExecutor executer = new RScriptExecutor();
    executer.addScript(new Resource(R_SCRIPT, AlleleFrequencyQC.class));
    executer.addArgs(outFile.getAbsolutePath() , metricOutput.getAbsolutePath(), sample);
    executer.exec();

    if (pVal < threshold) {
        logger.error("Allele frequencies between your array VCF and the expected VCF do not match with a significant pvalue of " + pVal);
    }
    return null;
}
 
Example #4
Source File: RandomDataGeneratorTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Draws 1000 values from {@code randomData.nextChiSquare(12)} after re-seeding, tallies
 * them against the quartiles of a chi-squared distribution with 12 degrees of freedom,
 * and hands the tallies to {@code TestUtils.assertChiSquareAccept} at level 0.001.
 */
@Test
public void testNextChiSquare() {
    final int degreesOfFreedom = 12;
    final int sampleSize = 1000;

    final long[] binCounts = new long[4];
    final double[] quartileBounds =
        TestUtils.getDistributionQuartiles(new ChiSquaredDistribution(degreesOfFreedom));

    // fixed seed so the drawn sequence is the same on every run
    randomData.reSeed(1000);
    int drawn = 0;
    while (drawn < sampleSize) {
        TestUtils.updateCounts(randomData.nextChiSquare(degreesOfFreedom), binCounts, quartileBounds);
        drawn++;
    }
    TestUtils.assertChiSquareAccept(expected, binCounts, 0.001);
}
 
Example #5
Source File: RandomDataGeneratorTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Draws 1000 values from {@code randomData.nextChiSquare(12)} after re-seeding, tallies
 * them against the quartiles of a chi-squared distribution with 12 degrees of freedom,
 * and hands the tallies to {@code TestUtils.assertChiSquareAccept} at level 0.001.
 */
@Test
public void testNextChiSquare() {
    final int degreesOfFreedom = 12;
    final int sampleSize = 1000;

    final long[] binCounts = new long[4];
    final double[] quartileBounds =
        TestUtils.getDistributionQuartiles(new ChiSquaredDistribution(degreesOfFreedom));

    // fixed seed so the drawn sequence is the same on every run
    randomData.reSeed(1000);
    int drawn = 0;
    while (drawn < sampleSize) {
        TestUtils.updateCounts(randomData.nextChiSquare(degreesOfFreedom), binCounts, quartileBounds);
        drawn++;
    }
    TestUtils.assertChiSquareAccept(expected, binCounts, 0.001);
}
 
Example #6
Source File: RandomDataGeneratorTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Draws 1000 values from {@code randomData.nextChiSquare(12)} after re-seeding, tallies
 * them against the quartiles of a chi-squared distribution with 12 degrees of freedom,
 * and hands the tallies to {@code TestUtils.assertChiSquareAccept} at level 0.001.
 */
@Test
public void testNextChiSquare() {
    final int degreesOfFreedom = 12;
    final int sampleSize = 1000;

    final long[] binCounts = new long[4];
    final double[] quartileBounds =
        TestUtils.getDistributionQuartiles(new ChiSquaredDistribution(degreesOfFreedom));

    // fixed seed so the drawn sequence is the same on every run
    randomData.reSeed(1000);
    int drawn = 0;
    while (drawn < sampleSize) {
        TestUtils.updateCounts(randomData.nextChiSquare(degreesOfFreedom), binCounts, quartileBounds);
        drawn++;
    }
    TestUtils.assertChiSquareAccept(expected, binCounts, 0.001);
}
 
Example #7
Source File: RandomDataTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Draws 1000 values from {@code randomData.nextChiSquare(12)} after re-seeding, tallies
 * them against the quartiles of a chi-squared distribution with 12 degrees of freedom,
 * and hands the tallies to {@code TestUtils.assertChiSquareAccept} at level 0.001.
 *
 * @throws Exception declared by the test signature; no checked exception is raised
 *         explicitly inside this method
 */
@Test
public void testNextChiSquare() throws Exception {
    final int degreesOfFreedom = 12;
    final int sampleSize = 1000;

    final long[] binCounts = new long[4];
    final double[] quartileBounds =
        TestUtils.getDistributionQuartiles(new ChiSquaredDistribution(degreesOfFreedom));

    // fixed seed so the drawn sequence is the same on every run
    randomData.reSeed(1000);
    int drawn = 0;
    while (drawn < sampleSize) {
        TestUtils.updateCounts(randomData.nextChiSquare(degreesOfFreedom), binCounts, quartileBounds);
        drawn++;
    }
    TestUtils.assertChiSquareAccept(expected, binCounts, 0.001);
}
 
Example #8
Source File: RandomDataGeneratorTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Draws 1000 values from {@code randomData.nextChiSquare(12)} after re-seeding, tallies
 * them against the quartiles of a chi-squared distribution with 12 degrees of freedom,
 * and hands the tallies to {@code TestUtils.assertChiSquareAccept} at level 0.001.
 */
@Test
public void testNextChiSquare() {
    final int degreesOfFreedom = 12;
    final int sampleSize = 1000;

    final long[] binCounts = new long[4];
    final double[] quartileBounds =
        TestUtils.getDistributionQuartiles(new ChiSquaredDistribution(degreesOfFreedom));

    // fixed seed so the drawn sequence is the same on every run
    randomData.reSeed(1000);
    int drawn = 0;
    while (drawn < sampleSize) {
        TestUtils.updateCounts(randomData.nextChiSquare(degreesOfFreedom), binCounts, quartileBounds);
        drawn++;
    }
    TestUtils.assertChiSquareAccept(expected, binCounts, 0.001);
}
 
Example #9
Source File: RandomDataTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Draws 1000 values from {@code randomData.nextChiSquare(12)} after re-seeding, tallies
 * them against the quartiles of a chi-squared distribution with 12 degrees of freedom,
 * and hands the tallies to {@code TestUtils.assertChiSquareAccept} at level 0.001.
 */
@Test
public void testNextChiSquare() {
    final int degreesOfFreedom = 12;
    final int sampleSize = 1000;

    final long[] binCounts = new long[4];
    final double[] quartileBounds =
        TestUtils.getDistributionQuartiles(new ChiSquaredDistribution(degreesOfFreedom));

    // fixed seed so the drawn sequence is the same on every run
    randomData.reSeed(1000);
    int drawn = 0;
    while (drawn < sampleSize) {
        TestUtils.updateCounts(randomData.nextChiSquare(degreesOfFreedom), binCounts, quartileBounds);
        drawn++;
    }
    TestUtils.assertChiSquareAccept(expected, binCounts, 0.001);
}
 
Example #10
Source File: RandomDataTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Draws 1000 values from {@code randomData.nextChiSquare(12)} after re-seeding, tallies
 * them against the quartiles of a chi-squared distribution with 12 degrees of freedom,
 * and hands the tallies to {@code TestUtils.assertChiSquareAccept} at level 0.001.
 */
@Test
public void testNextChiSquare() {
    final int degreesOfFreedom = 12;
    final int sampleSize = 1000;

    final long[] binCounts = new long[4];
    final double[] quartileBounds =
        TestUtils.getDistributionQuartiles(new ChiSquaredDistribution(degreesOfFreedom));

    // fixed seed so the drawn sequence is the same on every run
    randomData.reSeed(1000);
    int drawn = 0;
    while (drawn < sampleSize) {
        TestUtils.updateCounts(randomData.nextChiSquare(degreesOfFreedom), binCounts, quartileBounds);
        drawn++;
    }
    TestUtils.assertChiSquareAccept(expected, binCounts, 0.001);
}
 
Example #11
Source File: RandomDataGeneratorTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Draws 1000 values from {@code randomData.nextChiSquare(12)} after re-seeding, tallies
 * them against the quartiles of a chi-squared distribution with 12 degrees of freedom,
 * and hands the tallies to {@code TestUtils.assertChiSquareAccept} at level 0.001.
 */
@Test
public void testNextChiSquare() {
    final int degreesOfFreedom = 12;
    final int sampleSize = 1000;

    final long[] binCounts = new long[4];
    final double[] quartileBounds =
        TestUtils.getDistributionQuartiles(new ChiSquaredDistribution(degreesOfFreedom));

    // fixed seed so the drawn sequence is the same on every run
    randomData.reSeed(1000);
    int drawn = 0;
    while (drawn < sampleSize) {
        TestUtils.updateCounts(randomData.nextChiSquare(degreesOfFreedom), binCounts, quartileBounds);
        drawn++;
    }
    TestUtils.assertChiSquareAccept(expected, binCounts, 0.001);
}
 
Example #12
Source File: StatisticTest.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Smoke test for the external distribution classes: builds a chi-squared and a Poisson
 * distribution and evaluates their cumulative probabilities. Nothing is asserted on the
 * results, so the test only fails if one of the calls throws.
 */
@Test
public void testExternalFunctions()
{
    // chi-squared distribution constructed with 95, CDF evaluated at 135
    final ChiSquaredDistribution chiSquared = new ChiSquaredDistribution(95);
    final double chiSquaredCdf = chiSquared.cumulativeProbability(135);

    // Poisson distribution constructed with 100, CDF evaluated at 99 and at 110
    final PoissonDistribution poissonDist = new PoissonDistribution(100);
    final double poissonCdfAt99 = poissonDist.cumulativeProbability(99);
    final double poissonCdfAt110 = poissonDist.cumulativeProbability(110);
}
 
Example #13
Source File: MinCovDet.java    From macrobase with Apache License 2.0 5 votes vote down vote up
/**
 * Maps a z-score to the chi-squared quantile that has the same cumulative probability.
 *
 * @param zscore value whose cumulative probability is taken under a default-constructed
 *        {@code NormalDistribution}
 * @return inverse cumulative probability of that value under a chi-squared distribution
 *         built from the field {@code p}
 */
public double getZScoreEquivalent(double zscore) {
    // for normal distribution, mahalanobis distance is chi-squared
    // https://en.wikipedia.org/wiki/Mahalanobis_distance#Normal_distributions
    final NormalDistribution normal = new NormalDistribution();
    final double cumulative = normal.cumulativeProbability(zscore);

    final ChiSquaredDistribution chiSquared = new ChiSquaredDistribution(p);
    return chiSquared.inverseCumulativeProbability(cumulative);
}
 
Example #14
Source File: StatsUtils.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the p value of the chi-square statistic computed from one observed/expected
 * vector pair, using {@code expected.length - 1} degrees of freedom.
 *
 * @param observed non-negative vector
 * @param expected positive vector
 * @return p value
 */
public static double chiSquareTest(@Nonnull final double[] observed,
        @Nonnull final double[] expected) {
    final double degreesOfFreedom = expected.length - 1.d;
    final ChiSquaredDistribution chi2Dist = new ChiSquaredDistribution(degreesOfFreedom);

    final double statistic = chiSquare(observed, expected);
    // upper tail of the distribution beyond the statistic
    return 1.d - chi2Dist.cumulativeProbability(statistic);
}
 
Example #15
Source File: StatisticsAssert.java    From jenetics with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the inverse cumulative probability of a chi-squared distribution.
 *
 * @param p cumulative probability to invert
 * @param degreeOfFreedom degrees of freedom of the distribution
 * @return the value whose cumulative probability is {@code p}
 */
private static double chi(final double p, final int degreeOfFreedom) {
	final ChiSquaredDistribution distribution =
		new ChiSquaredDistribution(degreeOfFreedom);
	return distribution.inverseCumulativeProbability(p);
}
 
Example #16
Source File: CompressedSizeEstimatorSample.java    From systemds with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the critical value for a sample size: the inverse cumulative probability at
 * {@code SHLOSSER_JACKKNIFE_ALPHA} of a chi-squared distribution with
 * {@code sampleSize - 1} degrees of freedom.
 *
 * @param sampleSize sample size the critical value is computed for
 * @return critical value paired with the sample size it was computed for
 */
private static CriticalValue computeCriticalValue(int sampleSize) {
	final int degreesOfFreedom = sampleSize - 1;
	final double quantile = new ChiSquaredDistribution(degreesOfFreedom)
		.inverseCumulativeProbability(SHLOSSER_JACKKNIFE_ALPHA);
	return new CriticalValue(quantile, sampleSize);
}
 
Example #17
Source File: ShlosserJackknifeEstimator.java    From systemds with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the critical value for a sample size: the inverse cumulative probability at
 * {@code SHLOSSER_JACKKNIFE_ALPHA} of a chi-squared distribution with
 * {@code sampleSize - 1} degrees of freedom.
 *
 * @param sampleSize sample size the critical value is computed for
 * @return critical value paired with the sample size it was computed for
 */
private static CriticalValue computeCriticalValue(int sampleSize) {
	final int degreesOfFreedom = sampleSize - 1;
	final double quantile = new ChiSquaredDistribution(degreesOfFreedom)
		.inverseCumulativeProbability(SHLOSSER_JACKKNIFE_ALPHA);
	return new CriticalValue(quantile, sampleSize);
}
 
Example #18
Source File: GTest.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Computes the <i>observed significance level</i>, or <a href=
 * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">p-value</a>,
 * of a G-Test for goodness of fit that compares the {@code observed}
 * frequency counts with those in the {@code expected} array.
 *
 * <p>The result is the smallest significance level at which the null
 * hypothesis (the observed counts conform to the frequency distribution
 * described by the expected counts) can be rejected.</p>
 *
 * <p>It is obtained as the tail probability beyond
 * {@link #g(double[], long[]) g(expected, observed)}
 * in the ChiSquare distribution whose degrees of freedom are one less than
 * the length of {@code expected}.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li>Expected counts must all be positive.</li>
 * <li>Observed counts must all be &ge; 0.</li>
 * <li>The observed and expected arrays must have the same length and
 * their common length must be at least 2.</li>
 * </ul>
 *
 * <p>If any of the preconditions are not met, a
 * {@code MathIllegalArgumentException} is thrown.</p>
 *
 * <p><strong>Note:</strong> This implementation rescales the
 * {@code expected} array if necessary to ensure that the sum of the
 * expected and observed counts are equal.</p>
 *
 * @param expected array of expected frequency counts
 * @param observed array of observed frequency counts
 * @return p-value
 * @throws NotPositiveException if {@code observed} has negative entries
 * @throws NotStrictlyPositiveException if {@code expected} has entries that
 * are not strictly positive
 * @throws DimensionMismatchException if the array lengths do not match or
 * are less than 2.
 * @throws MaxCountExceededException if an error occurs computing the
 * p-value.
 */
public double gTest(final double[] expected, final long[] observed)
        throws NotPositiveException, NotStrictlyPositiveException,
        DimensionMismatchException, MaxCountExceededException {

    // nothing is sampled from this distribution, so a null rng is passed to skip
    // the cost of creating one
    final double degreesOfFreedom = expected.length - 1.0;
    final ChiSquaredDistribution chiSquared =
            new ChiSquaredDistribution(null, degreesOfFreedom);

    final double gStatistic = g(expected, observed);
    return 1.0 - chiSquared.cumulativeProbability(gStatistic);
}
 
Example #19
Source File: ChiSquareTest.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Returns the <i>observed significance level</i>, or <a href=
 * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
 * p-value</a>, associated with a
 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
 * chi-square test of independence</a> based on the input <code>counts</code>
 * array, viewed as a two-way table.
 * <p>
 * The rows of the 2-way table are
 * <code>count[0], ... , count[count.length - 1] </code></p>
 * <p>
 * The degrees of freedom used are
 * <code>(counts.length - 1) * (counts[0].length - 1)</code>.</p>
 * <p>
 * <strong>Preconditions</strong>: <ul>
 * <li>All counts must be &ge; 0.
 * </li>
 * <li>The count array must be rectangular (i.e. all count[i] subarrays must have
 *     the same length).
 * </li>
 * <li>The 2-way table represented by <code>counts</code> must have at least 2
 *     columns and at least 2 rows.
 * </li></ul></p><p>
 * Violations of the preconditions are reported through the exceptions
 * listed below.</p>
 *
 * @param counts array representation of 2-way table
 * @return p-value
 * @throws NullArgumentException if the array is null
 * @throws DimensionMismatchException if the array is not rectangular
 * @throws NotPositiveException if {@code counts} has negative entries
 * @throws MaxCountExceededException if an error occurs computing the p-value
 */
public double chiSquareTest(final long[][] counts)
    throws NullArgumentException, DimensionMismatchException,
    NotPositiveException, MaxCountExceededException {

    checkArray(counts);
    double df = ((double) counts.length -1) * ((double) counts[0].length - 1);
    // pass a null rng to avoid unneeded overhead as we will not sample from this distribution
    final ChiSquaredDistribution distribution = new ChiSquaredDistribution(null, df);
    return 1 - distribution.cumulativeProbability(chiSquare(counts));

}
 
Example #20
Source File: ChiSquareTest.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Computes the <i>observed significance level</i>, or <a href=
 * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
 * p-value</a>, of a
 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
 * Chi-square goodness of fit test</a> that compares the <code>observed</code>
 * frequency counts with those in the <code>expected</code> array.
 * <p>
 * The result is the smallest significance level at which the null hypothesis
 * (the observed counts conform to the frequency distribution described by the
 * expected counts) can be rejected. The degrees of freedom used are one less
 * than the length of <code>expected</code>.</p>
 * <p>
 * <strong>Preconditions</strong>:</p>
 * <ul>
 * <li>Expected counts must all be positive.</li>
 * <li>Observed counts must all be &ge; 0.</li>
 * <li>The observed and expected arrays must have the same length and
 * their common length must be at least 2.</li>
 * </ul>
 * <p>
 * Violations of the preconditions are reported through the exceptions
 * listed below.</p>
 * <p><strong>Note: </strong>This implementation rescales the
 * <code>expected</code> array if necessary to ensure that the sum of the
 * expected and observed counts are equal.</p>
 *
 * @param expected array of expected frequency counts
 * @param observed array of observed frequency counts
 * @return p-value
 * @throws NotPositiveException if <code>observed</code> has negative entries
 * @throws NotStrictlyPositiveException if <code>expected</code> has entries that are
 * not strictly positive
 * @throws DimensionMismatchException if the arrays length is less than 2
 * @throws MaxCountExceededException if an error occurs computing the p-value
 */
public double chiSquareTest(final double[] expected, final long[] observed)
    throws NotPositiveException, NotStrictlyPositiveException,
    DimensionMismatchException, MaxCountExceededException {

    final double degreesOfFreedom = expected.length - 1.0;
    final ChiSquaredDistribution chiSquared = new ChiSquaredDistribution(degreesOfFreedom);

    final double statistic = chiSquare(expected, observed);
    // upper tail of the distribution beyond the statistic
    return 1.0 - chiSquared.cumulativeProbability(statistic);
}
 
Example #21
Source File: GTest.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Computes the <i>observed significance level</i>, or <a href=
 * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">p-value</a>,
 * of a G-Test for goodness of fit that compares the {@code observed}
 * frequency counts with those in the {@code expected} array.
 *
 * <p>The result is the smallest significance level at which the null
 * hypothesis (the observed counts conform to the frequency distribution
 * described by the expected counts) can be rejected.</p>
 *
 * <p>It is obtained as the tail probability beyond
 * {@link #g(double[], long[]) g(expected, observed)}
 * in the ChiSquare distribution whose degrees of freedom are one less than
 * the length of {@code expected}.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li>Expected counts must all be positive.</li>
 * <li>Observed counts must all be &ge; 0.</li>
 * <li>The observed and expected arrays must have the same length and
 * their common length must be at least 2.</li>
 * </ul>
 *
 * <p>If any of the preconditions are not met, a
 * {@code MathIllegalArgumentException} is thrown.</p>
 *
 * <p><strong>Note:</strong> This implementation rescales the
 * {@code expected} array if necessary to ensure that the sum of the
 * expected and observed counts are equal.</p>
 *
 * @param expected array of expected frequency counts
 * @param observed array of observed frequency counts
 * @return p-value
 * @throws NotPositiveException if {@code observed} has negative entries
 * @throws NotStrictlyPositiveException if {@code expected} has entries that
 * are not strictly positive
 * @throws DimensionMismatchException if the array lengths do not match or
 * are less than 2.
 * @throws MaxCountExceededException if an error occurs computing the
 * p-value.
 */
public double gTest(final double[] expected, final long[] observed)
        throws NotPositiveException, NotStrictlyPositiveException,
        DimensionMismatchException, MaxCountExceededException {

    final double degreesOfFreedom = expected.length - 1.0;
    final ChiSquaredDistribution chiSquared =
            new ChiSquaredDistribution(degreesOfFreedom);

    final double gStatistic = g(expected, observed);
    // upper tail of the distribution beyond the statistic
    return 1.0 - chiSquared.cumulativeProbability(gStatistic);
}
 
Example #22
Source File: GTest.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Computes the intrinsic (Hardy-Weinberg proportions) p-Value, as described
 * in p64-69 of McDonald, J.H. 2009. Handbook of Biological Statistics
 * (2nd ed.). Sparky House Publishing, Baltimore, Maryland.
 *
 * <p>The result is the tail probability beyond
 * {@link #g(double[], long[]) g(expected, observed)}
 * in the ChiSquare distribution whose degrees of freedom are two less than
 * the length of {@code expected}.</p>
 *
 * @param expected array of expected frequency counts
 * @param observed array of observed frequency counts
 * @return p-value
 * @throws NotPositiveException if {@code observed} has negative entries
 * @throws NotStrictlyPositiveException {@code expected} has entries that are
 * not strictly positive
 * @throws DimensionMismatchException if the array lengths do not match or
 * are less than 2.
 * @throws MaxCountExceededException if an error occurs computing the
 * p-value.
 */
public double gTestIntrinsic(final double[] expected, final long[] observed)
        throws NotPositiveException, NotStrictlyPositiveException,
        DimensionMismatchException, MaxCountExceededException {

    // two degrees of freedom are removed here, versus one in the plain G-Test
    final double degreesOfFreedom = expected.length - 2.0;
    final ChiSquaredDistribution chiSquared =
            new ChiSquaredDistribution(degreesOfFreedom);

    final double gStatistic = g(expected, observed);
    return 1.0 - chiSquared.cumulativeProbability(gStatistic);
}
 
Example #23
Source File: GTest.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * <p>Computes the <i>observed significance level</i>, or <a href=
 * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
 * p-value</a>, of a G-Value (Log-Likelihood Ratio) two sample test that
 * compares the bin frequency counts in {@code observed1} and
 * {@code observed2}.</p>
 *
 * <p>The result is the smallest significance level at which the null
 * hypothesis (the observed counts conform to the same distribution) can be
 * rejected.</p>
 *
 * <p>See {@link #gTest(double[], long[])} for details on how the p-value is
 * computed. The degrees of freedom used to perform the test is one less than
 * the length of {@code observed1}.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li>Observed counts must be non-negative.</li>
 * <li>Observed counts for a specific bin must not both be zero.</li>
 * <li>Observed counts for a specific sample must not all be 0.</li>
 * <li>The arrays {@code observed1} and {@code observed2} must
 * have the same length and their common length must be at least 2.</li>
 * </ul>
 *
 * <p>If any of the preconditions are not met, a
 * {@code MathIllegalArgumentException} is thrown.</p>
 *
 * @param observed1 array of observed frequency counts of the first data set
 * @param observed2 array of observed frequency counts of the second data
 * set
 * @return p-value
 * @throws DimensionMismatchException if the length of the arrays does not
 * match or their common length is less than 2
 * @throws NotPositiveException if any of the entries in {@code observed1} or
 * {@code observed2} are negative
 * @throws ZeroException if either all counts of {@code observed1} or
 * {@code observed2} are zero, or if the count at some index is
 * zero for both arrays
 * @throws MaxCountExceededException if an error occurs computing the
 * p-value.
 */
public double gTestDataSetsComparison(final long[] observed1,
        final long[] observed2)
        throws DimensionMismatchException, NotPositiveException, ZeroException,
        MaxCountExceededException {
    final double degreesOfFreedom = (double) observed1.length - 1;
    final ChiSquaredDistribution chiSquared =
            new ChiSquaredDistribution(degreesOfFreedom);

    final double gStatistic = gDataSetsComparison(observed1, observed2);
    // upper tail of the distribution beyond the statistic
    return 1 - chiSquared.cumulativeProbability(gStatistic);
}
 
Example #24
Source File: ChiSquareTest.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Computes the <i>observed significance level</i>, or <a href=
 * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
 * p-value</a>, of a
 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
 * Chi-square goodness of fit test</a> that compares the <code>observed</code>
 * frequency counts with those in the <code>expected</code> array.
 * <p>
 * The result is the smallest significance level at which the null hypothesis
 * (the observed counts conform to the frequency distribution described by the
 * expected counts) can be rejected. The degrees of freedom used are one less
 * than the length of <code>expected</code>.</p>
 * <p>
 * <strong>Preconditions</strong>:</p>
 * <ul>
 * <li>Expected counts must all be positive.</li>
 * <li>Observed counts must all be &ge; 0.</li>
 * <li>The observed and expected arrays must have the same length and
 * their common length must be at least 2.</li>
 * </ul>
 * <p>
 * Violations of the preconditions are reported through the exceptions
 * listed below.</p>
 * <p><strong>Note: </strong>This implementation rescales the
 * <code>expected</code> array if necessary to ensure that the sum of the
 * expected and observed counts are equal.</p>
 *
 * @param expected array of expected frequency counts
 * @param observed array of observed frequency counts
 * @return p-value
 * @throws NotPositiveException if <code>observed</code> has negative entries
 * @throws NotStrictlyPositiveException if <code>expected</code> has entries that are
 * not strictly positive
 * @throws DimensionMismatchException if the arrays length is less than 2
 * @throws MaxCountExceededException if an error occurs computing the p-value
 */
public double chiSquareTest(final double[] expected, final long[] observed)
    throws NotPositiveException, NotStrictlyPositiveException,
    DimensionMismatchException, MaxCountExceededException {

    final double degreesOfFreedom = expected.length - 1.0;
    final ChiSquaredDistribution chiSquared = new ChiSquaredDistribution(degreesOfFreedom);

    final double statistic = chiSquare(expected, observed);
    // upper tail of the distribution beyond the statistic
    return 1.0 - chiSquared.cumulativeProbability(statistic);
}
 
Example #25
Source File: GTest.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * <p>Computes the <i>observed significance level</i>, or <a href=
 * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
 * p-value</a>, of a G-Value (Log-Likelihood Ratio) two sample test that
 * compares the bin frequency counts in {@code observed1} and
 * {@code observed2}.</p>
 *
 * <p>The result is the smallest significance level at which the null
 * hypothesis (the observed counts conform to the same distribution) can be
 * rejected.</p>
 *
 * <p>See {@link #gTest(double[], long[])} for details on how the p-value is
 * computed. The degrees of freedom used to perform the test is one less than
 * the length of {@code observed1}.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li>Observed counts must be non-negative.</li>
 * <li>Observed counts for a specific bin must not both be zero.</li>
 * <li>Observed counts for a specific sample must not all be 0.</li>
 * <li>The arrays {@code observed1} and {@code observed2} must
 * have the same length and their common length must be at least 2.</li>
 * </ul>
 *
 * <p>If any of the preconditions are not met, a
 * {@code MathIllegalArgumentException} is thrown.</p>
 *
 * @param observed1 array of observed frequency counts of the first data set
 * @param observed2 array of observed frequency counts of the second data
 * set
 * @return p-value
 * @throws DimensionMismatchException if the length of the arrays does not
 * match or their common length is less than 2
 * @throws NotPositiveException if any of the entries in {@code observed1} or
 * {@code observed2} are negative
 * @throws ZeroException if either all counts of {@code observed1} or
 * {@code observed2} are zero, or if the count at some index is
 * zero for both arrays
 * @throws MaxCountExceededException if an error occurs computing the
 * p-value.
 */
public double gTestDataSetsComparison(final long[] observed1,
        final long[] observed2)
        throws DimensionMismatchException, NotPositiveException, ZeroException,
        MaxCountExceededException {
    final double degreesOfFreedom = (double) observed1.length - 1;
    final ChiSquaredDistribution chiSquared =
            new ChiSquaredDistribution(degreesOfFreedom);

    final double gStatistic = gDataSetsComparison(observed1, observed2);
    // upper tail of the distribution beyond the statistic
    return 1 - chiSquared.cumulativeProbability(gStatistic);
}
 
Example #26
Source File: GTest.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Computes the intrinsic (Hardy-Weinberg proportions) p-Value, as described
 * in p64-69 of McDonald, J.H. 2009. Handbook of Biological Statistics
 * (2nd ed.). Sparky House Publishing, Baltimore, Maryland.
 *
 * <p>The result is the tail probability beyond
 * {@link #g(double[], long[]) g(expected, observed)}
 * in the ChiSquare distribution whose degrees of freedom are two less than
 * the length of {@code expected}.</p>
 *
 * @param expected array of expected frequency counts
 * @param observed array of observed frequency counts
 * @return p-value
 * @throws NotPositiveException if {@code observed} has negative entries
 * @throws NotStrictlyPositiveException {@code expected} has entries that are
 * not strictly positive
 * @throws DimensionMismatchException if the array lengths do not match or
 * are less than 2.
 * @throws MaxCountExceededException if an error occurs computing the
 * p-value.
 */
public double gTestIntrinsic(final double[] expected, final long[] observed)
        throws NotPositiveException, NotStrictlyPositiveException,
        DimensionMismatchException, MaxCountExceededException {

    // nothing is sampled from this distribution, so a null rng is passed to skip
    // the cost of creating one
    final double degreesOfFreedom = expected.length - 2.0;
    final ChiSquaredDistribution chiSquared =
            new ChiSquaredDistribution(null, degreesOfFreedom);

    final double gStatistic = g(expected, observed);
    return 1.0 - chiSquared.cumulativeProbability(gStatistic);
}
 
Example #27
Source File: GTest.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * <p>Computes the <i>observed significance level</i>, or <a href=
 * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
 * p-value</a>, of a G-Value (Log-Likelihood Ratio) two sample test that
 * compares the bin frequency counts in {@code observed1} and
 * {@code observed2}.</p>
 *
 * <p>The result is the smallest significance level at which the null
 * hypothesis (the observed counts conform to the same distribution) can be
 * rejected.</p>
 *
 * <p>See {@link #gTest(double[], long[])} for details on how the p-value is
 * computed. The degrees of freedom used to perform the test is one less than
 * the length of {@code observed1}.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li>Observed counts must be non-negative.</li>
 * <li>Observed counts for a specific bin must not both be zero.</li>
 * <li>Observed counts for a specific sample must not all be 0.</li>
 * <li>The arrays {@code observed1} and {@code observed2} must
 * have the same length and their common length must be at least 2.</li>
 * </ul>
 *
 * <p>If any of the preconditions are not met, a
 * {@code MathIllegalArgumentException} is thrown.</p>
 *
 * @param observed1 array of observed frequency counts of the first data set
 * @param observed2 array of observed frequency counts of the second data
 * set
 * @return p-value
 * @throws DimensionMismatchException if the length of the arrays does not
 * match or their common length is less than 2
 * @throws NotPositiveException if any of the entries in {@code observed1} or
 * {@code observed2} are negative
 * @throws ZeroException if either all counts of {@code observed1} or
 * {@code observed2} are zero, or if the count at some index is
 * zero for both arrays
 * @throws MaxCountExceededException if an error occurs computing the
 * p-value.
 */
public double gTestDataSetsComparison(final long[] observed1,
        final long[] observed2)
        throws DimensionMismatchException, NotPositiveException, ZeroException,
        MaxCountExceededException {

    // nothing is sampled from this distribution, so a null rng is passed to skip
    // the cost of creating one
    final double degreesOfFreedom = (double) observed1.length - 1;
    final ChiSquaredDistribution chiSquared =
            new ChiSquaredDistribution(null, degreesOfFreedom);

    final double gStatistic = gDataSetsComparison(observed1, observed2);
    return 1 - chiSquared.cumulativeProbability(gStatistic);
}
 
Example #28
Source File: ChiSquareTest.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Computes the <i>observed significance level</i>, or <a href=
 * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
 * p-value</a>, of a
 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
 * Chi-square goodness of fit test</a> that compares the <code>observed</code>
 * frequency counts with those in the <code>expected</code> array.
 * <p>
 * The result is the smallest significance level at which the null hypothesis
 * that the observed counts conform to the frequency distribution described
 * by the expected counts can be rejected.</p>
 * <p>
 * <strong>Preconditions</strong>:</p>
 * <ul>
 * <li>Expected counts must all be positive.</li>
 * <li>Observed counts must all be &ge; 0.</li>
 * <li>The observed and expected arrays must have the same length and
 * their common length must be at least 2.</li>
 * </ul>
 * <p>
 * If any of the preconditions are not met, an
 * <code>IllegalArgumentException</code> is thrown.</p>
 * <p><strong>Note: </strong>This implementation rescales the
 * <code>expected</code> array if necessary to ensure that the sums of the
 * expected and observed counts are equal.</p>
 *
 * @param expected array of expected frequency counts
 * @param observed array of observed frequency counts
 * @return p-value
 * @throws NotPositiveException if <code>observed</code> has negative entries
 * @throws NotStrictlyPositiveException if <code>expected</code> has entries that are
 * not strictly positive
 * @throws DimensionMismatchException if the arrays length is less than 2
 * @throws MaxCountExceededException if an error occurs computing the p-value
 */
public double chiSquareTest(final double[] expected, final long[] observed)
    throws NotPositiveException, NotStrictlyPositiveException,
    DimensionMismatchException, MaxCountExceededException {

    // One degree of freedom fewer than the number of bins.
    final double degreesOfFreedom = expected.length - 1.0;

    // The distribution is only evaluated, never sampled, so a null rng is
    // supplied to skip the unneeded generator overhead.
    final ChiSquaredDistribution chiSquared =
        new ChiSquaredDistribution(null, degreesOfFreedom);

    // Computed after the distribution is built, as in the original ordering.
    final double statistic = chiSquare(expected, observed);
    final double lowerTail = chiSquared.cumulativeProbability(statistic);

    // The p-value is the upper-tail probability of the statistic.
    return 1.0 - lowerTail;
}
 
Example #29
Source File: ChiSquareTest.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Returns the <i>observed significance level</i>, or <a href=
 * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
 * p-value</a>, associated with a
 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
 * chi-square test of independence</a> based on the input <code>counts</code>
 * array, viewed as a two-way table.
 * <p>
 * The rows of the 2-way table are
 * <code>counts[0], ... , counts[counts.length - 1] </code></p>
 * <p>
 * <strong>Preconditions</strong>:</p>
 * <ul>
 * <li>All counts must be &ge; 0.</li>
 * <li>The count array must be rectangular (i.e. all counts[i] subarrays must have
 *     the same length).</li>
 * <li>The 2-way table represented by <code>counts</code> must have at least 2
 *     columns and at least 2 rows.</li>
 * </ul>
 * <p>
 * If any of the preconditions are not met, an
 * <code>IllegalArgumentException</code> is thrown.</p>
 *
 * @param counts array representation of 2-way table
 * @return p-value
 * @throws NullArgumentException if the array is null
 * @throws DimensionMismatchException if the array is not rectangular
 * @throws NotPositiveException if {@code counts} has negative entries
 * @throws MaxCountExceededException if an error occurs computing the p-value
 */
public double chiSquareTest(final long[][] counts)
    throws NullArgumentException, DimensionMismatchException,
    NotPositiveException, MaxCountExceededException {

    // Validate the table before counts[0] is dereferenced below.
    checkArray(counts);
    // Degrees of freedom for a two-way table: (rows - 1) * (columns - 1).
    double df = ((double) counts.length -1) * ((double) counts[0].length - 1);
    // pass a null rng to avoid unneeded overhead as we will not sample from this distribution
    final ChiSquaredDistribution distribution = new ChiSquaredDistribution(null, df);
    // The p-value is the upper-tail probability of the test statistic.
    return 1 - distribution.cumulativeProbability(chiSquare(counts));

}
 
Example #30
Source File: ChiSquareTest.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * <p>Computes the <i>observed significance level</i>, or <a href=
 * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
 * p-value</a>, of a Chi-Square two sample test that compares the bin
 * frequency counts in <code>observed1</code> with those in
 * <code>observed2</code>.
 * </p>
 * <p>The result is the smallest significance level at which the null
 * hypothesis that both sets of observed counts conform to the same
 * distribution can be rejected.
 * </p>
 * <p>See {@link #chiSquareDataSetsComparison(long[], long[])} for details
 * on the formula used to compute the test statistic. The test uses one
 * degree of freedom fewer than the common length of the input observed
 * count arrays.
 * </p>
 * <strong>Preconditions</strong>: <ul>
 * <li>Observed counts must be non-negative.</li>
 * <li>Observed counts for a specific bin must not both be zero.</li>
 * <li>Observed counts for a specific sample must not all be 0.</li>
 * <li>The arrays <code>observed1</code> and <code>observed2</code> must
 * have the same length and their common length must be at least 2.</li>
 * </ul>
 * <p>
 * If any of the preconditions are not met, an
 * <code>IllegalArgumentException</code> is thrown.</p>
 *
 * @param observed1 array of observed frequency counts of the first data set
 * @param observed2 array of observed frequency counts of the second data set
 * @return p-value
 * @throws DimensionMismatchException if the lengths of the arrays do not match
 * @throws NotPositiveException if any entries in <code>observed1</code> or
 * <code>observed2</code> are negative
 * @throws ZeroException if either all counts of <code>observed1</code> or
 * <code>observed2</code> are zero, or if the count at the same index is zero
 * for both arrays
 * @throws MaxCountExceededException if an error occurs computing the p-value
 * @since 1.2
 */
public double chiSquareTestDataSetsComparison(long[] observed1, long[] observed2)
    throws DimensionMismatchException, NotPositiveException, ZeroException,
    MaxCountExceededException {

    // One degree of freedom fewer than the number of bins.
    final double degreesOfFreedom = observed1.length - 1.0;

    // The distribution is only evaluated, never sampled, so a null rng is
    // supplied to skip the unneeded generator overhead.
    final ChiSquaredDistribution chiSquared =
            new ChiSquaredDistribution(null, degreesOfFreedom);

    // Computed after the distribution is built, as in the original ordering.
    final double statistic = chiSquareDataSetsComparison(observed1, observed2);
    final double lowerTail = chiSquared.cumulativeProbability(statistic);

    // The p-value is the upper-tail probability of the statistic.
    return 1 - lowerTail;

}