Java Code Examples for org.apache.commons.math3.distribution.FDistribution#cumulativeProbability()

The following examples show how to use org.apache.commons.math3.distribution.FDistribution#cumulativeProbability(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TTestSignificanceTestOperator.java    From rapidminer-studio with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Computes the probability used to judge whether the averages of two performance criteria
 * differ significantly.
 *
 * <p>
 * The statistic is the squared difference of the two averages, scaled by
 * {@code 1 / (1/n1 + 1/n2)} and divided by the count-weighted combination of both variances.
 * It is evaluated against an F-distribution with {@code 1} and {@code n1 + n2 - 2} degrees of
 * freedom, where {@code n1} and {@code n2} are the average counts of the two criteria.
 * </p>
 *
 * @param pc1
 *            the first performance criterion
 * @param pc2
 *            the second performance criterion
 * @return {@code 1 - F.cumulativeProbability(statistic)}, or {@code 1} when
 *         {@code n1 + n2 - 2} is not positive and the F-distribution is therefore undefined
 */
private double getProbability(PerformanceCriterion pc1, PerformanceCriterion pc2) {
	// variances of both criteria, weighted by (count - 1) and normalized by the combined count - 2
	double combinedVariance = ((pc1.getAverageCount() - 1) * pc1.getVariance()
			+ (pc2.getAverageCount() - 1) * pc2.getVariance())
			/ (pc1.getAverageCount() + pc2.getAverageCount() - 2);
	double scale = 1.0d / (1.0d / pc1.getAverageCount() + 1.0d / pc2.getAverageCount());
	double averageDifference = pc1.getAverage() - pc2.getAverage();
	double statistic = scale * averageDifference * averageDifference / combinedVariance;
	int denominatorDegreesOfFreedom = pc1.getAverageCount() + pc2.getAverageCount() - 2;

	if (denominatorDegreesOfFreedom <= 0) {
		// the F-distribution is not well defined here, so the probability cannot be calculated
		// correctly; 1 is returned because such a result is not significant
		return 1;
	}

	return 1 - new FDistribution(1, denominatorDegreesOfFreedom).cumulativeProbability(statistic);
}
 
Example 2
Source File: TTestLinearRegressionMethod.java    From rapidminer-studio with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Returns the PValue of the attributeIndex-th attribute that expresses the probability that the
 * coefficient is only random.
 *
 * <p>
 * The standard error of the coefficient is derived from the attribute's tolerance (as reported
 * by {@code regression.getTolerance(...)}), the general correlation, the number of examples and
 * attributes, and the standard deviations of the label and of the attribute. The squared ratio
 * of coefficient to standard error is then passed to
 * {@code fdistribution.cumulativeProbability(...)} and that value is returned unchanged.
 * </p>
 *
 * @param coefficient
 *            the regression coefficient of the attribute
 * @param attributeIndex
 *            index of the attribute, used for the tolerance and for {@code standardDeviations}
 * @param regression
 *            the regression used to obtain the tolerance
 * @param useBias
 *            forwarded to the tolerance computation
 * @param ridge
 *            forwarded to the tolerance computation
 * @param exampleSet
 *            the example set; its size and attribute count enter the standard error
 * @param isUsedAttribute
 *            forwarded to the tolerance computation
 * @param standardDeviations
 *            standard deviations indexed by attribute
 * @param labelStandardDeviation
 *            standard deviation of the label
 * @param fdistribution
 *            the F distribution the squared statistic is evaluated against
 * @param generalCorrelation
 *            the general correlation; {@code 1 - generalCorrelation} enters the standard error
 * @return the cumulative probability of the squared t statistic under {@code fdistribution}
 * @throws UndefinedParameterError
 *             declared by this method; presumably propagated from the tolerance computation
 * @throws ProcessStoppedException
 *             declared by this method; presumably propagated from the tolerance computation
 */
protected double getPValue(double coefficient, int attributeIndex, LinearRegression regression, boolean useBias,
		double ridge, ExampleSet exampleSet, boolean[] isUsedAttribute, double[] standardDeviations,
		double labelStandardDeviation, FDistribution fdistribution, double generalCorrelation)
		throws UndefinedParameterError, ProcessStoppedException {
	double tolerance = regression.getTolerance(exampleSet, isUsedAttribute, attributeIndex, ridge, useBias);

	// number of examples minus number of attributes minus one
	double degreesOfFreedom = exampleSet.size() - exampleSet.getAttributes().size() - 1.0d;
	double unexplainedRoot = Math.sqrt((1.0d - generalCorrelation) / (tolerance * degreesOfFreedom));
	double standardError = unexplainedRoot * labelStandardDeviation / standardDeviations[attributeIndex];

	// the squared t statistic is evaluated against the supplied F distribution
	double tStatistics = coefficient / standardError;
	return fdistribution.cumulativeProbability(tStatistics * tStatistics);
}
 
Example 3
Source File: AnovaCalculator.java    From rapidminer-studio with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Stores the given ANOVA inputs and derives the mean squares, the F value and the probability
 * from them.
 *
 * <p>
 * Each mean square is the corresponding sum of squares divided by its degrees of freedom, the
 * F value is the ratio of the two mean squares, and the probability is
 * {@code 1 - cumulativeProbability(fValue)} of an F-distribution with {@code df1} and
 * {@code df2} degrees of freedom.
 * </p>
 *
 * @param sumSquaresBetween
 *            the sum of squares between, divided by {@code df1}
 * @param sumSquaresResiduals
 *            the sum of squares of the residuals, divided by {@code df2}
 * @param df1
 *            first degrees of freedom of the F-distribution
 * @param df2
 *            second degrees of freedom of the F-distribution
 * @param alpha
 *            the alpha value, stored as given
 */
public AnovaSignificanceTestResult(double sumSquaresBetween, double sumSquaresResiduals, int df1, int df2,
		double alpha) {
	// plain inputs
	this.alpha = alpha;
	this.df1 = df1;
	this.df2 = df2;
	this.sumSquaresBetween = sumSquaresBetween;
	this.sumSquaresResiduals = sumSquaresResiduals;

	// derived statistics
	this.meanSquaresBetween = sumSquaresBetween / df1;
	this.meanSquaresResiduals = sumSquaresResiduals / df2;
	this.fValue = this.meanSquaresBetween / this.meanSquaresResiduals;
	this.prob = 1.0d - new FDistribution(df1, df2).cumulativeProbability(this.fValue);
}
 
Example 4
Source File: SignificanceTask.java    From mzmine2 with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Runs a one-way ANOVA over the given groups of intensities and returns the p-value.
 *
 * <p>The F statistic is the mean square of treatment (between groups) divided by the mean
 * square of error (within groups). The p-value is {@code 1 - cumulativeProbability(F)} of an
 * F-distribution with {@code numGroups - 1} and {@code numIntensities - numGroups} degrees
 * of freedom.
 *
 * @param intensityGroups one array of intensities per group
 * @return the p-value, or {@code null} when either degrees of freedom is not positive, the
 *         mean square of error is exactly zero, or the F-distribution rejects its arguments
 */
@Nullable
private Double oneWayAnova(@Nonnull double[][] intensityGroups) {

  int numGroups = intensityGroups.length;
  long numIntensities = Arrays.stream(intensityGroups)
      .flatMapToDouble(Arrays::stream)
      .count();

  // An empty group gets a mean of 0.0 here; it carries zero weight in the treatment sum below.
  double[] groupMeans = Arrays.stream(intensityGroups)
      .mapToDouble(intensities -> Arrays.stream(intensities).average().orElse(0.0))
      .toArray();

  double overallMean = Arrays.stream(intensityGroups)
      .flatMapToDouble(Arrays::stream)
      .average()
      .orElse(0.0);

  // Within-group variation: squared deviations of every value from its own group mean.
  double sumOfSquaresOfError = IntStream.range(0, intensityGroups.length)
      .mapToDouble(i -> Arrays
          .stream(intensityGroups[i])
          .map(x -> x - groupMeans[i])
          .map(x -> x * x)
          .sum())
      .sum();

  // Between-group variation: squared deviation of each group mean from the overall mean,
  // weighted by the number of values in that group (not by the number of groups).
  double sumOfSquaresOfTreatment = IntStream.range(0, numGroups)
      .mapToDouble(i -> {
        double deviation = groupMeans[i] - overallMean;
        return intensityGroups[i].length * deviation * deviation;
      })
      .sum();

  long degreesOfFreedomOfTreatment = numGroups - 1;
  long degreesOfFreedomOfError = numIntensities - numGroups;

  if (degreesOfFreedomOfTreatment <= 0 || degreesOfFreedomOfError <= 0) {
    return null;
  }

  double meanSquareOfTreatment = sumOfSquaresOfTreatment / degreesOfFreedomOfTreatment;
  double meanSquareOfError = sumOfSquaresOfError / degreesOfFreedomOfError;

  // Without any within-group variation the F statistic is undefined.
  if (meanSquareOfError == 0.0) {
    return null;
  }

  double anovaStatistics = meanSquareOfTreatment / meanSquareOfError;

  Double pValue = null;
  try {
    FDistribution distribution = new FDistribution(
        degreesOfFreedomOfTreatment, degreesOfFreedomOfError);
    pValue = 1.0 - distribution.cumulativeProbability(anovaStatistics);
  }
  catch (MathIllegalArgumentException ex) {
    // Best effort: report the problem and fall through to the null result.
    logger.warning("Error during F-distribution calculation: " + ex.getMessage());
  }

  return pValue;
}
 
Example 5
Source File: AnovaTask.java    From mzmine3 with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Runs a one-way ANOVA over the given groups of intensities and returns the p-value.
 *
 * <p>The F statistic is the mean square of treatment (between groups) divided by the mean
 * square of error (within groups). The p-value is {@code 1 - cumulativeProbability(F)} of an
 * F-distribution with {@code numGroups - 1} and {@code numIntensities - numGroups} degrees
 * of freedom.
 *
 * @param intensityGroups one array of intensities per group
 * @return the p-value, or {@code null} when either degrees of freedom is not positive, the
 *         mean square of error is exactly zero, or the F-distribution rejects its arguments
 */
@Nullable
private Double oneWayAnova(@Nonnull double[][] intensityGroups) {

  int numGroups = intensityGroups.length;
  long numIntensities = Arrays.stream(intensityGroups).flatMapToDouble(Arrays::stream).count();

  // An empty group gets a mean of 0.0 here; it carries zero weight in the treatment sum below.
  double[] groupMeans = Arrays.stream(intensityGroups)
      .mapToDouble(intensities -> Arrays.stream(intensities).average().orElse(0.0)).toArray();

  double overallMean =
      Arrays.stream(intensityGroups).flatMapToDouble(Arrays::stream).average().orElse(0.0);

  // Within-group variation: squared deviations of every value from its own group mean.
  double sumOfSquaresOfError = IntStream.range(0, intensityGroups.length).mapToDouble(
      i -> Arrays.stream(intensityGroups[i]).map(x -> x - groupMeans[i]).map(x -> x * x).sum())
      .sum();

  // Between-group variation: squared deviation of each group mean from the overall mean,
  // weighted by the number of values in that group (not by the number of groups).
  double sumOfSquaresOfTreatment = IntStream.range(0, numGroups).mapToDouble(i -> {
    double deviation = groupMeans[i] - overallMean;
    return intensityGroups[i].length * deviation * deviation;
  }).sum();

  long degreesOfFreedomOfTreatment = numGroups - 1;
  long degreesOfFreedomOfError = numIntensities - numGroups;

  if (degreesOfFreedomOfTreatment <= 0 || degreesOfFreedomOfError <= 0) {
    return null;
  }

  double meanSquareOfTreatment = sumOfSquaresOfTreatment / degreesOfFreedomOfTreatment;
  double meanSquareOfError = sumOfSquaresOfError / degreesOfFreedomOfError;

  // Without any within-group variation the F statistic is undefined.
  if (meanSquareOfError == 0.0) {
    return null;
  }

  double anovaStatistics = meanSquareOfTreatment / meanSquareOfError;

  Double pValue = null;
  try {
    FDistribution distribution =
        new FDistribution(degreesOfFreedomOfTreatment, degreesOfFreedomOfError);
    pValue = 1.0 - distribution.cumulativeProbability(anovaStatistics);
  } catch (MathIllegalArgumentException ex) {
    // Best effort: report the problem and fall through to the null result.
    logger.warning("Error during F-distribution calculation: " + ex.getMessage());
  }

  return pValue;
}
 
Example 6
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Returns the one-way ANOVA p-value for the supplied categories, each
 * summarized by a {@link SummaryStatistics} instance.
 *
 * <p>The p-value is derived from the commons-math
 * {@link org.apache.commons.math3.distribution.FDistribution F distribution}
 * implementation using</p>
 * <pre>
 *   p = 1 - cumulativeProbability(F)</pre>
 * <p>where <code>F</code> is the F value and <code>cumulativeProbability</code>
 * is the cumulative probability function of that F distribution.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li><code>categoryData</code> must contain {@link SummaryStatistics}.</li>
 * <li><code>categoryData</code> must hold at least two
 * {@link SummaryStatistics}, each of which must contain at least two
 * values.</li>
 * </ul>
 *
 * @param categoryData <code>Collection</code> of {@link SummaryStatistics},
 * one per category
 * @param allowOneElementData if true, allow computation for one category
 * only or for one data element per category
 * @return the p-value
 * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
 * @throws DimensionMismatchException if <code>categoryData</code> holds fewer
 * than two elements or a contained {@link SummaryStatistics} does not have
 * at least two values
 * @throws ConvergenceException if the p-value can not be computed due to a convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is exceeded
 * @since 3.2
 */
public double anovaPValue(final Collection<SummaryStatistics> categoryData,
                          final boolean allowOneElementData)
    throws NullArgumentException, DimensionMismatchException,
           ConvergenceException, MaxCountExceededException {

    final AnovaStats stats = anovaStats(categoryData, allowOneElementData);
    // The distribution is never sampled, so a null rng is passed to avoid
    // unneeded overhead.
    final double cumulative =
        new FDistribution(null, stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);
    return 1.0 - cumulative;

}
 
Example 7
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Returns the one-way ANOVA p-value for the supplied categories, each given
 * as a <code>double[]</code> array of values.
 *
 * <p>The p-value is derived from the commons-math
 * {@link org.apache.commons.math3.distribution.FDistribution F distribution}
 * implementation using</p>
 * <pre>
 *   p = 1 - cumulativeProbability(F)</pre>
 * <p>where <code>F</code> is the F value and <code>cumulativeProbability</code>
 * is the cumulative probability function of that F distribution.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li><code>categoryData</code> must contain <code>double[]</code> arrays.</li>
 * <li><code>categoryData</code> must hold at least two arrays, each of
 * which must contain at least two values.</li>
 * </ul>
 *
 * @param categoryData <code>Collection</code> of <code>double[]</code>
 * arrays, one array per category
 * @return the p-value
 * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
 * @throws DimensionMismatchException if <code>categoryData</code> holds fewer
 * than two arrays or a contained <code>double[]</code> array does not have
 * at least two values
 * @throws ConvergenceException if the p-value can not be computed due to a convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is exceeded
 */
public double anovaPValue(final Collection<double[]> categoryData)
    throws NullArgumentException, DimensionMismatchException,
    ConvergenceException, MaxCountExceededException {

    final AnovaStats stats = anovaStats(categoryData);
    // The arguments are valid at this point, so no try-catch or extra
    // advertised exception is needed. The distribution is never sampled,
    // so a null rng is passed to avoid unneeded overhead.
    final double cumulative =
        new FDistribution(null, stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);
    return 1.0 - cumulative;

}
 
Example 8
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Returns the one-way ANOVA p-value for the supplied categories, each
 * summarized by a {@link SummaryStatistics} instance.
 *
 * <p>The p-value is derived from the commons-math
 * {@link org.apache.commons.math3.distribution.FDistribution F distribution}
 * implementation using</p>
 * <pre>
 *   p = 1 - cumulativeProbability(F)</pre>
 * <p>where <code>F</code> is the F value and <code>cumulativeProbability</code>
 * is the cumulative probability function of that F distribution.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li><code>categoryData</code> must contain {@link SummaryStatistics}.</li>
 * <li><code>categoryData</code> must hold at least two
 * {@link SummaryStatistics}, each of which must contain at least two
 * values.</li>
 * </ul>
 *
 * @param categoryData <code>Collection</code> of {@link SummaryStatistics},
 * one per category
 * @param allowOneElementData if true, allow computation for one category
 * only or for one data element per category
 * @return the p-value
 * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
 * @throws DimensionMismatchException if <code>categoryData</code> holds fewer
 * than two elements or a contained {@link SummaryStatistics} does not have
 * at least two values
 * @throws ConvergenceException if the p-value can not be computed due to a convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is exceeded
 * @since 3.2
 */
public double anovaPValue(final Collection<SummaryStatistics> categoryData,
                          final boolean allowOneElementData)
    throws NullArgumentException, DimensionMismatchException,
           ConvergenceException, MaxCountExceededException {

    final AnovaStats stats = anovaStats(categoryData, allowOneElementData);
    // F distribution with the between-group and within-group degrees of freedom
    final double cumulative =
        new FDistribution(stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);
    return 1.0 - cumulative;

}
 
Example 9
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Returns the one-way ANOVA p-value for the supplied categories, each given
 * as a <code>double[]</code> array of values.
 *
 * <p>The p-value is derived from the commons-math
 * {@link org.apache.commons.math3.distribution.FDistribution F distribution}
 * implementation using</p>
 * <pre>
 *   p = 1 - cumulativeProbability(F)</pre>
 * <p>where <code>F</code> is the F value and <code>cumulativeProbability</code>
 * is the cumulative probability function of that F distribution.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li><code>categoryData</code> must contain <code>double[]</code> arrays.</li>
 * <li><code>categoryData</code> must hold at least two arrays, each of
 * which must contain at least two values.</li>
 * </ul>
 *
 * @param categoryData <code>Collection</code> of <code>double[]</code>
 * arrays, one array per category
 * @return the p-value
 * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
 * @throws DimensionMismatchException if <code>categoryData</code> holds fewer
 * than two arrays or a contained <code>double[]</code> array does not have
 * at least two values
 * @throws ConvergenceException if the p-value can not be computed due to a convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is exceeded
 */
public double anovaPValue(final Collection<double[]> categoryData)
    throws NullArgumentException, DimensionMismatchException,
    ConvergenceException, MaxCountExceededException {

    final AnovaStats stats = anovaStats(categoryData);
    // The arguments are valid at this point, so no try-catch or extra
    // advertised exception is needed.
    final double cumulative =
        new FDistribution(stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);
    return 1.0 - cumulative;

}
 
Example 10
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Returns the one-way ANOVA p-value for the supplied categories, each
 * summarized by a {@link SummaryStatistics} instance.
 *
 * <p>The p-value is derived from the commons-math
 * {@link org.apache.commons.math3.distribution.FDistribution F distribution}
 * implementation using</p>
 * <pre>
 *   p = 1 - cumulativeProbability(F)</pre>
 * <p>where <code>F</code> is the F value and <code>cumulativeProbability</code>
 * is the cumulative probability function of that F distribution.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li><code>categoryData</code> must contain {@link SummaryStatistics}.</li>
 * <li><code>categoryData</code> must hold at least two
 * {@link SummaryStatistics}, each of which must contain at least two
 * values.</li>
 * </ul>
 *
 * @param categoryData <code>Collection</code> of {@link SummaryStatistics},
 * one per category
 * @param allowOneElementData if true, allow computation for one category
 * only or for one data element per category
 * @return the p-value
 * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
 * @throws DimensionMismatchException if <code>categoryData</code> holds fewer
 * than two elements or a contained {@link SummaryStatistics} does not have
 * at least two values
 * @throws ConvergenceException if the p-value can not be computed due to a convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is exceeded
 */
public double anovaPValue(final Collection<SummaryStatistics> categoryData,
                          final boolean allowOneElementData)
    throws NullArgumentException, DimensionMismatchException,
           ConvergenceException, MaxCountExceededException {

    final AnovaStats stats = anovaStats(categoryData, allowOneElementData);
    // F distribution with the between-group and within-group degrees of freedom
    final double cumulative =
        new FDistribution(stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);
    return 1.0 - cumulative;

}
 
Example 11
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Returns the one-way ANOVA p-value for the supplied categories, each given
 * as a <code>double[]</code> array of values.
 *
 * <p>The p-value is derived from the commons-math
 * {@link org.apache.commons.math3.distribution.FDistribution F distribution}
 * implementation using</p>
 * <pre>
 *   p = 1 - cumulativeProbability(F)</pre>
 * <p>where <code>F</code> is the F value and <code>cumulativeProbability</code>
 * is the cumulative probability function of that F distribution.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li><code>categoryData</code> must contain <code>double[]</code> arrays.</li>
 * <li><code>categoryData</code> must hold at least two arrays, each of
 * which must contain at least two values.</li>
 * </ul>
 *
 * @param categoryData <code>Collection</code> of <code>double[]</code>
 * arrays, one array per category
 * @return the p-value
 * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
 * @throws DimensionMismatchException if <code>categoryData</code> holds fewer
 * than two arrays or a contained <code>double[]</code> array does not have
 * at least two values
 * @throws ConvergenceException if the p-value can not be computed due to a convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is exceeded
 */
public double anovaPValue(final Collection<double[]> categoryData)
    throws NullArgumentException, DimensionMismatchException,
    ConvergenceException, MaxCountExceededException {

    final AnovaStats stats = anovaStats(categoryData);
    // The arguments are valid at this point, so no try-catch or extra
    // advertised exception is needed.
    final double cumulative =
        new FDistribution(stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);
    return 1.0 - cumulative;

}
 
Example 12
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Returns the one-way ANOVA p-value for the supplied categories, each given
 * as a <code>double[]</code> array of values.
 *
 * <p>The p-value is derived from the commons-math
 * {@link org.apache.commons.math3.distribution.FDistribution F distribution}
 * implementation using</p>
 * <pre>
 *   p = 1 - cumulativeProbability(F)</pre>
 * <p>where <code>F</code> is the F value and <code>cumulativeProbability</code>
 * is the cumulative probability function of that F distribution.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li><code>categoryData</code> must contain <code>double[]</code> arrays.</li>
 * <li><code>categoryData</code> must hold at least two arrays, each of
 * which must contain at least two values.</li>
 * </ul>
 *
 * @param categoryData <code>Collection</code> of <code>double[]</code>
 * arrays, one array per category
 * @return the p-value
 * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
 * @throws DimensionMismatchException if <code>categoryData</code> holds fewer
 * than two arrays or a contained <code>double[]</code> array does not have
 * at least two values
 * @throws ConvergenceException if the p-value can not be computed due to a convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is exceeded
 */
public double anovaPValue(final Collection<double[]> categoryData)
    throws NullArgumentException, DimensionMismatchException,
    ConvergenceException, MaxCountExceededException {

    final AnovaStats stats = anovaStats(categoryData);
    // F distribution with the between-group and within-group degrees of freedom
    final double cumulative =
        new FDistribution(stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);
    return 1.0 - cumulative;

}
 
Example 13
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Returns the one-way ANOVA p-value for the supplied categories, each
 * summarized by a {@link SummaryStatistics} instance.
 *
 * <p>The p-value is derived from the commons-math
 * {@link org.apache.commons.math3.distribution.FDistribution F distribution}
 * implementation using</p>
 * <pre>
 *   p = 1 - cumulativeProbability(F)</pre>
 * <p>where <code>F</code> is the F value and <code>cumulativeProbability</code>
 * is the cumulative probability function of that F distribution.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li><code>categoryData</code> must contain {@link SummaryStatistics}.</li>
 * <li><code>categoryData</code> must hold at least two
 * {@link SummaryStatistics}, each of which must contain at least two
 * values.</li>
 * </ul>
 *
 * @param categoryData <code>Collection</code> of {@link SummaryStatistics},
 * one per category
 * @param allowOneElementData if true, allow computation for one category
 * only or for one data element per category
 * @return the p-value
 * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
 * @throws DimensionMismatchException if <code>categoryData</code> holds fewer
 * than two elements or a contained {@link SummaryStatistics} does not have
 * at least two values
 * @throws ConvergenceException if the p-value can not be computed due to a convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is exceeded
 * @since 3.2
 */
public double anovaPValue(final Collection<SummaryStatistics> categoryData,
                          final boolean allowOneElementData)
    throws NullArgumentException, DimensionMismatchException,
           ConvergenceException, MaxCountExceededException {

    final AnovaStats stats = anovaStats(categoryData, allowOneElementData);
    // F distribution with the between-group and within-group degrees of freedom
    final double cumulative =
        new FDistribution(stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);
    return 1.0 - cumulative;

}
 
Example 14
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Returns the one-way ANOVA p-value for the supplied categories, each given
 * as a <code>double[]</code> array of values.
 *
 * <p>The p-value is derived from the commons-math
 * {@link org.apache.commons.math3.distribution.FDistribution F distribution}
 * implementation using</p>
 * <pre>
 *   p = 1 - cumulativeProbability(F)</pre>
 * <p>where <code>F</code> is the F value and <code>cumulativeProbability</code>
 * is the cumulative probability function of that F distribution.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li><code>categoryData</code> must contain <code>double[]</code> arrays.</li>
 * <li><code>categoryData</code> must hold at least two arrays, each of
 * which must contain at least two values.</li>
 * </ul>
 *
 * @param categoryData <code>Collection</code> of <code>double[]</code>
 * arrays, one array per category
 * @return the p-value
 * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
 * @throws DimensionMismatchException if <code>categoryData</code> holds fewer
 * than two arrays or a contained <code>double[]</code> array does not have
 * at least two values
 * @throws ConvergenceException if the p-value can not be computed due to a convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is exceeded
 */
public double anovaPValue(final Collection<double[]> categoryData)
    throws NullArgumentException, DimensionMismatchException,
    ConvergenceException, MaxCountExceededException {

    final AnovaStats stats = anovaStats(categoryData);
    // The arguments are valid at this point, so no try-catch or extra
    // advertised exception is needed.
    final double cumulative =
        new FDistribution(stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);
    return 1.0 - cumulative;

}
 
Example 15
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Returns the one-way ANOVA p-value for the supplied categories, each given
 * as a <code>double[]</code> array of values.
 *
 * <p>The p-value is derived from the commons-math
 * {@link org.apache.commons.math3.distribution.FDistribution F distribution}
 * implementation using</p>
 * <pre>
 *   p = 1 - cumulativeProbability(F)</pre>
 * <p>where <code>F</code> is the F value and <code>cumulativeProbability</code>
 * is the cumulative probability function of that F distribution.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li><code>categoryData</code> must contain <code>double[]</code> arrays.</li>
 * <li><code>categoryData</code> must hold at least two arrays, each of
 * which must contain at least two values.</li>
 * </ul>
 *
 * @param categoryData <code>Collection</code> of <code>double[]</code>
 * arrays, one array per category
 * @return the p-value
 * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
 * @throws DimensionMismatchException if <code>categoryData</code> holds fewer
 * than two arrays or a contained <code>double[]</code> array does not have
 * at least two values
 * @throws ConvergenceException if the p-value can not be computed due to a convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is exceeded
 */
public double anovaPValue(final Collection<double[]> categoryData)
    throws NullArgumentException, DimensionMismatchException,
    ConvergenceException, MaxCountExceededException {

    final AnovaStats stats = anovaStats(categoryData);
    // F distribution with the between-group and within-group degrees of freedom
    final double cumulative =
        new FDistribution(stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);
    return 1.0 - cumulative;

}
 
Example 16
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Returns the one-way ANOVA p-value for the supplied categories, each given
 * as a <code>double[]</code> array of values.
 *
 * <p>The p-value is derived from the commons-math
 * {@link org.apache.commons.math3.distribution.FDistribution F distribution}
 * implementation using</p>
 * <pre>
 *   p = 1 - cumulativeProbability(F)</pre>
 * <p>where <code>F</code> is the F value and <code>cumulativeProbability</code>
 * is the cumulative probability function of that F distribution.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li><code>categoryData</code> must contain <code>double[]</code> arrays.</li>
 * <li><code>categoryData</code> must hold at least two arrays, each of
 * which must contain at least two values.</li>
 * </ul>
 *
 * @param categoryData <code>Collection</code> of <code>double[]</code>
 * arrays, one array per category
 * @return the p-value
 * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
 * @throws DimensionMismatchException if <code>categoryData</code> holds fewer
 * than two arrays or a contained <code>double[]</code> array does not have
 * at least two values
 * @throws ConvergenceException if the p-value can not be computed due to a convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is exceeded
 */
public double anovaPValue(final Collection<double[]> categoryData)
    throws NullArgumentException, DimensionMismatchException,
    ConvergenceException, MaxCountExceededException {

    final AnovaStats stats = anovaStats(categoryData);
    // F distribution with the between-group and within-group degrees of freedom
    final double cumulative =
        new FDistribution(stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);
    return 1.0 - cumulative;

}
 
Example 17
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Returns the one-way ANOVA p-value for the supplied categories, each
 * summarized by a {@link SummaryStatistics} instance.
 *
 * <p>The p-value is derived from the commons-math
 * {@link org.apache.commons.math3.distribution.FDistribution F distribution}
 * implementation using</p>
 * <pre>
 *   p = 1 - cumulativeProbability(F)</pre>
 * <p>where <code>F</code> is the F value and <code>cumulativeProbability</code>
 * is the cumulative probability function of that F distribution.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li><code>categoryData</code> must contain {@link SummaryStatistics}.</li>
 * <li><code>categoryData</code> must hold at least two
 * {@link SummaryStatistics}, each of which must contain at least two
 * values.</li>
 * </ul>
 *
 * @param categoryData <code>Collection</code> of {@link SummaryStatistics},
 * one per category
 * @param allowOneElementData if true, allow computation for one category
 * only or for one data element per category
 * @return the p-value
 * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
 * @throws DimensionMismatchException if <code>categoryData</code> holds fewer
 * than two elements or a contained {@link SummaryStatistics} does not have
 * at least two values
 * @throws ConvergenceException if the p-value can not be computed due to a convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is exceeded
 * @since 3.2
 */
public double anovaPValue(final Collection<SummaryStatistics> categoryData,
                          final boolean allowOneElementData)
    throws NullArgumentException, DimensionMismatchException,
           ConvergenceException, MaxCountExceededException {

    final AnovaStats stats = anovaStats(categoryData, allowOneElementData);
    // The distribution is never sampled, so a null rng is passed to avoid
    // unneeded overhead.
    final double cumulative =
        new FDistribution(null, stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);
    return 1.0 - cumulative;

}
 
Example 18
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Returns the one-way ANOVA p-value for the supplied categories, each given
 * as a <code>double[]</code> array of values.
 *
 * <p>The p-value is derived from the commons-math
 * {@link org.apache.commons.math3.distribution.FDistribution F distribution}
 * implementation using</p>
 * <pre>
 *   p = 1 - cumulativeProbability(F)</pre>
 * <p>where <code>F</code> is the F value and <code>cumulativeProbability</code>
 * is the cumulative probability function of that F distribution.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li><code>categoryData</code> must contain <code>double[]</code> arrays.</li>
 * <li><code>categoryData</code> must hold at least two arrays, each of
 * which must contain at least two values.</li>
 * </ul>
 *
 * @param categoryData <code>Collection</code> of <code>double[]</code>
 * arrays, one array per category
 * @return the p-value
 * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
 * @throws DimensionMismatchException if <code>categoryData</code> holds fewer
 * than two arrays or a contained <code>double[]</code> array does not have
 * at least two values
 * @throws ConvergenceException if the p-value can not be computed due to a convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is exceeded
 */
public double anovaPValue(final Collection<double[]> categoryData)
    throws NullArgumentException, DimensionMismatchException,
    ConvergenceException, MaxCountExceededException {

    final AnovaStats stats = anovaStats(categoryData);
    // The arguments are valid at this point, so no try-catch or extra
    // advertised exception is needed. The distribution is never sampled,
    // so a null rng is passed to avoid unneeded overhead.
    final double cumulative =
        new FDistribution(null, stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);
    return 1.0 - cumulative;

}