org.apache.commons.math3.distribution.FDistribution Java Examples

The following examples show how to use org.apache.commons.math3.distribution.FDistribution. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: XDataFrameLeastSquares.java    From morpheus-core with Apache License 2.0 6 votes vote down vote up
/**
 * Fits the regression for the supplied dependent and independent data and stores the
 * resulting summary statistics on this instance.
 * Both inputs must already be transformed, where needed, so that the Gauss Markov
 * assumptions hold.
 * <p>
 * Note: {@code rss} is read here but not assigned in this method; presumably it is set
 * as a side effect of {@code computeBeta} - confirm against that method.
 *
 * @param y     the dependent variable, which may be a transformed version of the raw data
 * @param x     the independent variable(s), which may be a transformed version of the raw data
 */
protected void compute(RealVector y, RealMatrix x) {
    final int observations = frame.rows().count();
    final int parameterCount = regressors.size() + (hasIntercept() ? 1 : 0);
    final int modelDof = regressors.size();
    final RealMatrix beta = computeBeta(y, x);
    // Column 0 carries the coefficients, column 1 is handed to the std-error computation.
    final RealVector coefficients = beta.getColumnVector(0);
    final RealVector variances = beta.getColumnVector(1);
    final int residualDof = observations - parameterCount;
    this.tss = computeTSS(y);
    this.ess = tss - rss;
    this.fValue = (ess / modelDof) / (rss / residualDof);
    // Upper-tail probability of the F statistic.
    final FDistribution fDistribution = new FDistribution(modelDof, residualDof);
    this.fValueProbability = 1d - fDistribution.cumulativeProbability(fValue);
    this.rSquared = 1d - (rss / tss);
    final int interceptCount = hasIntercept() ? 1 : 0;
    this.rSquaredAdj = 1d - (rss * (observations - interceptCount)) / (tss * residualDof);
    this.computeParameterStdErrors(variances);
    this.computeParameterSignificance(coefficients);
}
 
Example #2
Source File: TTestSignificanceTestOperator.java    From rapidminer-studio with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Compares two performance criteria and returns the upper-tail probability of the
 * resulting statistic under an F-distribution with 1 and
 * {@code count1 + count2 - 2} degrees of freedom.
 *
 * @param pc1 the first criterion (average, variance and average count are read)
 * @param pc2 the second criterion (average, variance and average count are read)
 * @return the probability, or 1 when the second degree of freedom is not positive
 */
private double getProbability(PerformanceCriterion pc1, PerformanceCriterion pc2) {
	// variance pooled over both criteria, each weighted by (count - 1)
	double weightedVariances = (pc1.getAverageCount() - 1) * pc1.getVariance()
			+ (pc2.getAverageCount() - 1) * pc2.getVariance();
	double pooledVariance = weightedVariances / (pc1.getAverageCount() + pc2.getAverageCount() - 2);
	double scale = 1.0d / (1.0d / pc1.getAverageCount() + 1.0d / pc2.getAverageCount());
	double meanDifference = pc1.getAverage() - pc2.getAverage();
	double statistic = scale * meanDifference * meanDifference / pooledVariance;
	int denominatorDegreesOfFreedom = pc1.getAverageCount() + pc2.getAverageCount() - 2;

	if (denominatorDegreesOfFreedom <= 0) {
		// the F-distribution is not well defined here, so the probability cannot be
		// calculated correctly; 1 is returned because such a result is not significant
		return 1;
	}
	return 1 - new FDistribution(1, denominatorDegreesOfFreedom).cumulativeProbability(statistic);
}
 
Example #3
Source File: ClopperPearsonInterval.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * {@inheritDoc}
 * <p>
 * Computes the Clopper-Pearson interval from F-distribution quantiles. The two boundary
 * cases are handled explicitly because the corresponding F-distribution would have zero
 * degrees of freedom and cannot be constructed: with no successes the lower bound is 0,
 * and with no failures the upper bound is 1.
 */
public ConfidenceInterval createInterval(int numberOfTrials, int numberOfSuccesses,
                                         double confidenceLevel) {
    IntervalUtils.checkParameters(numberOfTrials, numberOfSuccesses, confidenceLevel);
    final double alpha = (1.0 - confidenceLevel) / 2.0;
    final int numberOfFailures = numberOfTrials - numberOfSuccesses;

    // Lower bound: only defined through the F quantile when at least one success was seen.
    double lowerBound = 0;
    if (numberOfSuccesses > 0) {
        // Degrees of freedom are computed in double arithmetic to avoid int overflow.
        final FDistribution distributionLowerBound = new FDistribution(2.0 * (numberOfFailures + 1.0),
                                                                       2.0 * numberOfSuccesses);
        final double fValueLowerBound = distributionLowerBound.inverseCumulativeProbability(1 - alpha);
        lowerBound = numberOfSuccesses /
                     (numberOfSuccesses + (numberOfFailures + 1.0) * fValueLowerBound);
    }

    // Upper bound: only defined through the F quantile when at least one failure was seen.
    // It must also be computed for zero successes, where it is strictly positive.
    double upperBound = 1;
    if (numberOfFailures > 0) {
        final FDistribution distributionUpperBound = new FDistribution(2.0 * (numberOfSuccesses + 1.0),
                                                                       2.0 * numberOfFailures);
        final double fValueUpperBound = distributionUpperBound.inverseCumulativeProbability(1 - alpha);
        upperBound = (numberOfSuccesses + 1.0) * fValueUpperBound /
                     (numberOfFailures + (numberOfSuccesses + 1.0) * fValueUpperBound);
    }

    return new ConfidenceInterval(lowerBound, upperBound, confidenceLevel);
}
 
Example #4
Source File: ClopperPearsonInterval.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * {@inheritDoc}
 * <p>
 * Computes the Clopper-Pearson interval from F-distribution quantiles. The two boundary
 * cases are handled explicitly because the corresponding F-distribution would have zero
 * degrees of freedom and cannot be constructed: with no successes the lower bound is 0,
 * and with no failures the upper bound is 1.
 */
public ConfidenceInterval createInterval(int numberOfTrials, int numberOfSuccesses,
                                         double confidenceLevel) {
    IntervalUtils.checkParameters(numberOfTrials, numberOfSuccesses, confidenceLevel);
    final double alpha = (1.0 - confidenceLevel) / 2.0;
    final int numberOfFailures = numberOfTrials - numberOfSuccesses;

    // Lower bound: only defined through the F quantile when at least one success was seen.
    double lowerBound = 0;
    if (numberOfSuccesses > 0) {
        // Degrees of freedom are computed in double arithmetic to avoid int overflow.
        final FDistribution distributionLowerBound = new FDistribution(2.0 * (numberOfFailures + 1.0),
                                                                       2.0 * numberOfSuccesses);
        final double fValueLowerBound = distributionLowerBound.inverseCumulativeProbability(1 - alpha);
        lowerBound = numberOfSuccesses /
                     (numberOfSuccesses + (numberOfFailures + 1.0) * fValueLowerBound);
    }

    // Upper bound: only defined through the F quantile when at least one failure was seen.
    // It must also be computed for zero successes, where it is strictly positive.
    double upperBound = 1;
    if (numberOfFailures > 0) {
        final FDistribution distributionUpperBound = new FDistribution(2.0 * (numberOfSuccesses + 1.0),
                                                                       2.0 * numberOfFailures);
        final double fValueUpperBound = distributionUpperBound.inverseCumulativeProbability(1 - alpha);
        upperBound = (numberOfSuccesses + 1.0) * fValueUpperBound /
                     (numberOfFailures + (numberOfSuccesses + 1.0) * fValueUpperBound);
    }

    return new ConfidenceInterval(lowerBound, upperBound, confidenceLevel);
}
 
Example #5
Source File: RandomDataGeneratorTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Reseeds {@code randomData} with 1000, draws 1000 values from {@code nextF(12, 5)},
 * bins them against the quartiles of an {@code FDistribution(12, 5)} and checks the
 * bin counts against {@code expected} with a chi-square acceptance test.
 */
@Test
public void testNextF() {
    final double[] quartileBounds = TestUtils.getDistributionQuartiles(new FDistribution(12, 5));
    final long[] observed = new long[4];
    randomData.reSeed(1000);
    for (int sample = 0; sample < 1000; sample++) {
        TestUtils.updateCounts(randomData.nextF(12, 5), observed, quartileBounds);
    }
    TestUtils.assertChiSquareAccept(expected, observed, 0.001);
}
 
Example #6
Source File: RandomDataGeneratorTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Reseeds {@code randomData} with 1000, draws 1000 values from {@code nextF(12, 5)},
 * bins them against the quartiles of an {@code FDistribution(12, 5)} and checks the
 * bin counts against {@code expected} with a chi-square acceptance test.
 */
@Test
public void testNextF() {
    final double[] quartileBounds = TestUtils.getDistributionQuartiles(new FDistribution(12, 5));
    final long[] observed = new long[4];
    randomData.reSeed(1000);
    for (int sample = 0; sample < 1000; sample++) {
        TestUtils.updateCounts(randomData.nextF(12, 5), observed, quartileBounds);
    }
    TestUtils.assertChiSquareAccept(expected, observed, 0.001);
}
 
Example #7
Source File: RandomDataGeneratorTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Reseeds {@code randomData} with 1000, draws 1000 values from {@code nextF(12, 5)},
 * bins them against the quartiles of an {@code FDistribution(12, 5)} and checks the
 * bin counts against {@code expected} with a chi-square acceptance test.
 */
@Test
public void testNextF() {
    final double[] quartileBounds = TestUtils.getDistributionQuartiles(new FDistribution(12, 5));
    final long[] observed = new long[4];
    randomData.reSeed(1000);
    for (int sample = 0; sample < 1000; sample++) {
        TestUtils.updateCounts(randomData.nextF(12, 5), observed, quartileBounds);
    }
    TestUtils.assertChiSquareAccept(expected, observed, 0.001);
}
 
Example #8
Source File: RandomDataTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Reseeds {@code randomData} with 1000, draws 1000 values from {@code nextF(12, 5)},
 * bins them against the quartiles of an {@code FDistribution(12, 5)} and checks the
 * bin counts against {@code expected} with a chi-square acceptance test.
 *
 * @throws Exception if any of the invoked helpers fails
 */
@Test
public void testNextF() throws Exception {
    final double[] quartileBounds = TestUtils.getDistributionQuartiles(new FDistribution(12, 5));
    final long[] observed = new long[4];
    randomData.reSeed(1000);
    for (int sample = 0; sample < 1000; sample++) {
        TestUtils.updateCounts(randomData.nextF(12, 5), observed, quartileBounds);
    }
    TestUtils.assertChiSquareAccept(expected, observed, 0.001);
}
 
Example #9
Source File: RandomDataGeneratorTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Reseeds {@code randomData} with 1000, draws 1000 values from {@code nextF(12, 5)},
 * bins them against the quartiles of an {@code FDistribution(12, 5)} and checks the
 * bin counts against {@code expected} with a chi-square acceptance test.
 */
@Test
public void testNextF() {
    final double[] quartileBounds = TestUtils.getDistributionQuartiles(new FDistribution(12, 5));
    final long[] observed = new long[4];
    randomData.reSeed(1000);
    for (int sample = 0; sample < 1000; sample++) {
        TestUtils.updateCounts(randomData.nextF(12, 5), observed, quartileBounds);
    }
    TestUtils.assertChiSquareAccept(expected, observed, 0.001);
}
 
Example #10
Source File: RandomDataTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Reseeds {@code randomData} with 1000, draws 1000 values from {@code nextF(12, 5)},
 * bins them against the quartiles of an {@code FDistribution(12, 5)} and checks the
 * bin counts against {@code expected} with a chi-square acceptance test.
 */
@Test
public void testNextF() {
    final double[] quartileBounds = TestUtils.getDistributionQuartiles(new FDistribution(12, 5));
    final long[] observed = new long[4];
    randomData.reSeed(1000);
    for (int sample = 0; sample < 1000; sample++) {
        TestUtils.updateCounts(randomData.nextF(12, 5), observed, quartileBounds);
    }
    TestUtils.assertChiSquareAccept(expected, observed, 0.001);
}
 
Example #11
Source File: RandomDataTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Reseeds {@code randomData} with 1000, draws 1000 values from {@code nextF(12, 5)},
 * bins them against the quartiles of an {@code FDistribution(12, 5)} and checks the
 * bin counts against {@code expected} with a chi-square acceptance test.
 */
@Test
public void testNextF() {
    final double[] quartileBounds = TestUtils.getDistributionQuartiles(new FDistribution(12, 5));
    final long[] observed = new long[4];
    randomData.reSeed(1000);
    for (int sample = 0; sample < 1000; sample++) {
        TestUtils.updateCounts(randomData.nextF(12, 5), observed, quartileBounds);
    }
    TestUtils.assertChiSquareAccept(expected, observed, 0.001);
}
 
Example #12
Source File: TTestLinearRegressionMethod.java    From rapidminer-studio with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Computes the value this class reports as the PValue of the attributeIndex-th attribute,
 * described as the probability that the coefficient is only random.
 * <p>
 * The coefficient is divided by its standard error and the square of that statistic is
 * evaluated with the supplied F-distribution. NOTE(review): the cumulative probability is
 * returned as is, not its complement - confirm that callers expect this orientation.
 *
 * @return the cumulative probability of the squared statistic under {@code fdistribution}
 * @throws UndefinedParameterError presumably propagated from the tolerance computation - confirm
 * @throws ProcessStoppedException presumably propagated from the tolerance computation - confirm
 */
protected double getPValue(double coefficient, int attributeIndex, LinearRegression regression, boolean useBias,
		double ridge, ExampleSet exampleSet, boolean[] isUsedAttribute, double[] standardDeviations,
		double labelStandardDeviation, FDistribution fdistribution, double generalCorrelation)
		throws UndefinedParameterError, ProcessStoppedException {
	double tolerance = regression.getTolerance(exampleSet, isUsedAttribute, attributeIndex, ridge, useBias);

	// standard error of the coefficient
	double remainingExamples = exampleSet.size() - exampleSet.getAttributes().size() - 1.0d;
	double scaledVariance = (1.0d - generalCorrelation) / (tolerance * remainingExamples);
	double standardError = Math.sqrt(scaledVariance) * labelStandardDeviation / standardDeviations[attributeIndex];

	// the squared t statistic is looked up in the F-distribution
	double tStatistic = coefficient / standardError;
	return fdistribution.cumulativeProbability(tStatistic * tStatistic);
}
 
Example #13
Source File: AnovaCalculator.java    From rapidminer-studio with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Stores the given sums of squares, degrees of freedom and alpha, and derives the mean
 * squares, the F value and the upper-tail probability of that F value from them.
 */
public AnovaSignificanceTestResult(double sumSquaresBetween, double sumSquaresResiduals, int df1, int df2,
		double alpha) {
	// values taken over unchanged
	this.alpha = alpha;
	this.df1 = df1;
	this.df2 = df2;
	this.sumSquaresBetween = sumSquaresBetween;
	this.sumSquaresResiduals = sumSquaresResiduals;

	// derived statistics
	this.meanSquaresBetween = sumSquaresBetween / df1;
	this.meanSquaresResiduals = sumSquaresResiduals / df2;
	this.fValue = this.meanSquaresBetween / this.meanSquaresResiduals;
	this.prob = 1.0d - new FDistribution(df1, df2).cumulativeProbability(this.fValue);
}
 
Example #14
Source File: RandomDataGeneratorTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Reseeds {@code randomData} with 1000, draws 1000 values from {@code nextF(12, 5)},
 * bins them against the quartiles of an {@code FDistribution(12, 5)} and checks the
 * bin counts against {@code expected} with a chi-square acceptance test.
 */
@Test
public void testNextF() {
    final double[] quartileBounds = TestUtils.getDistributionQuartiles(new FDistribution(12, 5));
    final long[] observed = new long[4];
    randomData.reSeed(1000);
    for (int sample = 0; sample < 1000; sample++) {
        TestUtils.updateCounts(randomData.nextF(12, 5), observed, quartileBounds);
    }
    TestUtils.assertChiSquareAccept(expected, observed, 0.001);
}
 
Example #15
Source File: SignificanceTask.java    From mzmine2 with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Runs a one-way ANOVA over the given groups of intensities.
 *
 * @param intensityGroups one array of observations per group
 * @return the upper-tail probability of the F statistic, or {@code null} when either
 *         degree of freedom is not positive, the mean square of error is zero, or the
 *         F-distribution could not be evaluated
 */
@Nullable
private Double oneWayAnova(@Nonnull double[][] intensityGroups) {

  int numGroups = intensityGroups.length;
  long numIntensities = Arrays.stream(intensityGroups)
      .flatMapToDouble(Arrays::stream)
      .count();

  double[] groupMeans = Arrays.stream(intensityGroups)
      .mapToDouble(intensities -> Arrays.stream(intensities).average().orElse(0.0))
      .toArray();

  double overallMean = Arrays.stream(intensityGroups)
      .flatMapToDouble(Arrays::stream)
      .average()
      .orElse(0.0);

  // Within-group variation: squared deviations of every observation from its group mean.
  double sumOfSquaresOfError = IntStream.range(0, intensityGroups.length)
      .mapToDouble(i -> Arrays
          .stream(intensityGroups[i])
          .map(x -> x - groupMeans[i])
          .map(x -> x * x)
          .sum())
      .sum();

  // Between-group variation: each group's squared deviation from the overall mean must be
  // weighted by that group's sample size (not by the number of groups), otherwise the
  // F statistic does not grow with the amount of data. Empty groups contribute nothing.
  double sumOfSquaresOfTreatment = IntStream.range(0, numGroups)
      .mapToDouble(i -> {
        double deviation = groupMeans[i] - overallMean;
        return intensityGroups[i].length * deviation * deviation;
      })
      .sum();

  long degreesOfFreedomOfTreatment = numGroups - 1;
  long degreesOfFreedomOfError = numIntensities - numGroups;

  if (degreesOfFreedomOfTreatment <= 0 || degreesOfFreedomOfError <= 0) {
    return null;
  }

  double meanSquareOfTreatment = sumOfSquaresOfTreatment / degreesOfFreedomOfTreatment;
  double meanSquareOfError = sumOfSquaresOfError / degreesOfFreedomOfError;

  // A zero error variance would make the F statistic undefined.
  if (meanSquareOfError == 0.0) {
    return null;
  }

  double anovaStatistics = meanSquareOfTreatment / meanSquareOfError;

  Double pValue = null;
  try {
    FDistribution distribution = new FDistribution(
        degreesOfFreedomOfTreatment, degreesOfFreedomOfError);
    pValue = 1.0 - distribution.cumulativeProbability(anovaStatistics);
  }
  catch (MathIllegalArgumentException ex) {
    // Best effort: report the problem and fall through with a null p-value.
    logger.warning("Error during F-distribution calculation: " + ex.getMessage());
  }

  return pValue;
}
 
Example #16
Source File: AnovaTask.java    From mzmine3 with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Runs a one-way ANOVA over the given groups of intensities.
 *
 * @param intensityGroups one array of observations per group
 * @return the upper-tail probability of the F statistic, or {@code null} when either
 *         degree of freedom is not positive, the mean square of error is zero, or the
 *         F-distribution could not be evaluated
 */
@Nullable
private Double oneWayAnova(@Nonnull double[][] intensityGroups) {

  int numGroups = intensityGroups.length;
  long numIntensities = Arrays.stream(intensityGroups).flatMapToDouble(Arrays::stream).count();

  double[] groupMeans = Arrays.stream(intensityGroups)
      .mapToDouble(intensities -> Arrays.stream(intensities).average().orElse(0.0)).toArray();

  double overallMean =
      Arrays.stream(intensityGroups).flatMapToDouble(Arrays::stream).average().orElse(0.0);

  // Within-group variation: squared deviations of every observation from its group mean.
  double sumOfSquaresOfError = IntStream.range(0, intensityGroups.length).mapToDouble(
      i -> Arrays.stream(intensityGroups[i]).map(x -> x - groupMeans[i]).map(x -> x * x).sum())
      .sum();

  // Between-group variation: each group's squared deviation from the overall mean must be
  // weighted by that group's sample size (not by the number of groups), otherwise the
  // F statistic does not grow with the amount of data. Empty groups contribute nothing.
  double sumOfSquaresOfTreatment = IntStream.range(0, numGroups).mapToDouble(i -> {
    double deviation = groupMeans[i] - overallMean;
    return intensityGroups[i].length * deviation * deviation;
  }).sum();

  long degreesOfFreedomOfTreatment = numGroups - 1;
  long degreesOfFreedomOfError = numIntensities - numGroups;

  if (degreesOfFreedomOfTreatment <= 0 || degreesOfFreedomOfError <= 0) {
    return null;
  }

  double meanSquareOfTreatment = sumOfSquaresOfTreatment / degreesOfFreedomOfTreatment;
  double meanSquareOfError = sumOfSquaresOfError / degreesOfFreedomOfError;

  // A zero error variance would make the F statistic undefined.
  if (meanSquareOfError == 0.0) {
    return null;
  }

  double anovaStatistics = meanSquareOfTreatment / meanSquareOfError;

  Double pValue = null;
  try {
    FDistribution distribution =
        new FDistribution(degreesOfFreedomOfTreatment, degreesOfFreedomOfError);
    pValue = 1.0 - distribution.cumulativeProbability(anovaStatistics);
  } catch (MathIllegalArgumentException ex) {
    // Best effort: report the problem and fall through with a null p-value.
    logger.warning("Error during F-distribution calculation: " + ex.getMessage());
  }

  return pValue;
}
 
Example #17
Source File: MFA_LSQFitter.java    From thunderstorm with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Fits the sub-image with a multi-molecule PSF model.
 * <p>
 * When {@code maxN > 1}, models with 1..maxN molecules are fitted in turn and each
 * candidate with more than one molecule is compared with the best model so far through
 * an F-test on the chi-squared values; a candidate replaces the best model only when its
 * p-value is a number below {@code pValueThr}. The selected model is then fitted again
 * by a fresh {@code LSQFitter} and the result is passed through {@code eliminateBadFits}.
 *
 * @param subimage the image region to fit
 * @return the result of {@code eliminateBadFits} applied to the fitted molecule
 * @throws StoppedDueToErrorException when the F-distribution for the model comparison
 *         cannot be created (raised from a {@code NotStrictlyPositiveException})
 */
@Override
public Molecule fit(SubImage subimage) {
    // Carried across iterations: each new MultiPSF receives the previous fit's parameters
    // (presumably as an initial guess - confirm in MultiPSF).
    double[] fittedParams = null;
    MultiPSF model, modelBest = null;
    double chi2, chi2Best = 0.0, pValue;
    if(maxN > 1) {
        // model selection - how many molecules?
        for(int n = 1; n <= maxN; n++) {
            GUI.checkIJEscapePressed();
            model = new MultiPSF(n, defaultSigma, basePsfModel, fittedParams);
            model.setIntensityRange(expectedIntensity);
            model.setFixedIntensities(sameI);
            LSQFitter fitter = new LSQFitter(model, weightedLSQ, MODEL_SELECTION_ITERATIONS, -1);
            fitter.fit(subimage);
            fittedParams = fitter.fittedParameters;
            chi2 = model.getChiSquared(subimage.xgrid, subimage.ygrid, subimage.values, fittedParams, weightedLSQ);
            if(n > 1) {
                try {
                    // F-test of the candidate against the best model so far:
                    // numerator dof = difference in model dof, denominator dof = data points - candidate dof
                    pValue = 1.0 - new FDistribution(model.getDoF() - modelBest.getDoF(), subimage.values.length - model.getDoF()).cumulativeProbability(((chi2Best - chi2) / (model.getDoF() - modelBest.getDoF())) / (chi2 / (subimage.values.length - model.getDoF())));
                    // accept the more complex model only when the p-value is a number below the threshold
                    if(!Double.isNaN(pValue) && (pValue < pValueThr) ) {//&& !isOutOfRegion(mol, ((double)subimage.size) / 2.0)
                        modelBest = model;
                        chi2Best = chi2;
                    }
                } catch(NotStrictlyPositiveException ex) {
                    // the F-distribution could not be constructed; report the molecule limit to the user
                    int maxMol = (subimage.values.length - 2) / (int)(basePsfModel.getDoF()-2); // both intensity and offset are estimated for all molecules as a single parameter
                    throw new StoppedDueToErrorException(
                            "F-distribution `F(" + (int)(model.getDoF() - modelBest.getDoF()) + "," +
                            (int)(subimage.values.length - model.getDoF()) + ")` could not be created! " +
                            "There is too many molecules (degrees of freedom) in the model!\n The maximum number of " +
                            "molecules with the current settings (PSF model and fitting radius) is " + maxMol + ".", ex);
                }
            } else {
                // the single-molecule model is the baseline for all later comparisons
                modelBest = model;
                chi2Best = chi2;
            }
        }
    } else {
        // no model selection: use a single-molecule model directly
        modelBest = new MultiPSF(1, defaultSigma, basePsfModel, null);
        modelBest.setIntensityRange(expectedIntensity);
        modelBest.setFixedIntensities(sameI);
    }
    // fitting with the selected model
    lastFitter = new LSQFitter(modelBest, weightedLSQ, Params.BACKGROUND);
    Molecule mol = lastFitter.fit(subimage);
    assert (mol != null);    // this is implication of `assert(maxN >= 1)`
    if(!mol.isSingleMolecule()) {
        // copy background value to all molecules
        double bkg = mol.getParam(PSFModel.Params.LABEL_BACKGROUND);
        for(Molecule m : mol.getDetections()) {
            m.setParam(PSFModel.Params.LABEL_BACKGROUND, bkg);
        }
    }
    return eliminateBadFits(mol, ((double) subimage.size_x) / 2.0 - defaultSigma / 2.0, ((double) subimage.size_y) / 2.0 - defaultSigma / 2.0);
}
 
Example #18
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Returns the one-way ANOVA p-value for the supplied categories, each given
 * as a {@code double[]} of observations.
 *
 * <p>The result is {@code 1 - cumulativeProbability(F)}, where {@code F} is
 * the F value and {@code cumulativeProbability} is evaluated on the
 * commons-math
 * {@link org.apache.commons.math3.distribution.FDistribution F distribution}
 * built from the degrees of freedom reported by {@code anovaStats}.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li>{@code categoryData} must contain {@code double[]} arrays;</li>
 * <li>it must contain at least two arrays, each holding at least two
 * values.</li>
 * </ul>
 *
 * @param categoryData collection of {@code double[]} arrays, one per category
 * @return the p-value
 * @throws NullArgumentException if {@code categoryData} is {@code null}
 * @throws DimensionMismatchException if {@code categoryData} has fewer than
 * two arrays, or a contained array has fewer than two values
 * @throws ConvergenceException if the p-value can not be computed due to a
 * convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is
 * exceeded
 */
public double anovaPValue(final Collection<double[]> categoryData)
    throws NullArgumentException, DimensionMismatchException,
    ConvergenceException, MaxCountExceededException {

    AnovaStats stats = anovaStats(categoryData);
    // The distribution arguments are valid here, so construction is neither
    // guarded by try-catch nor reflected in the advertised exceptions.
    return 1.0 - new FDistribution(stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);

}
 
Example #19
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Returns the one-way ANOVA p-value for the supplied categories, each given
 * as a {@code double[]} of observations.
 *
 * <p>The result is {@code 1 - cumulativeProbability(F)}, where {@code F} is
 * the F value and {@code cumulativeProbability} is evaluated on the
 * commons-math
 * {@link org.apache.commons.math3.distribution.FDistribution F distribution}
 * built from the degrees of freedom reported by {@code anovaStats}.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li>{@code categoryData} must contain {@code double[]} arrays;</li>
 * <li>it must contain at least two arrays, each holding at least two
 * values.</li>
 * </ul>
 *
 * @param categoryData collection of {@code double[]} arrays, one per category
 * @return the p-value
 * @throws NullArgumentException if {@code categoryData} is {@code null}
 * @throws DimensionMismatchException if {@code categoryData} has fewer than
 * two arrays, or a contained array has fewer than two values
 * @throws ConvergenceException if the p-value can not be computed due to a
 * convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is
 * exceeded
 */
public double anovaPValue(final Collection<double[]> categoryData)
    throws NullArgumentException, DimensionMismatchException,
    ConvergenceException, MaxCountExceededException {

    AnovaStats stats = anovaStats(categoryData);
    return 1.0 - new FDistribution(stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);

}
 
Example #20
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Returns the one-way ANOVA p-value for the supplied categories, each given
 * as a {@link SummaryStatistics}.
 *
 * <p>The result is {@code 1 - cumulativeProbability(F)}, where {@code F} is
 * the F value and {@code cumulativeProbability} is evaluated on the
 * commons-math
 * {@link org.apache.commons.math3.distribution.FDistribution F distribution}
 * built from the degrees of freedom reported by {@code anovaStats}.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li>{@code categoryData} must contain {@link SummaryStatistics};</li>
 * <li>it must contain at least two {@link SummaryStatistics}, each holding
 * at least two values.</li>
 * </ul>
 *
 * @param categoryData collection of {@link SummaryStatistics}, one per
 * category
 * @param allowOneElementData if true, allow computation for one category
 * only or for one data element per category
 * @return the p-value
 * @throws NullArgumentException if {@code categoryData} is {@code null}
 * @throws DimensionMismatchException if {@code categoryData} has fewer than
 * two entries, or a contained {@link SummaryStatistics} has fewer than two
 * values
 * @throws ConvergenceException if the p-value can not be computed due to a
 * convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is
 * exceeded
 * @since 3.2
 */
public double anovaPValue(final Collection<SummaryStatistics> categoryData,
                          final boolean allowOneElementData)
    throws NullArgumentException, DimensionMismatchException,
           ConvergenceException, MaxCountExceededException {

    final AnovaStats stats = anovaStats(categoryData, allowOneElementData);
    // No sampling is done on this distribution, so a null rng is passed to
    // avoid unneeded overhead.
    return 1.0 - new FDistribution(null, stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);

}
 
Example #21
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Returns the one-way ANOVA p-value for the supplied categories, each given
 * as a {@code double[]} of observations.
 *
 * <p>The result is {@code 1 - cumulativeProbability(F)}, where {@code F} is
 * the F value and {@code cumulativeProbability} is evaluated on the
 * commons-math
 * {@link org.apache.commons.math3.distribution.FDistribution F distribution}
 * built from the degrees of freedom reported by {@code anovaStats}.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li>{@code categoryData} must contain {@code double[]} arrays;</li>
 * <li>it must contain at least two arrays, each holding at least two
 * values.</li>
 * </ul>
 *
 * @param categoryData collection of {@code double[]} arrays, one per category
 * @return the p-value
 * @throws NullArgumentException if {@code categoryData} is {@code null}
 * @throws DimensionMismatchException if {@code categoryData} has fewer than
 * two arrays, or a contained array has fewer than two values
 * @throws ConvergenceException if the p-value can not be computed due to a
 * convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is
 * exceeded
 */
public double anovaPValue(final Collection<double[]> categoryData)
    throws NullArgumentException, DimensionMismatchException,
    ConvergenceException, MaxCountExceededException {

    final AnovaStats stats = anovaStats(categoryData);
    // The distribution arguments are valid here, so construction is neither
    // guarded by try-catch nor reflected in the advertised exceptions.
    // No sampling is done on this distribution, so a null rng is passed to
    // avoid unneeded overhead.
    return 1.0 - new FDistribution(null, stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);

}
 
Example #22
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Returns the one-way ANOVA p-value for the supplied categories, each given
 * as a {@link SummaryStatistics}.
 *
 * <p>The result is {@code 1 - cumulativeProbability(F)}, where {@code F} is
 * the F value and {@code cumulativeProbability} is evaluated on the
 * commons-math
 * {@link org.apache.commons.math3.distribution.FDistribution F distribution}
 * built from the degrees of freedom reported by {@code anovaStats}.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li>{@code categoryData} must contain {@link SummaryStatistics};</li>
 * <li>it must contain at least two {@link SummaryStatistics}, each holding
 * at least two values.</li>
 * </ul>
 *
 * @param categoryData collection of {@link SummaryStatistics}, one per
 * category
 * @param allowOneElementData if true, allow computation for one category
 * only or for one data element per category
 * @return the p-value
 * @throws NullArgumentException if {@code categoryData} is {@code null}
 * @throws DimensionMismatchException if {@code categoryData} has fewer than
 * two entries, or a contained {@link SummaryStatistics} has fewer than two
 * values
 * @throws ConvergenceException if the p-value can not be computed due to a
 * convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is
 * exceeded
 * @since 3.2
 */
public double anovaPValue(final Collection<SummaryStatistics> categoryData,
                          final boolean allowOneElementData)
    throws NullArgumentException, DimensionMismatchException,
           ConvergenceException, MaxCountExceededException {

    final AnovaStats stats = anovaStats(categoryData, allowOneElementData);
    return 1.0 - new FDistribution(stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);

}
 
Example #23
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Returns the one-way ANOVA p-value for the supplied categories, each given
 * as a {@code double[]} of observations.
 *
 * <p>The result is {@code 1 - cumulativeProbability(F)}, where {@code F} is
 * the F value and {@code cumulativeProbability} is evaluated on the
 * commons-math
 * {@link org.apache.commons.math3.distribution.FDistribution F distribution}
 * built from the degrees of freedom reported by {@code anovaStats}.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li>{@code categoryData} must contain {@code double[]} arrays;</li>
 * <li>it must contain at least two arrays, each holding at least two
 * values.</li>
 * </ul>
 *
 * @param categoryData collection of {@code double[]} arrays, one per category
 * @return the p-value
 * @throws NullArgumentException if {@code categoryData} is {@code null}
 * @throws DimensionMismatchException if {@code categoryData} has fewer than
 * two arrays, or a contained array has fewer than two values
 * @throws ConvergenceException if the p-value can not be computed due to a
 * convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is
 * exceeded
 */
public double anovaPValue(final Collection<double[]> categoryData)
    throws NullArgumentException, DimensionMismatchException,
    ConvergenceException, MaxCountExceededException {

    AnovaStats stats = anovaStats(categoryData);
    // The distribution arguments are valid here, so construction is neither
    // guarded by try-catch nor reflected in the advertised exceptions.
    return 1.0 - new FDistribution(stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);

}
 
Example #24
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Returns the one-way ANOVA p-value for the supplied categories, each given
 * as a {@code double[]} of observations.
 *
 * <p>The result is {@code 1 - cumulativeProbability(F)}, where {@code F} is
 * the F value and {@code cumulativeProbability} is evaluated on the
 * commons-math
 * {@link org.apache.commons.math3.distribution.FDistribution F distribution}
 * built from the degrees of freedom reported by {@code anovaStats}.</p>
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li>{@code categoryData} must contain {@code double[]} arrays;</li>
 * <li>it must contain at least two arrays, each holding at least two
 * values.</li>
 * </ul>
 *
 * @param categoryData collection of {@code double[]} arrays, one per category
 * @return the p-value
 * @throws NullArgumentException if {@code categoryData} is {@code null}
 * @throws DimensionMismatchException if {@code categoryData} has fewer than
 * two arrays, or a contained array has fewer than two values
 * @throws ConvergenceException if the p-value can not be computed due to a
 * convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is
 * exceeded
 */
public double anovaPValue(final Collection<double[]> categoryData)
    throws NullArgumentException, DimensionMismatchException,
    ConvergenceException, MaxCountExceededException {

    AnovaStats stats = anovaStats(categoryData);
    return 1.0 - new FDistribution(stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);

}
 
Example #25
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Returns the ANOVA p-value for a collection of {@link SummaryStatistics}.
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li>Every element of the {@code categoryData} collection must be a
 * {@link SummaryStatistics}.</li>
 * <li>The collection must hold at least two {@link SummaryStatistics}, and
 * each of them must contain at least two values.</li>
 * </ul>
 *
 * <p>The p-value is estimated with the
 * {@link org.apache.commons.math3.distribution.FDistribution
 * commons-math F Distribution implementation}, using the formula</p>
 * <pre>
 *   p = 1 - cumulativeProbability(F)</pre>
 * <p>where {@code F} is the F value and {@code cumulativeProbability} is
 * the commons-math implementation of the F distribution.</p>
 *
 * @param categoryData collection of {@link SummaryStatistics}, each holding
 * the data of one category
 * @param allowOneElementData if true, allow computation for one category
 * only or for one data element per category
 * @return the p-value
 * @throws NullArgumentException if {@code categoryData} is {@code null}
 * @throws DimensionMismatchException if {@code categoryData} contains fewer
 * than two elements, or if a contained {@link SummaryStatistics} does not
 * have at least two values
 * @throws ConvergenceException if the p-value can not be computed due to a
 * convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is
 * exceeded
 */
public double anovaPValue(final Collection<SummaryStatistics> categoryData,
                          final boolean allowOneElementData)
    throws NullArgumentException, DimensionMismatchException,
           ConvergenceException, MaxCountExceededException {

    final AnovaStats stats = anovaStats(categoryData, allowOneElementData);

    // p = 1 - cumulativeProbability(F)
    final double cumulative =
        new FDistribution(stats.dfbg, stats.dfwg).cumulativeProbability(stats.F);
    return 1.0 - cumulative;
}
 
Example #26
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Calculates the p-value of an ANOVA over a collection of {@code double[]}
 * arrays.
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li>The {@code categoryData} collection must contain {@code double[]}
 * arrays.</li>
 * <li>At least two {@code double[]} arrays must be present in
 * {@code categoryData}, and each of them must contain at least two
 * values.</li>
 * </ul>
 *
 * <p>This implementation relies on the
 * {@link org.apache.commons.math3.distribution.FDistribution
 * commons-math F Distribution implementation} to estimate the exact
 * p-value, using the formula</p>
 * <pre>
 *   p = 1 - cumulativeProbability(F)</pre>
 * <p>where {@code F} is the F value and {@code cumulativeProbability} is
 * the commons-math implementation of the F distribution.</p>
 *
 * @param categoryData collection of {@code double[]} arrays, one array of
 * data per category
 * @return the p-value
 * @throws NullArgumentException if {@code categoryData} is {@code null}
 * @throws DimensionMismatchException if {@code categoryData} contains fewer
 * than two arrays, or if one of the contained arrays has fewer than two values
 * @throws ConvergenceException if the p-value can not be computed due to a
 * convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is
 * exceeded
 */
public double anovaPValue(final Collection<double[]> categoryData)
    throws NullArgumentException, DimensionMismatchException,
           ConvergenceException, MaxCountExceededException {

    final AnovaStats anova = anovaStats(categoryData);

    // The constructor arguments are valid here, hence no try-catch and no
    // additional advertised exception.
    final FDistribution distribution = new FDistribution(anova.dfbg, anova.dfwg);

    final double cdfAtF = distribution.cumulativeProbability(anova.F);
    return 1.0 - cdfAtF;
}
 
Example #27
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Computes the p-value of an ANOVA for the given collection of
 * {@code double[]} arrays.
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li>{@code categoryData} must be a collection of {@code double[]}
 * arrays.</li>
 * <li>{@code categoryData} must contain two or more arrays, each of which
 * contains two or more values.</li>
 * </ul>
 *
 * <p>The exact p-value is estimated through the
 * {@link org.apache.commons.math3.distribution.FDistribution
 * commons-math F Distribution implementation} with the formula</p>
 * <pre>
 *   p = 1 - cumulativeProbability(F)</pre>
 * <p>where {@code F} is the F value and {@code cumulativeProbability} is
 * the commons-math implementation of the F distribution.</p>
 *
 * @param categoryData collection of {@code double[]} arrays; each array
 * contains the data for one category
 * @return the p-value
 * @throws NullArgumentException if {@code categoryData} is {@code null}
 * @throws DimensionMismatchException if {@code categoryData} contains fewer
 * than two arrays, or if one of the contained arrays has fewer than two values
 * @throws ConvergenceException if the p-value can not be computed due to a
 * convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is
 * exceeded
 */
public double anovaPValue(final Collection<double[]> categoryData)
    throws NullArgumentException, DimensionMismatchException,
           ConvergenceException, MaxCountExceededException {

    final AnovaStats result = anovaStats(categoryData);

    // p = 1 - cumulativeProbability(F)
    return 1.0 - new FDistribution(result.dfbg, result.dfwg).cumulativeProbability(result.F);
}
 
Example #28
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Returns the ANOVA p-value for a collection of {@code double[]} arrays.
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li>Every element of the {@code categoryData} collection must be a
 * {@code double[]} array.</li>
 * <li>The collection must hold at least two such arrays, and each array
 * must hold at least two values.</li>
 * </ul>
 *
 * <p>The p-value is estimated with the
 * {@link org.apache.commons.math3.distribution.FDistribution
 * commons-math F Distribution implementation}, using the formula</p>
 * <pre>
 *   p = 1 - cumulativeProbability(F)</pre>
 * <p>where {@code F} is the F value and {@code cumulativeProbability} is
 * the commons-math implementation of the F distribution.</p>
 *
 * @param categoryData collection of {@code double[]} arrays, each holding
 * the data of one category
 * @return the p-value
 * @throws NullArgumentException if {@code categoryData} is {@code null}
 * @throws DimensionMismatchException if {@code categoryData} contains fewer
 * than two arrays, or if one of the contained arrays has fewer than two values
 * @throws ConvergenceException if the p-value can not be computed due to a
 * convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is
 * exceeded
 */
public double anovaPValue(final Collection<double[]> categoryData)
    throws NullArgumentException, DimensionMismatchException,
           ConvergenceException, MaxCountExceededException {

    final AnovaStats stats = anovaStats(categoryData);

    // The constructor arguments are valid here, so no try-catch or advertised
    // exception is needed. The rng is passed as null because this distribution
    // is never sampled, which avoids unneeded overhead.
    final FDistribution fDistribution = new FDistribution(null, stats.dfbg, stats.dfwg);

    final double cumulative = fDistribution.cumulativeProbability(stats.F);
    return 1.0 - cumulative;
}
 
Example #29
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Calculates the p-value of an ANOVA over a collection of
 * {@link SummaryStatistics}.
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li>The {@code categoryData} collection must contain
 * {@link SummaryStatistics}.</li>
 * <li>At least two {@link SummaryStatistics} must be present in
 * {@code categoryData}, and each of them must contain at least two
 * values.</li>
 * </ul>
 *
 * <p>This implementation relies on the
 * {@link org.apache.commons.math3.distribution.FDistribution
 * commons-math F Distribution implementation} to estimate the exact
 * p-value, using the formula</p>
 * <pre>
 *   p = 1 - cumulativeProbability(F)</pre>
 * <p>where {@code F} is the F value and {@code cumulativeProbability} is
 * the commons-math implementation of the F distribution.</p>
 *
 * @param categoryData collection of {@link SummaryStatistics}, one per
 * category
 * @param allowOneElementData if true, allow computation for one category
 * only or for one data element per category
 * @return the p-value
 * @throws NullArgumentException if {@code categoryData} is {@code null}
 * @throws DimensionMismatchException if {@code categoryData} contains fewer
 * than two elements, or if a contained {@link SummaryStatistics} does not
 * have at least two values
 * @throws ConvergenceException if the p-value can not be computed due to a
 * convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is
 * exceeded
 * @since 3.2
 */
public double anovaPValue(final Collection<SummaryStatistics> categoryData,
                          final boolean allowOneElementData)
    throws NullArgumentException, DimensionMismatchException,
           ConvergenceException, MaxCountExceededException {

    final AnovaStats anova = anovaStats(categoryData, allowOneElementData);
    final FDistribution distribution = new FDistribution(anova.dfbg, anova.dfwg);

    // p = 1 - cumulativeProbability(F)
    final double cdfAtF = distribution.cumulativeProbability(anova.F);
    return 1.0 - cdfAtF;
}
 
Example #30
Source File: OneWayAnova.java    From astor with GNU General Public License v2.0 3 votes vote down vote up
/**
 * Returns the ANOVA p-value for a collection of {@link SummaryStatistics}.
 *
 * <p><strong>Preconditions</strong>:</p>
 * <ul>
 * <li>Every element of the {@code categoryData} collection must be a
 * {@link SummaryStatistics}.</li>
 * <li>The collection must hold at least two {@link SummaryStatistics}, and
 * each of them must contain at least two values.</li>
 * </ul>
 *
 * <p>The p-value is estimated with the
 * {@link org.apache.commons.math3.distribution.FDistribution
 * commons-math F Distribution implementation}, using the formula</p>
 * <pre>
 *   p = 1 - cumulativeProbability(F)</pre>
 * <p>where {@code F} is the F value and {@code cumulativeProbability} is
 * the commons-math implementation of the F distribution.</p>
 *
 * @param categoryData collection of {@link SummaryStatistics}, each holding
 * the data of one category
 * @param allowOneElementData if true, allow computation for one category
 * only or for one data element per category
 * @return the p-value
 * @throws NullArgumentException if {@code categoryData} is {@code null}
 * @throws DimensionMismatchException if {@code categoryData} contains fewer
 * than two elements, or if a contained {@link SummaryStatistics} does not
 * have at least two values
 * @throws ConvergenceException if the p-value can not be computed due to a
 * convergence error
 * @throws MaxCountExceededException if the maximum number of iterations is
 * exceeded
 * @since 3.2
 */
public double anovaPValue(final Collection<SummaryStatistics> categoryData,
                          final boolean allowOneElementData)
    throws NullArgumentException, DimensionMismatchException,
           ConvergenceException, MaxCountExceededException {

    final AnovaStats stats = anovaStats(categoryData, allowOneElementData);

    // The rng is passed as null because this distribution is never sampled,
    // which avoids unneeded overhead.
    final FDistribution fDistribution = new FDistribution(null, stats.dfbg, stats.dfwg);

    final double cumulative = fDistribution.cumulativeProbability(stats.F);
    return 1.0 - cumulative;
}