org.apache.commons.math3.distribution.TDistribution Java Examples

The following examples show how to use org.apache.commons.math3.distribution.TDistribution. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: PearsonsCorrelationTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Recomputes two-sided p-values by hand from the correlation coefficients
 * and their standard errors, and checks that they agree with the p-values
 * reported by {@code PearsonsCorrelation}.
 */
@Test
public void testStdErrorConsistency() throws Exception {
    RealMatrix data = createRealMatrix(swissData, 47, 5);
    PearsonsCorrelation correlation = new PearsonsCorrelation(data);
    RealMatrix coefficients = correlation.getCorrelationMatrix();
    RealMatrix reportedPValues = correlation.getCorrelationPValues();
    RealMatrix standardErrors = correlation.getCorrelationStandardErrors();
    // Reference distribution with 45 degrees of freedom (47 - 2 for the matrix built above).
    TDistribution referenceT = new TDistribution(45);
    // Visit the strict lower triangle so each variable pair is checked once.
    for (int row = 1; row < 5; row++) {
        for (int col = 0; col < row; col++) {
            double statistic = FastMath.abs(coefficients.getEntry(row, col)) / standardErrors.getEntry(row, col);
            double upperTail = 1 - referenceT.cumulativeProbability(statistic);
            Assert.assertEquals(2 * upperTail, reportedPValues.getEntry(row, col), 10E-15);
        }
    }
}
 
Example #2
Source File: PearsonsCorrelationTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Recomputes two-sided p-values by hand from the correlation coefficients
 * and their standard errors, and checks that they agree with the p-values
 * reported by {@code PearsonsCorrelation}.
 */
@Test
public void testStdErrorConsistency() {
    RealMatrix data = createRealMatrix(swissData, 47, 5);
    PearsonsCorrelation correlation = new PearsonsCorrelation(data);
    RealMatrix coefficients = correlation.getCorrelationMatrix();
    RealMatrix reportedPValues = correlation.getCorrelationPValues();
    RealMatrix standardErrors = correlation.getCorrelationStandardErrors();
    // Reference distribution with 45 degrees of freedom (47 - 2 for the matrix built above).
    TDistribution referenceT = new TDistribution(45);
    // Visit the strict lower triangle so each variable pair is checked once.
    for (int row = 1; row < 5; row++) {
        for (int col = 0; col < row; col++) {
            double statistic = FastMath.abs(coefficients.getEntry(row, col)) / standardErrors.getEntry(row, col);
            double upperTail = 1 - referenceT.cumulativeProbability(statistic);
            Assert.assertEquals(2 * upperTail, reportedPValues.getEntry(row, col), 10E-15);
        }
    }
}
 
Example #3
Source File: PearsonsCorrelationTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Recomputes two-sided p-values by hand from the correlation coefficients
 * and their standard errors, and checks that they agree with the p-values
 * reported by {@code PearsonsCorrelation}.
 */
@Test
public void testStdErrorConsistency() {
    RealMatrix data = createRealMatrix(swissData, 47, 5);
    PearsonsCorrelation correlation = new PearsonsCorrelation(data);
    RealMatrix coefficients = correlation.getCorrelationMatrix();
    RealMatrix reportedPValues = correlation.getCorrelationPValues();
    RealMatrix standardErrors = correlation.getCorrelationStandardErrors();
    // Reference distribution with 45 degrees of freedom (47 - 2 for the matrix built above).
    TDistribution referenceT = new TDistribution(45);
    // Visit the strict lower triangle so each variable pair is checked once.
    for (int row = 1; row < 5; row++) {
        for (int col = 0; col < row; col++) {
            double statistic = FastMath.abs(coefficients.getEntry(row, col)) / standardErrors.getEntry(row, col);
            double upperTail = 1 - referenceT.cumulativeProbability(statistic);
            Assert.assertEquals(2 * upperTail, reportedPValues.getEntry(row, col), 10E-15);
        }
    }
}
 
Example #4
Source File: PearsonsCorrelationTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Recomputes two-sided p-values by hand from the correlation coefficients
 * and their standard errors, and checks that they agree with the p-values
 * reported by {@code PearsonsCorrelation}.
 */
@Test
public void testStdErrorConsistency() {
    RealMatrix data = createRealMatrix(swissData, 47, 5);
    PearsonsCorrelation correlation = new PearsonsCorrelation(data);
    RealMatrix coefficients = correlation.getCorrelationMatrix();
    RealMatrix reportedPValues = correlation.getCorrelationPValues();
    RealMatrix standardErrors = correlation.getCorrelationStandardErrors();
    // Reference distribution with 45 degrees of freedom (47 - 2 for the matrix built above).
    TDistribution referenceT = new TDistribution(45);
    // Visit the strict lower triangle so each variable pair is checked once.
    for (int row = 1; row < 5; row++) {
        for (int col = 0; col < row; col++) {
            double statistic = FastMath.abs(coefficients.getEntry(row, col)) / standardErrors.getEntry(row, col);
            double upperTail = 1 - referenceT.cumulativeProbability(statistic);
            Assert.assertEquals(2 * upperTail, reportedPValues.getEntry(row, col), 10E-15);
        }
    }
}
 
Example #5
Source File: PearsonsCorrelation.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Computes the two-sided significance of every pairwise correlation.
 *
 * <p>Entry <code>(i, j)</code> of the result, for <code>i != j</code>, is the
 * p-value for the null hypothesis that the corresponding correlation
 * coefficient is zero: the probability that a random variable distributed
 * as <code>t<sub>n-2</sub></code> has absolute value greater than or equal
 * to <br>
 * <code>|r|((n - 2) / (1 - r<sup>2</sup>))<sup>1/2</sup></code>.</p>
 *
 * <p>Diagonal entries are 0. The returned values are sometimes called the
 * <i>significance</i> of the correlation coefficients.</p>
 *
 * @return matrix of p-values
 * @throws org.apache.commons.math3.exception.MaxCountExceededException
 * if an error occurs estimating probabilities
 */
public RealMatrix getCorrelationPValues() {
    final TDistribution studentT = new TDistribution(nObs - 2);
    final int size = correlationMatrix.getColumnDimension();
    // Freshly allocated, so every cell (the diagonal included) starts at 0.
    final double[][] pValues = new double[size][size];
    for (int row = 0; row < size; row++) {
        for (int col = 0; col < size; col++) {
            if (row != col) {
                final double r = correlationMatrix.getEntry(row, col);
                final double t = FastMath.abs(r * FastMath.sqrt((nObs - 2) / (1 - r * r)));
                // Two-sided p-value: twice the lower tail at -|t|.
                pValues[row][col] = 2 * studentT.cumulativeProbability(-t);
            }
        }
    }
    return new BlockRealMatrix(pValues);
}
 
Example #6
Source File: PearsonsCorrelationTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Recomputes two-sided p-values by hand from the correlation coefficients
 * and their standard errors, and checks that they agree with the p-values
 * reported by {@code PearsonsCorrelation}.
 */
@Test
public void testStdErrorConsistency() {
    RealMatrix data = createRealMatrix(swissData, 47, 5);
    PearsonsCorrelation correlation = new PearsonsCorrelation(data);
    RealMatrix coefficients = correlation.getCorrelationMatrix();
    RealMatrix reportedPValues = correlation.getCorrelationPValues();
    RealMatrix standardErrors = correlation.getCorrelationStandardErrors();
    // Reference distribution with 45 degrees of freedom (47 - 2 for the matrix built above).
    TDistribution referenceT = new TDistribution(45);
    // Visit the strict lower triangle so each variable pair is checked once.
    for (int row = 1; row < 5; row++) {
        for (int col = 0; col < row; col++) {
            double statistic = FastMath.abs(coefficients.getEntry(row, col)) / standardErrors.getEntry(row, col);
            double upperTail = 1 - referenceT.cumulativeProbability(statistic);
            Assert.assertEquals(2 * upperTail, reportedPValues.getEntry(row, col), 10E-15);
        }
    }
}
 
Example #7
Source File: PearsonsCorrelation.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Computes the two-sided significance of every pairwise correlation.
 *
 * <p>Entry <code>(i, j)</code> of the result, for <code>i != j</code>, is the
 * p-value for the null hypothesis that the corresponding correlation
 * coefficient is zero: the probability that a random variable distributed
 * as <code>t<sub>n-2</sub></code> has absolute value greater than or equal
 * to <br>
 * <code>|r|((n - 2) / (1 - r<sup>2</sup>))<sup>1/2</sup></code>.</p>
 *
 * <p>Diagonal entries are 0. The returned values are sometimes called the
 * <i>significance</i> of the correlation coefficients.</p>
 *
 * @return matrix of p-values
 * @throws org.apache.commons.math3.exception.MaxCountExceededException
 * if an error occurs estimating probabilities
 */
public RealMatrix getCorrelationPValues() {
    final TDistribution studentT = new TDistribution(nObs - 2);
    final int size = correlationMatrix.getColumnDimension();
    // Freshly allocated, so every cell (the diagonal included) starts at 0.
    final double[][] pValues = new double[size][size];
    for (int row = 0; row < size; row++) {
        for (int col = 0; col < size; col++) {
            if (row != col) {
                final double r = correlationMatrix.getEntry(row, col);
                final double t = FastMath.abs(r * FastMath.sqrt((nObs - 2) / (1 - r * r)));
                // Two-sided p-value: twice the lower tail at -|t|.
                pValues[row][col] = 2 * studentT.cumulativeProbability(-t);
            }
        }
    }
    return new BlockRealMatrix(pValues);
}
 
Example #8
Source File: PearsonsCorrelation.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Computes the two-sided significance of every pairwise correlation.
 *
 * <p>Entry <code>(i, j)</code> of the result, for <code>i != j</code>, is the
 * p-value for the null hypothesis that the corresponding correlation
 * coefficient is zero: the probability that a random variable distributed
 * as <code>t<sub>n-2</sub></code> has absolute value greater than or equal
 * to <br>
 * <code>|r|((n - 2) / (1 - r<sup>2</sup>))<sup>1/2</sup></code>.</p>
 *
 * <p>Diagonal entries are 0. The returned values are sometimes called the
 * <i>significance</i> of the correlation coefficients.</p>
 *
 * @return matrix of p-values
 * @throws org.apache.commons.math3.exception.MaxCountExceededException
 * if an error occurs estimating probabilities
 */
public RealMatrix getCorrelationPValues() {
    final TDistribution studentT = new TDistribution(nObs - 2);
    final int size = correlationMatrix.getColumnDimension();
    // Freshly allocated, so every cell (the diagonal included) starts at 0.
    final double[][] pValues = new double[size][size];
    for (int row = 0; row < size; row++) {
        for (int col = 0; col < size; col++) {
            if (row != col) {
                final double r = correlationMatrix.getEntry(row, col);
                final double t = FastMath.abs(r * FastMath.sqrt((nObs - 2) / (1 - r * r)));
                // Two-sided p-value: twice the lower tail at -|t|.
                pValues[row][col] = 2 * studentT.cumulativeProbability(-t);
            }
        }
    }
    return new BlockRealMatrix(pValues);
}
 
Example #9
Source File: PearsonsCorrelationTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Recomputes two-sided p-values by hand from the correlation coefficients
 * and their standard errors, and checks that they agree with the p-values
 * reported by {@code PearsonsCorrelation}.
 */
@Test
public void testStdErrorConsistency() {
    RealMatrix data = createRealMatrix(swissData, 47, 5);
    PearsonsCorrelation correlation = new PearsonsCorrelation(data);
    RealMatrix coefficients = correlation.getCorrelationMatrix();
    RealMatrix reportedPValues = correlation.getCorrelationPValues();
    RealMatrix standardErrors = correlation.getCorrelationStandardErrors();
    // Reference distribution with 45 degrees of freedom (47 - 2 for the matrix built above).
    TDistribution referenceT = new TDistribution(45);
    // Visit the strict lower triangle so each variable pair is checked once.
    for (int row = 1; row < 5; row++) {
        for (int col = 0; col < row; col++) {
            double statistic = FastMath.abs(coefficients.getEntry(row, col)) / standardErrors.getEntry(row, col);
            double upperTail = 1 - referenceT.cumulativeProbability(statistic);
            Assert.assertEquals(2 * upperTail, reportedPValues.getEntry(row, col), 10E-15);
        }
    }
}
 
Example #10
Source File: PearsonsCorrelationTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Recomputes two-sided p-values by hand from the correlation coefficients
 * and their standard errors, and checks that they agree with the p-values
 * reported by {@code PearsonsCorrelation}.
 */
@Test
public void testStdErrorConsistency() {
    RealMatrix data = createRealMatrix(swissData, 47, 5);
    PearsonsCorrelation correlation = new PearsonsCorrelation(data);
    RealMatrix coefficients = correlation.getCorrelationMatrix();
    RealMatrix reportedPValues = correlation.getCorrelationPValues();
    RealMatrix standardErrors = correlation.getCorrelationStandardErrors();
    // Reference distribution with 45 degrees of freedom (47 - 2 for the matrix built above).
    TDistribution referenceT = new TDistribution(45);
    // Visit the strict lower triangle so each variable pair is checked once.
    for (int row = 1; row < 5; row++) {
        for (int col = 0; col < row; col++) {
            double statistic = FastMath.abs(coefficients.getEntry(row, col)) / standardErrors.getEntry(row, col);
            double upperTail = 1 - referenceT.cumulativeProbability(statistic);
            Assert.assertEquals(2 * upperTail, reportedPValues.getEntry(row, col), 10E-15);
        }
    }
}
 
Example #11
Source File: PearsonsCorrelation.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Computes the two-sided significance of every pairwise correlation.
 *
 * <p>Entry <code>(i, j)</code> of the result, for <code>i != j</code>, is the
 * p-value for the null hypothesis that the corresponding correlation
 * coefficient is zero: the probability that a random variable distributed
 * as <code>t<sub>n-2</sub></code> has absolute value greater than or equal
 * to <br>
 * <code>|r|((n - 2) / (1 - r<sup>2</sup>))<sup>1/2</sup></code>.</p>
 *
 * <p>Diagonal entries are 0. The returned values are sometimes called the
 * <i>significance</i> of the correlation coefficients.</p>
 *
 * @return matrix of p-values
 * @throws org.apache.commons.math3.exception.MaxCountExceededException
 * if an error occurs estimating probabilities
 */
public RealMatrix getCorrelationPValues() {
    final TDistribution studentT = new TDistribution(nObs - 2);
    final int size = correlationMatrix.getColumnDimension();
    // Freshly allocated, so every cell (the diagonal included) starts at 0.
    final double[][] pValues = new double[size][size];
    for (int row = 0; row < size; row++) {
        for (int col = 0; col < size; col++) {
            if (row != col) {
                final double r = correlationMatrix.getEntry(row, col);
                final double t = FastMath.abs(r * FastMath.sqrt((nObs - 2) / (1 - r * r)));
                // Two-sided p-value: twice the lower tail at -|t|.
                pValues[row][col] = 2 * studentT.cumulativeProbability(-t);
            }
        }
    }
    return new BlockRealMatrix(pValues);
}
 
Example #12
Source File: PearsonsCorrelation.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Computes the two-sided significance of every pairwise correlation.
 *
 * <p>Entry <code>(i, j)</code> of the result, for <code>i != j</code>, is the
 * p-value for the null hypothesis that the corresponding correlation
 * coefficient is zero: the probability that a random variable distributed
 * as <code>t<sub>n-2</sub></code> has absolute value greater than or equal
 * to <br>
 * <code>|r|((n - 2) / (1 - r<sup>2</sup>))<sup>1/2</sup></code>.</p>
 *
 * <p>Diagonal entries are 0. The returned values are sometimes called the
 * <i>significance</i> of the correlation coefficients.</p>
 *
 * @return matrix of p-values
 * @throws org.apache.commons.math3.exception.MaxCountExceededException
 * if an error occurs estimating probabilities
 */
public RealMatrix getCorrelationPValues() {
    final TDistribution studentT = new TDistribution(nObs - 2);
    final int size = correlationMatrix.getColumnDimension();
    // Freshly allocated, so every cell (the diagonal included) starts at 0.
    final double[][] pValues = new double[size][size];
    for (int row = 0; row < size; row++) {
        for (int col = 0; col < size; col++) {
            if (row != col) {
                final double r = correlationMatrix.getEntry(row, col);
                final double t = FastMath.abs(r * FastMath.sqrt((nObs - 2) / (1 - r * r)));
                // Two-sided p-value: twice the lower tail at -|t|.
                pValues[row][col] = 2 * studentT.cumulativeProbability(-t);
            }
        }
    }
    return new BlockRealMatrix(pValues);
}
 
Example #13
Source File: PearsonsCorrelationTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Recomputes two-sided p-values by hand from the correlation coefficients
 * and their standard errors, and checks that they agree with the p-values
 * reported by {@code PearsonsCorrelation}.
 */
@Test
public void testStdErrorConsistency() {
    RealMatrix data = createRealMatrix(swissData, 47, 5);
    PearsonsCorrelation correlation = new PearsonsCorrelation(data);
    RealMatrix coefficients = correlation.getCorrelationMatrix();
    RealMatrix reportedPValues = correlation.getCorrelationPValues();
    RealMatrix standardErrors = correlation.getCorrelationStandardErrors();
    // Reference distribution with 45 degrees of freedom (47 - 2 for the matrix built above).
    TDistribution referenceT = new TDistribution(45);
    // Visit the strict lower triangle so each variable pair is checked once.
    for (int row = 1; row < 5; row++) {
        for (int col = 0; col < row; col++) {
            double statistic = FastMath.abs(coefficients.getEntry(row, col)) / standardErrors.getEntry(row, col);
            double upperTail = 1 - referenceT.cumulativeProbability(statistic);
            Assert.assertEquals(2 * upperTail, reportedPValues.getEntry(row, col), 10E-15);
        }
    }
}
 
Example #14
Source File: PearsonsCorrelation.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Computes the two-sided significance of every pairwise correlation.
 *
 * <p>Entry <code>(i, j)</code> of the result, for <code>i != j</code>, is the
 * p-value for the null hypothesis that the corresponding correlation
 * coefficient is zero: the probability that a random variable distributed
 * as <code>t<sub>n-2</sub></code> has absolute value greater than or equal
 * to <br>
 * <code>|r|((n - 2) / (1 - r<sup>2</sup>))<sup>1/2</sup></code>.</p>
 *
 * <p>Diagonal entries are 0. The returned values are sometimes called the
 * <i>significance</i> of the correlation coefficients.</p>
 *
 * @return matrix of p-values
 * @throws org.apache.commons.math3.exception.MaxCountExceededException
 * if an error occurs estimating probabilities
 */
public RealMatrix getCorrelationPValues() {
    final TDistribution studentT = new TDistribution(nObs - 2);
    final int size = correlationMatrix.getColumnDimension();
    // Freshly allocated, so every cell (the diagonal included) starts at 0.
    final double[][] pValues = new double[size][size];
    for (int row = 0; row < size; row++) {
        for (int col = 0; col < size; col++) {
            if (row != col) {
                final double r = correlationMatrix.getEntry(row, col);
                final double t = FastMath.abs(r * FastMath.sqrt((nObs - 2) / (1 - r * r)));
                // Two-sided p-value: twice the lower tail at -|t|.
                pValues[row][col] = 2 * studentT.cumulativeProbability(-t);
            }
        }
    }
    return new BlockRealMatrix(pValues);
}
 
Example #15
Source File: WeightedLeastSquaresRegression.java    From Strata with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the regression result together with its summary statistics:
 * residuals, weighted sums of squares, R-squared (plain and adjusted),
 * mean square error, and per-coefficient standard errors, t-statistics and
 * p-values.
 *
 * <p>The weights are read from the diagonal of {@code w}. Each p-value is the
 * single upper-tail probability {@code 1 - F(|t|)} of a Student-t
 * distribution with {@code n - k} degrees of freedom; it is not doubled.</p>
 *
 * @param x  the regressors; only the row count is used here
 * @param w  the weight matrix; only {@code w[i][i]} is read
 * @param y  the observed values
 * @param betas  the fitted coefficients
 * @param yModel  the fitted values, one per observation
 * @param transpose  left factor of the product whose inverse supplies the coefficient covariances
 * @param matrix  right factor of that product
 * @param useIntercept  passed through to the result unchanged
 * @return the populated regression result
 */
private LeastSquaresRegressionResult getResultWithStatistics(
    double[][] x, double[][] w, double[] y, double[] betas, double[] yModel,
    DoubleMatrix transpose, DoubleMatrix matrix, boolean useIntercept) {

  int n = x.length;
  int k = betas.length;

  // Plain (unweighted) mean of the observations.
  double yMean = 0.;
  for (int i = 0; i < y.length; i++) {
    yMean += y[i];
  }
  yMean /= y.length;

  // Weighted total and error sums of squares, plus the raw residuals.
  double[] residuals = new double[n];
  double totalSumOfSquares = 0.;
  double errorSumOfSquares = 0.;
  for (int i = 0; i < n; i++) {
    double weight = w[i][i];
    double deviation = y[i] - yMean;
    double residual = y[i] - yModel[i];
    residuals[i] = residual;
    totalSumOfSquares += weight * deviation * deviation;
    errorSumOfSquares += weight * residual * residual;
  }

  double[][] covarianceBetas = convertArray(ALGEBRA.getInverse(ALGEBRA.multiply(transpose, matrix)).toArray());
  double regressionSumOfSquares = totalSumOfSquares - errorSumOfSquares;
  double rSquared = regressionSumOfSquares / totalSumOfSquares;
  double adjustedRSquared = 1. - (1 - rSquared) * (n - 1) / (n - k);
  double meanSquareError = errorSumOfSquares / (n - k);

  // Per-coefficient inference against a t distribution with n - k degrees of freedom.
  TDistribution studentT = new TDistribution(n - k);
  double[] standardErrorsOfBeta = new double[k];
  double[] tStats = new double[k];
  double[] pValues = new double[k];
  for (int j = 0; j < k; j++) {
    double standardError = Math.sqrt(meanSquareError * covarianceBetas[j][j]);
    double tStat = betas[j] / standardError;
    standardErrorsOfBeta[j] = standardError;
    tStats[j] = tStat;
    pValues[j] = 1 - studentT.cumulativeProbability(Math.abs(tStat));
  }

  return new WeightedLeastSquaresRegressionResult(
      betas, residuals, meanSquareError, standardErrorsOfBeta, rSquared, adjustedRSquared, tStats, pValues, useIntercept);
}
 
Example #16
Source File: RandomDataTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Draws 1000 t-distributed values (10 degrees of freedom) from a re-seeded
 * generator, bins them by the quartiles of the reference distribution, and
 * applies a chi-square acceptance check to the bin counts.
 */
@Test
public void testNextT() {
    TDistribution reference = new TDistribution(10);
    double[] quartileBounds = TestUtils.getDistributionQuartiles(reference);
    long[] binCounts = new long[4];
    // Fixed seed keeps the sample, and therefore the test outcome, reproducible.
    randomData.reSeed(1000);
    int remaining = 1000;
    while (remaining-- > 0) {
        TestUtils.updateCounts(randomData.nextT(10), binCounts, quartileBounds);
    }
    TestUtils.assertChiSquareAccept(expected, binCounts, 0.001);
}
 
Example #17
Source File: RandomDataGeneratorTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Draws 1000 t-distributed values (10 degrees of freedom) from a re-seeded
 * generator, bins them by the quartiles of the reference distribution, and
 * applies a chi-square acceptance check to the bin counts.
 */
@Test
public void testNextT() {
    TDistribution reference = new TDistribution(10);
    double[] quartileBounds = TestUtils.getDistributionQuartiles(reference);
    long[] binCounts = new long[4];
    // Fixed seed keeps the sample, and therefore the test outcome, reproducible.
    randomData.reSeed(1000);
    int remaining = 1000;
    while (remaining-- > 0) {
        TestUtils.updateCounts(randomData.nextT(10), binCounts, quartileBounds);
    }
    TestUtils.assertChiSquareAccept(expected, binCounts, 0.001);
}
 
Example #18
Source File: RandomDataGeneratorTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Draws 1000 t-distributed values (10 degrees of freedom) from a re-seeded
 * generator, bins them by the quartiles of the reference distribution, and
 * applies a chi-square acceptance check to the bin counts.
 */
@Test
public void testNextT() {
    TDistribution reference = new TDistribution(10);
    double[] quartileBounds = TestUtils.getDistributionQuartiles(reference);
    long[] binCounts = new long[4];
    // Fixed seed keeps the sample, and therefore the test outcome, reproducible.
    randomData.reSeed(1000);
    int remaining = 1000;
    while (remaining-- > 0) {
        TestUtils.updateCounts(randomData.nextT(10), binCounts, quartileBounds);
    }
    TestUtils.assertChiSquareAccept(expected, binCounts, 0.001);
}
 
Example #19
Source File: EstimateRepairability.java    From BART with MIT License 5 votes vote down vote up
/**
 * Computes the half-width of a two-sided Student-t confidence interval for
 * the mean of the given sample.
 *
 * @param stats  summary statistics supplying the sample size and standard deviation
 * @param level  the confidence level, e.g. 0.95
 * @return the half-width, or {@code Double.NaN} when a
 *     {@code MathIllegalArgumentException} is raised while computing it
 */
private static double calcMeanCI(SummaryStatistics stats, double level) {
    try {
        // Split the excluded probability (1 - level) evenly between both tails.
        final double upperQuantile = 1.0 - (1 - level) / 2;
        final TDistribution studentT = new TDistribution(stats.getN() - 1);
        final double criticalValue = studentT.inverseCumulativeProbability(upperQuantile);
        return criticalValue * stats.getStandardDeviation() / Math.sqrt(stats.getN());
    } catch (MathIllegalArgumentException e) {
        return Double.NaN;
    }
}
 
Example #20
Source File: ConfidenceInterval.java    From rival with Apache License 2.0 5 votes vote down vote up
/**
 * Computes a symmetric Student-t interval around a mean. Adapted from
 * https://gist.github.com/gcardone/5536578.
 *
 * <p>NOTE(review): the critical value is the {@code 1 - alpha} quantile, so
 * {@code alpha} acts as the probability left in ONE tail. For a two-sided
 * interval at confidence {@code 1 - a} the caller would need to pass
 * {@code a / 2} - confirm against the call sites.</p>
 *
 * @param alpha probability of incorrectly rejecting the null hypothesis (1
 * - confidence_level)
 * @param df degrees of freedom
 * @param n number of observations
 * @param std standard deviation
 * @param mean mean
 * @return array with the confidence interval: [mean - margin of error, mean
 * + margin of error]
 */
public static double[] getConfidenceInterval(final double alpha, final int df, final int n, final double std, final double mean) {
    final TDistribution studentT = new TDistribution(df);
    final double criticalValue = studentT.inverseCumulativeProbability(1.0 - alpha);
    // Margin of error: critical value times the standard error of the mean.
    final double margin = criticalValue * std / Math.sqrt(n);
    return new double[]{mean - margin, mean + margin};
}
 
Example #21
Source File: RandomDataGeneratorTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Draws 1000 t-distributed values (10 degrees of freedom) from a re-seeded
 * generator, bins them by the quartiles of the reference distribution, and
 * applies a chi-square acceptance check to the bin counts.
 */
@Test
public void testNextT() {
    TDistribution reference = new TDistribution(10);
    double[] quartileBounds = TestUtils.getDistributionQuartiles(reference);
    long[] binCounts = new long[4];
    // Fixed seed keeps the sample, and therefore the test outcome, reproducible.
    randomData.reSeed(1000);
    int remaining = 1000;
    while (remaining-- > 0) {
        TestUtils.updateCounts(randomData.nextT(10), binCounts, quartileBounds);
    }
    TestUtils.assertChiSquareAccept(expected, binCounts, 0.001);
}
 
Example #22
Source File: RandomDataGeneratorTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Draws 1000 t-distributed values (10 degrees of freedom) from a re-seeded
 * generator, bins them by the quartiles of the reference distribution, and
 * applies a chi-square acceptance check to the bin counts.
 */
@Test
public void testNextT() {
    TDistribution reference = new TDistribution(10);
    double[] quartileBounds = TestUtils.getDistributionQuartiles(reference);
    long[] binCounts = new long[4];
    // Fixed seed keeps the sample, and therefore the test outcome, reproducible.
    randomData.reSeed(1000);
    int remaining = 1000;
    while (remaining-- > 0) {
        TestUtils.updateCounts(randomData.nextT(10), binCounts, quartileBounds);
    }
    TestUtils.assertChiSquareAccept(expected, binCounts, 0.001);
}
 
Example #23
Source File: XDataFrameLeastSquares.java    From morpheus-core with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the t-statistic, two-sided p-value and confidence bounds for the
 * intercept and for every regressor, and stores them in the result frames.
 *
 * <p>NOTE(review): the intercept fields are read and written regardless of
 * {@code hasIntercept()}; only the beta offset depends on it - confirm this
 * is intended.</p>
 *
 * @param betaVector  the fitted parameters; entry 0 is read as the intercept
 * @throws DataFrameException  wrapping any exception raised during the computation
 */
private void computeParameterSignificance(RealVector betaVector) {
    try {
        final double degreesOfFreedom = frame.rows().count() - (regressors.size() + 1);
        final TDistribution studentT = new TDistribution(degreesOfFreedom);

        // Intercept: statistics first, then the five output fields.
        final double interceptValue = betaVector.getEntry(0);
        final double interceptSe = intercept.data().getDouble(0, Field.STD_ERROR);
        final double interceptT = interceptValue / interceptSe;
        final double interceptP = studentT.cumulativeProbability(-Math.abs(interceptT)) * 2d;
        final double interceptHalfWidth = interceptSe * studentT.inverseCumulativeProbability(1d - alpha / 2d);
        this.intercept.data().setDouble(0, Field.PARAMETER, interceptValue);
        this.intercept.data().setDouble(0, Field.T_STAT, interceptT);
        this.intercept.data().setDouble(0, Field.P_VALUE, interceptP);
        this.intercept.data().setDouble(0, Field.CI_LOWER, interceptValue - interceptHalfWidth);
        this.intercept.data().setDouble(0, Field.CI_UPPER, interceptValue + interceptHalfWidth);

        // Regressor coefficients start after the intercept slot when one is present.
        final int firstBetaIndex = hasIntercept() ? 1 : 0;
        int position = 0;
        while (position < regressors.size()) {
            final C key = regressors.get(position);
            final double estimate = betaVector.getEntry(position + firstBetaIndex);
            final double stdError = betas.data().getDouble(key, Field.STD_ERROR);
            final double tValue = estimate / stdError;
            final double twoSidedP = studentT.cumulativeProbability(-Math.abs(tValue)) * 2d;
            final double halfWidth = stdError * studentT.inverseCumulativeProbability(1d - alpha / 2d);
            this.betas.data().setDouble(key, Field.PARAMETER, estimate);
            this.betas.data().setDouble(key, Field.T_STAT, tValue);
            this.betas.data().setDouble(key, Field.P_VALUE, twoSidedP);
            this.betas.data().setDouble(key, Field.CI_LOWER, estimate - halfWidth);
            this.betas.data().setDouble(key, Field.CI_UPPER, estimate + halfWidth);
            ++position;
        }
    } catch (Exception ex) {
        throw new DataFrameException("Failed to compute regression coefficient t-stats and p-values", ex);
    }
}
 
Example #24
Source File: RandomDataTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Draws 1000 t-distributed values (10 degrees of freedom) from a re-seeded
 * generator, bins them by the quartiles of the reference distribution, and
 * applies a chi-square acceptance check to the bin counts.
 */
@Test
public void testNextT() throws Exception {
    TDistribution reference = new TDistribution(10);
    double[] quartileBounds = TestUtils.getDistributionQuartiles(reference);
    long[] binCounts = new long[4];
    // Fixed seed keeps the sample, and therefore the test outcome, reproducible.
    randomData.reSeed(1000);
    int remaining = 1000;
    while (remaining-- > 0) {
        TestUtils.updateCounts(randomData.nextT(10), binCounts, quartileBounds);
    }
    TestUtils.assertChiSquareAccept(expected, binCounts, 0.001);
}
 
Example #25
Source File: RandomDataGeneratorTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Draws 1000 t-distributed values (10 degrees of freedom) from a re-seeded
 * generator, bins them by the quartiles of the reference distribution, and
 * applies a chi-square acceptance check to the bin counts.
 */
@Test
public void testNextT() {
    TDistribution reference = new TDistribution(10);
    double[] quartileBounds = TestUtils.getDistributionQuartiles(reference);
    long[] binCounts = new long[4];
    // Fixed seed keeps the sample, and therefore the test outcome, reproducible.
    randomData.reSeed(1000);
    int remaining = 1000;
    while (remaining-- > 0) {
        TestUtils.updateCounts(randomData.nextT(10), binCounts, quartileBounds);
    }
    TestUtils.assertChiSquareAccept(expected, binCounts, 0.001);
}
 
Example #26
Source File: GTSOutliersHelper.java    From warp10-platform with Apache License 2.0 4 votes vote down vote up
/**
 * Runs Grubbs' test for a single outlier, using either mean/std or
 * median/mad as the location/scale estimates.
 * @see http://www.itl.nist.gov/div898/handbook/eda/section3/eda35h1.htm
 *
 * <p>The point with the largest absolute standardized deviation is reported
 * when that deviation exceeds the Grubbs critical value. Nothing is reported
 * for series with fewer than 3 values or with a zero scale estimate.</p>
 *
 * @param gts           Series to test
 * @param useMedian     Should the test use median/mad instead of mean/std
 * @param alpha         Significance level with which to accept or reject anomalies (0.05 is the documented default; none is applied here)
 *
 * @return list holding the tick of the detected outlier, or an empty list
 *
 * @throws WarpScriptException
 */
public static List<Long> grubbsTest(GeoTimeSerie gts, boolean useMedian, double alpha) throws WarpScriptException {
  doubleCheck(gts);
  List<Long> outlierTicks = new ArrayList<Long>();

  int count = gts.values;
  if (count < 3) {
    // Too few points for the test to be meaningful.
    return outlierTicks;
  }

  // Presumably [0] is the location (mean or median) and [1] the scale (std or mad) - see madsigma.
  double[] locationScale = madsigma(gts, useMedian);
  double center = locationScale[0];
  double scale = locationScale[1];
  if (0.0D == scale) {
    return outlierTicks;
  }

  // Find the point that deviates most from the center, in units of scale.
  double largestScore = Double.NEGATIVE_INFINITY;
  long candidateTick = 0L;
  for (int idx = 0; idx < count; idx++) {
    double score = Math.abs((gts.doubleValues[idx] - center) / scale);
    if (score > largestScore) {
      largestScore = score;
      candidateTick = gts.ticks[idx];
    }
  }

  // Critical t value at the lower-tail probability alpha / (2N); only its magnitude is used below.
  TDistribution studentT = new TDistribution(count - 2);
  double t = studentT.inverseCumulativeProbability(alpha / (2 * count));

  // Grubbs rejection threshold.
  double threshold = (count - 1) * Math.abs(t) / Math.sqrt(count * (count - 2 + t * t));

  if (largestScore > threshold) {
    outlierTicks.add(candidateTick);
  }

  return outlierTicks;
}
 
Example #27
Source File: GTSOutliersHelper.java    From warp10-platform with Apache License 2.0 4 votes vote down vote up
/**
 * Runs the generalized extreme Studentized deviate (ESD) test, using either
 * mean/std or median/mad as the location/scale estimates.
 * @see http://www.itl.nist.gov/div898/handbook/eda/section3/eda35h3.htm
 *
 * <p>Up to {@code k} rounds are performed on a working copy of the series.
 * Each round picks the most extreme remaining point, compares its score to
 * that round's critical value, then removes the point. The reported outliers
 * are the points removed in every round up to and including the last round
 * whose score exceeded its critical value.</p>
 *
 * @param gts           Series to test; it is not modified
 * @param k             Upper bound of suspected number of outliers
 * @param useMedian     Should the test use median/mad instead of mean/std
 * @param alpha         Significance level with which to accept or reject anomalies (0.05 is the documented default; none is applied here)
 *
 * @return ticks of the detected outliers, possibly empty
 *
 * @throws WarpScriptException
 */
public static List<Long> ESDTest(GeoTimeSerie gts, int k, boolean useMedian, double alpha) throws WarpScriptException {
  doubleCheck(gts);

  // Work on a partial copy (only the fields used below) so the input stays untouched.
  GeoTimeSerie working = new GeoTimeSerie();
  working.type = gts.type;
  working.values = gts.values;
  working.doubleValues = Arrays.copyOf(gts.doubleValues, gts.values);
  working.ticks = Arrays.copyOf(gts.ticks, gts.values);

  // Index of the last round whose extreme point exceeded its critical value.
  int lastRejectedRound = -1;

  for (int round = 0; round < k; round++) {
    int remaining = working.values;
    if (remaining < 3) {
      // Not enough points left to test.
      break;
    }

    // Presumably [0] is the location and [1] the scale - see madsigma.
    double[] locationScale = madsigma(working, useMedian);
    double center = locationScale[0];
    double scale = locationScale[1];
    if (0.0D == scale) {
      // Degenerate spread: nothing more can be flagged.
      break;
    }

    // Locate the most extreme remaining point.
    int extremeIdx = 0;
    double extremeScore = Double.NEGATIVE_INFINITY;
    for (int idx = 0; idx < remaining; idx++) {
      double score = Math.abs((working.doubleValues[idx] - center) / scale);
      if (score > extremeScore) {
        extremeScore = score;
        extremeIdx = idx;
      }
    }

    // Critical value for this round.
    double upperProbability = 1 - alpha / (2 * remaining);
    double t = new TDistribution(remaining - 2).inverseCumulativeProbability(upperProbability);
    double lambda = (remaining - 1) * t / Math.sqrt((remaining - 2 + t * t) * remaining);

    if (extremeScore > lambda) {
      lastRejectedRound = round;
    }

    // Drop the extreme point by shrinking the active range and swapping the
    // point's tick into the slot just past the new end.
    working.values--;
    int parkedSlot = working.values;

    long extremeTick = working.ticks[extremeIdx];
    working.ticks[extremeIdx] = working.ticks[parkedSlot];
    working.ticks[parkedSlot] = extremeTick;

    // The removed point's value is no longer needed, so it is simply overwritten.
    working.doubleValues[extremeIdx] = working.doubleValues[parkedSlot];
  }

  // Removed ticks sit at the tail of the working array, first removal last.
  List<Long> outlierTicks = new ArrayList<Long>();
  for (int removed = 0; removed <= lastRejectedRound; removed++) {
    outlierTicks.add(working.ticks[gts.values - 1 - removed]);
  }

  return outlierTicks;
}
 
Example #28
Source File: TopKEstimator.java    From vespa with Apache License 2.0 4 votes vote down vote up
/**
 * Creates an estimator backed by a Student-t distribution.
 *
 * @param freedom  degrees of freedom of the underlying t distribution
 * @param defaultProbability  stored as the default probability and used to decide
 *     (via {@code needEstimate}) whether estimation is needed
 * @param skewFactor  stored unchanged
 */
public TopKEstimator(double freedom, double defaultProbability, double skewFactor) {
    // A null random generator is passed: the distribution is built without a sampling source.
    this.studentT = new TDistribution(null, freedom);
    this.defaultP = defaultProbability;
    this.estimate = needEstimate(this.defaultP);
    this.skewFactor = skewFactor;
}
 
Example #29
Source File: PearsonsCorrelation.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Computes the two-sided significance of every pairwise correlation.
 *
 * <p>Entry <code>(i, j)</code> of the result, for <code>i != j</code>, is the
 * p-value for the null hypothesis that the corresponding correlation
 * coefficient is zero: the probability that a random variable distributed
 * as <code>t<sub>n-2</sub></code> has absolute value greater than or equal
 * to <br>
 * <code>|r|((n - 2) / (1 - r<sup>2</sup>))<sup>1/2</sup></code>.</p>
 *
 * <p>Diagonal entries are 0. The returned values are sometimes called the
 * <i>significance</i> of the correlation coefficients.</p>
 *
 * <p>This method requires an instance built by one of the constructors
 * that take an input matrix.</p>
 *
 * @return matrix of p-values
 * @throws org.apache.commons.math3.exception.MaxCountExceededException
 * if an error occurs estimating probabilities
 * @throws NullPointerException if this instance was created with no data
 */
public RealMatrix getCorrelationPValues() {
    final TDistribution studentT = new TDistribution(nObs - 2);
    final int size = correlationMatrix.getColumnDimension();
    // Freshly allocated, so every cell (the diagonal included) starts at 0.
    final double[][] pValues = new double[size][size];
    for (int row = 0; row < size; row++) {
        for (int col = 0; col < size; col++) {
            if (row != col) {
                final double r = correlationMatrix.getEntry(row, col);
                final double t = FastMath.abs(r * FastMath.sqrt((nObs - 2) / (1 - r * r)));
                // Two-sided p-value: twice the lower tail at -|t|.
                pValues[row][col] = 2 * studentT.cumulativeProbability(-t);
            }
        }
    }
    return new BlockRealMatrix(pValues);
}
 
Example #30
Source File: RandomWalkSamplerTest.java    From log-synth with Apache License 2.0 4 votes vote down vote up
/**
 * Samples one million records from the schema in {@code schema015.json},
 * checks that each random walk's reported value equals the running sum of
 * its steps, and then compares the observed step and gamma distributions
 * against reference distributions at a set of quantiles.
 */
@Test
public void testBasics() throws IOException {
    // Variables described for this sampler:
    //   g1 is gamma distributed with alpha = 0.2, beta = 0.2
    //   v1 is unit normal
    //   v2 is normal with mean = 0, sd = 2
    //   v3 is gamma-normal with dof=2, mean = 0.
    // A further field, g2, is also read below and compared against Gamma(1, 1).
    SchemaSampler sampler = new SchemaSampler(Resources.asCharSource(Resources.getResource("schema015.json"), Charsets.UTF_8).read());

    // One digest per observed quantity.
    TDigest gamma1Digest = new AVLTreeDigest(500);
    TDigest gamma2Digest = new AVLTreeDigest(500);
    TDigest steps1Digest = new AVLTreeDigest(500);
    TDigest steps2Digest = new AVLTreeDigest(500);
    TDigest steps3Digest = new AVLTreeDigest(500);

    // Running positions of the three walks, rebuilt from the reported steps.
    double walk1 = 0;
    double walk2 = 0;
    double walk3 = 0;

    for (int n = 0; n < 1000000; n++) {
        JsonNode record = sampler.sample();
        gamma1Digest.add(record.get("g1").asDouble());
        gamma2Digest.add(record.get("g2").asDouble());

        double delta1 = record.get("v1").get("step").asDouble();
        steps1Digest.add(delta1);
        walk1 += delta1;
        assertEquals(walk1, record.get("v1").get("value").asDouble(), 0);
        assertEquals(walk1, record.get("v1-bare").asDouble(), 0);

        double delta2 = record.get("v2").get("step").asDouble();
        steps2Digest.add(delta2);
        walk2 += delta2;
        assertEquals(walk2, record.get("v2").get("value").asDouble(), 0);

        double delta3 = record.get("v3").get("step").asDouble();
        steps3Digest.add(delta3);
        walk3 += delta3;
        assertEquals(walk3, record.get("v3").get("value").asDouble(), 0);
    }

    // Reference distributions for the accuracy check of the observed distributions.
    NormalDistribution standardNormal = new NormalDistribution();
    GammaDistribution gammaRef1 = new GammaDistribution(0.2, 5);
    GammaDistribution gammaRef2 = new GammaDistribution(1, 1);
    TDistribution studentT = new TDistribution(2);

    // NOTE(review): 0.99 appears twice; the last entry may have been meant as 0.999
    // (mirroring 0.001) - confirm before changing, since it would tighten the test.
    double[] quantiles = new double[]{0.001, 0.01, 0.1, 0.2, 0.5, 0.8, 0.9, 0.99, 0.99};
    for (double q : quantiles) {
        // Mapping an observed quantile through the reference CDF should give back q.
        double tolerance = (1 - q) * q * 10e-2;

        assertEquals(q, gammaRef1.cumulativeProbability(gamma1Digest.quantile(q)), tolerance);
        assertEquals(q, gammaRef2.cumulativeProbability(gamma2Digest.quantile(q)), tolerance);
        assertEquals(q, standardNormal.cumulativeProbability(steps1Digest.quantile(q)), tolerance);
        // v2 steps are rescaled by 2 before comparison with the unit normal.
        assertEquals(q, standardNormal.cumulativeProbability(steps2Digest.quantile(q) / 2), tolerance);
        assertEquals(q, studentT.cumulativeProbability(steps3Digest.quantile(q)), tolerance);
    }
}