org.apache.commons.math3.stat.inference.TTest Java Examples

The following examples show how to use org.apache.commons.math3.stat.inference.TTest. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: AbstractExperimentRunner.java    From quaerite with Apache License 2.0 6 votes vote down vote up
/**
 * Writes one significance matrix per scorer that requests it.
 *
 * <p>Only scorers that are {@link AbstractJudgmentScorer} instances and whose
 * {@code getExportPMatrix()} returns {@code true} are processed. For each such
 * scorer the experiments are ordered by descending aggregated score before
 * being handed to {@code writeMatrix}.
 *
 * @param querySet      query set whose scores are compared
 * @param targetScorers scorers to consider
 * @param experimentDB  source of the aggregated and per-query scores
 * @param outputDir     directory the matrix files are written to
 * @throws Exception if reading scores or writing a matrix fails
 */
private static void dumpSignificanceMatrices(String querySet,
                                             List<Scorer> targetScorers,
                                             ExperimentDB experimentDB,
                                             Path outputDir) throws Exception {
    TTest tTest = new TTest();
    for (Scorer scorer : targetScorers) {
        if (scorer instanceof AbstractJudgmentScorer &&
                ((AbstractJudgmentScorer) scorer).getExportPMatrix()) {
            Map<String, Double> aggregatedScores =
                    experimentDB.getKeyExperimentScore(scorer, querySet);

            Map<String, Double> sorted = MapUtil.sortByDescendingValue(aggregatedScores);
            // Typed copy of the key set (was a raw ArrayList filled via addAll);
            // the iteration order of the sorted map is kept.
            List<String> experiments = new ArrayList<>(sorted.keySet());
            writeMatrix(tTest, (AbstractJudgmentScorer) scorer,
                    querySet, experiments, experimentDB, outputDir);
        }
    }
}
 
Example #2
Source File: AbstractExperimentRunner.java    From quaerite with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the t-test p-value between the per-query scores of two experiments.
 *
 * <p>The scores of experiment B are looked up in {@code experimentDB} and
 * aligned to the queries of experiment A. A missing or negative score on
 * either side is treated as {@code 0.0}.
 *
 * @param tTest        t-test implementation to use
 * @param querySet     query set the scores belong to
 * @param scoresA      query -> score for experiment A
 * @param experimentA  name of experiment A (used for logging only)
 * @param experimentB  name of experiment B
 * @param scorer       name of the scorer whose scores are compared
 * @param experimentDB source of the scores for experiment B
 * @return the p-value from {@code tTest.tTest(double[], double[])}, or
 *         {@code -1} when experiment A has fewer than two scores
 * @throws SQLException if the scores of experiment B cannot be loaded
 */
private static double calcSignificance(TTest tTest, String querySet,
                                       Map<String, Double> scoresA, String experimentA,
                                       String experimentB, String scorer,
                                       ExperimentDB experimentDB) throws SQLException {

    Map<String, Double> scoresB = experimentDB.getScores(querySet, experimentB, scorer);
    if (scoresA.size() != scoresB.size()) {
        LOG.warn("Different number of scores for " +
                experimentA + "(" + scoresA.size() +
                ") vs. " + experimentB + "(" + scoresB.size() + ")");
    }

    int sampleCount = scoresA.size();
    if (sampleCount < 2) {
        LOG.warn("too few examples for t-test; returning -1");
        return -1;
    }

    // Both arrays are indexed by the queries of experiment A, so both must be
    // sized from scoresA. Sizing arrB from scoresB overflowed when B had fewer
    // entries and left spurious trailing zeros when B had more.
    double[] arrA = new double[sampleCount];
    double[] arrB = new double[sampleCount];

    int i = 0;
    for (Map.Entry<String, Double> entry : scoresA.entrySet()) {
        Double scoreA = entry.getValue();
        Double scoreB = scoresB.get(entry.getKey());
        if (scoreA == null || scoreA < 0) {
            scoreA = 0.0d;
        }
        if (scoreB == null || scoreB < 0) {
            scoreB = 0.0d;
        }
        arrA[i] = scoreA;
        arrB[i] = scoreB;
        i++;
    }
    return tTest.tTest(arrA, arrB);
}
 
Example #3
Source File: PairedTTestEvaluator.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a paired t-test on two lists of numbers.
 *
 * <p>Both arguments must be {@link List}s of {@link Number}; each is copied
 * into a {@code double[]} before being passed to the test. The returned tuple
 * holds the result of {@code pairedT} under {@code "t-statistic"} and the
 * result of {@code pairedTTest} under {@code StreamParams.P_VALUE}.
 *
 * @param value1 first sample, expected to be a {@code List<Number>}
 * @param value2 second sample, expected to be a {@code List<Number>}
 * @return a tuple with the two results described above
 * @throws IOException if either argument is not a {@code List}
 */
@Override
public Object doWork(Object value1, Object value2) throws IOException {
  TTest pairedTest = new TTest();
  Tuple result = new Tuple();

  if (!(value1 instanceof List)) {
    throw new IOException("First parameter for pairedTtest must be a double array");
  }
  @SuppressWarnings({"unchecked"})
  List<Number> firstValues = (List<Number>) value1;
  double[] firstSamples = new double[firstValues.size()];
  int firstIdx = 0;
  for (Number number : firstValues) {
    firstSamples[firstIdx++] = number.doubleValue();
  }

  // The second argument is validated only after the first has been converted.
  if (!(value2 instanceof List)) {
    throw new IOException("Second parameter for pairedTtest must be a double array");
  }
  @SuppressWarnings({"unchecked"})
  List<Number> secondValues = (List<Number>) value2;
  double[] secondSamples = new double[secondValues.size()];
  int secondIdx = 0;
  for (Number number : secondValues) {
    secondSamples[secondIdx++] = number.doubleValue();
  }

  double tStatistic = pairedTest.pairedT(firstSamples, secondSamples);
  double pValue = pairedTest.pairedTTest(firstSamples, secondSamples);
  result.put("t-statistic", tStatistic);
  result.put(StreamParams.P_VALUE, pValue);
  return result;
}
 
Example #4
Source File: ArrayOfDoublesSketchesTTestUDF.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a t-test between two ArrayOfDoublesSketches, one test per value column.
 *
 * @param serializedSketchA first ArrayOfDoublesSketch in serialized binary form
 * @param serializedSketchB second ArrayOfDoublesSketch in serialized binary form
 * @return one p-value per value column, or {@code null} if either input is
 *         {@code null} or either sketch retains fewer than two entries
 * @throws IllegalArgumentException if the sketches differ in their number of values
 */
public List<Double> evaluate(final BytesWritable serializedSketchA, final BytesWritable serializedSketchB) {
  if (serializedSketchA == null || serializedSketchB == null) {
    return null;
  }

  final ArrayOfDoublesSketch first =
      ArrayOfDoublesSketches.wrapSketch(BytesWritableHelper.wrapAsMemory(serializedSketchA));
  final ArrayOfDoublesSketch second =
      ArrayOfDoublesSketches.wrapSketch(BytesWritableHelper.wrapAsMemory(serializedSketchB));

  final int numValues = first.getNumValues();
  if (numValues != second.getNumValues()) {
    throw new IllegalArgumentException("Both sketches must have the same number of values");
  }

  // With fewer than 2 retained entries in either sketch no p-value is computed.
  final boolean enoughEntries =
      first.getRetainedEntries() >= 2 && second.getRetainedEntries() >= 2;
  if (!enoughEntries) {
    return null;
  }

  final SummaryStatistics[] statsA = ArrayOfDoublesSketchStats.sketchToSummaryStatistics(first);
  final SummaryStatistics[] statsB = ArrayOfDoublesSketchStats.sketchToSummaryStatistics(second);

  final TTest test = new TTest();
  final List<Double> result = new ArrayList<>(numValues);
  int column = 0;
  while (column < numValues) {
    result.add(test.tTest(statsA[column], statsB[column]));
    column++;
  }
  return result;
}
 
Example #5
Source File: AbstractExperimentRunner.java    From quaerite with Apache License 2.0 4 votes vote down vote up
/**
 * Writes an upper-triangular CSV matrix of pairwise significance values.
 *
 * <p>The file is named {@code sig_diffs_<scorer>[_<querySet>].csv} and is
 * created in {@code outputDir}. At most {@code MAX_MATRIX_COLS} experiments
 * (the first ones in {@code experiments}) are included. The header row lists
 * the experiment names; each following row starts with an experiment name,
 * leaves the cells below the diagonal empty, writes 1.00 on the diagonal and
 * then the value from {@code calcSignificance} for every later experiment.
 *
 * @param tTest        t-test implementation passed through to {@code calcSignificance}
 * @param scorer       scorer whose per-query scores are compared
 * @param querySet     query set; when blank it is left out of the file name
 * @param experiments  experiment names in the order they should appear
 * @param experimentDB source of the per-query scores
 * @param outputDir    directory the CSV file is written to
 * @throws Exception if scores cannot be read or the file cannot be written
 */
private static void writeMatrix(TTest tTest, AbstractJudgmentScorer scorer,
                                String querySet,
                                List<String> experiments,
                                ExperimentDB experimentDB,
                                Path outputDir) throws Exception {

    final String scorerName = scorer.getName();

    String suffix;
    if (StringUtils.isBlank(querySet)) {
        suffix = ".csv";
    } else {
        suffix = "_" + querySet + ".csv";
    }
    String fileName = "sig_diffs_" + scorerName + suffix;

    // Keep only the leading experiments, capped at MAX_MATRIX_COLS.
    List<String> columns = new ArrayList<>();
    for (String name : experiments) {
        if (columns.size() >= MAX_MATRIX_COLS) {
            break;
        }
        columns.add(name);
    }

    try (BufferedWriter writer = Files.newBufferedWriter(outputDir.resolve(fileName))) {
        // Header row: an empty first cell, then one cell per experiment.
        for (String column : columns) {
            writer.write("," + column);
        }
        writer.write("\n");

        final String selfCell = String.format(Locale.US, "%.3G", 1.0d) + ","; // p-value of itself
        final int total = columns.size();
        for (int row = 0; row < total; row++) {
            String experimentA = columns.get(row);
            writer.write(experimentA);
            // One separator after the name plus one per empty cell left of the diagonal.
            int separators = row + 1;
            while (separators-- > 0) {
                writer.write(",");
            }
            writer.write(selfCell);

            // query -> score for experiment A under this scorer
            Map<String, Double> scoresA =
                    experimentDB.getScores(querySet, experimentA, scorerName);
            for (String experimentB : columns.subList(row + 1, total)) {
                double pValue = calcSignificance(tTest, querySet, scoresA,
                        experimentA, experimentB, scorerName, experimentDB);
                writer.write(String.format(Locale.US, "%.3G", pValue));
                writer.write(",");
            }
            writer.write("\n");
        }
    }
}
 
Example #6
Source File: HypothesisTestableMetric.java    From StreamingRec with Apache License 2.0 2 votes vote down vote up
/**
 * Runs a two-tailed paired t-test between the detailed results of this metric
 * and those of another one.
 * Since n should always be greater than 30, normality can be assumed.
 * @param otherAlgorithm the metric whose detailed results are compared against this one
 * @return the p-value of the paired t-test
 */
public double getTTestPValue(HypothesisTestableMetric otherAlgorithm){
	double[] ownResults = getDetailedResults().toDoubleArray();
	double[] otherResults = otherAlgorithm.getDetailedResults().toDoubleArray();
	TTest test = new TTest();
	return test.pairedTTest(ownResults, otherResults);
}
 
Example #7
Source File: StatisticsUtil.java    From AILibs with GNU Affero General Public License v3.0 2 votes vote down vote up
/**
 * Performs a two-sample t-test on the given samples and reports whether the
 * difference is significant at the 0.05 level. The test presumes that both
 * samples are drawn from normal distributions.
 *
 * @param valuesA The first sample.
 * @param valuesB The second sample.
 * @return True iff the difference is significant (p-value &lt; 0.05)
 */
public static boolean twoSampleTTestSignificance(final double[] valuesA, final double[] valuesB) {
	final double alpha = 0.05;
	final TTest test = new TTest();
	final boolean significant = test.tTest(valuesA, valuesB, alpha);
	return significant;
}