Java Code Examples for org.apache.flink.api.java.summarize.NumericColumnSummary

The following examples show how to use org.apache.flink.api.java.summarize.NumericColumnSummary. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: Flink-CEPplus   Source File: NumericSummaryAggregator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds the column summary from the counters and running aggregates of this aggregator.
 *
 * <p>Min, max, sum and mean are passed as {@code null} when {@code nonMissingCount} is zero.
 * Variance and standard deviation are passed as {@code null} unless {@code nonMissingCount}
 * is greater than one.
 */
@Override
public NumericColumnSummary<T> result() {

	final boolean noValues = nonMissingCount == 0;

	// the divisor is (count - 1), so at least two values are needed for a variance
	final Double variance = nonMissingCount > 1 ? m2.value() / (nonMissingCount - 1) : null;

	Double standardDeviation = null;
	if (variance != null) {
		standardDeviation = Math.sqrt(variance);
	}

	return new NumericColumnSummary<T>(
		nonMissingCount,
		nullCount,
		nanCount,
		infinityCount,
		noValues ? null : min.result(),
		noValues ? null : max.result(),
		noValues ? null : sum.result(),
		noValues ? null : mean.value(),
		variance,
		standardDeviation
	);
}
 
Example 2
Source Project: Flink-CEPplus   Source File: LongSummaryAggregatorTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Summarizes the given values through an anonymous {@link AggregateCombineHarness}.
 *
 * <p>The harness's {@code compareResults} override asserts that two summaries agree on all
 * counts, on the null/non-null flags and on min, max, sum, mean, variance and standard
 * deviation.
 */
protected NumericColumnSummary<Long> summarize(Long... values) {
	final AggregateCombineHarness<Long, NumericColumnSummary<Long>, LongSummaryAggregator> harness =
		new AggregateCombineHarness<Long, NumericColumnSummary<Long>, LongSummaryAggregator>() {

			@Override
			protected void compareResults(NumericColumnSummary<Long> left, NumericColumnSummary<Long> right) {
				final double tightTolerance = 1e-12d;
				final double varianceTolerance = 1e-9d;

				// counts
				Assert.assertEquals(left.getTotalCount(), right.getTotalCount());
				Assert.assertEquals(left.getNullCount(), right.getNullCount());
				Assert.assertEquals(left.getMissingCount(), right.getMissingCount());
				Assert.assertEquals(left.getNonMissingCount(), right.getNonMissingCount());
				Assert.assertEquals(left.getInfinityCount(), right.getInfinityCount());
				Assert.assertEquals(left.getNanCount(), right.getNanCount());

				// flags
				Assert.assertEquals(left.containsNull(), right.containsNull());
				Assert.assertEquals(left.containsNonNull(), right.containsNonNull());

				// statistics
				Assert.assertEquals(left.getMin().longValue(), right.getMin().longValue());
				Assert.assertEquals(left.getMax().longValue(), right.getMax().longValue());
				Assert.assertEquals(left.getSum().longValue(), right.getSum().longValue());
				Assert.assertEquals(left.getMean().doubleValue(), right.getMean().doubleValue(), tightTolerance);
				Assert.assertEquals(left.getVariance().doubleValue(), right.getVariance().doubleValue(), varianceTolerance);
				Assert.assertEquals(left.getStandardDeviation().doubleValue(), right.getStandardDeviation().doubleValue(), tightTolerance);
			}
		};

	return harness.summarize(values);
}
 
Example 3
/**
 * Summarizes the given values after wrapping each non-null entry in a {@link DoubleValue}.
 *
 * <p>{@code null} entries stay {@code null} in the wrapped array. The harness's
 * {@code compareResults} override asserts that two summaries have equal min and max and that
 * mean, variance and standard deviation match within a small tolerance.
 */
protected NumericColumnSummary<Double> summarize(Double... values) {

	final int count = values.length;
	final DoubleValue[] wrapped = new DoubleValue[count];
	for (int idx = 0; idx < count; idx++) {
		final Double raw = values[idx];
		if (raw == null) {
			continue; // keep the slot null
		}
		wrapped[idx] = new DoubleValue(raw);
	}

	final AggregateCombineHarness<DoubleValue, NumericColumnSummary<Double>, ValueSummaryAggregator.DoubleValueSummaryAggregator> harness =
		new AggregateCombineHarness<DoubleValue, NumericColumnSummary<Double>, ValueSummaryAggregator.DoubleValueSummaryAggregator>() {

			@Override
			protected void compareResults(NumericColumnSummary<Double> left, NumericColumnSummary<Double> right) {
				final double exact = 0.0;
				final double tightTolerance = 1e-12d;
				final double varianceTolerance = 1e-9d;

				Assert.assertEquals(left.getMin(), right.getMin(), exact);
				Assert.assertEquals(left.getMax(), right.getMax(), exact);
				Assert.assertEquals(left.getMean(), right.getMean(), tightTolerance);
				Assert.assertEquals(left.getVariance(), right.getVariance(), varianceTolerance);
				Assert.assertEquals(left.getStandardDeviation(), right.getStandardDeviation(), tightTolerance);
			}

		};

	return harness.summarize(wrapped);
}
 
Example 4
/**
 * Summarizes the given values after wrapping each non-null entry in a {@link FloatValue}.
 *
 * <p>{@code null} entries stay {@code null} in the wrapped array. The harness's
 * {@code compareResults} override asserts that two summaries have equal min and max and that
 * mean, variance and standard deviation match within a small tolerance.
 */
@Override
protected NumericColumnSummary<Float> summarize(Float... values) {

	final int count = values.length;
	final FloatValue[] wrapped = new FloatValue[count];
	for (int idx = 0; idx < count; idx++) {
		final Float raw = values[idx];
		if (raw == null) {
			continue; // keep the slot null
		}
		wrapped[idx] = new FloatValue(raw);
	}

	final AggregateCombineHarness<FloatValue, NumericColumnSummary<Float>, ValueSummaryAggregator.FloatValueSummaryAggregator> harness =
		new AggregateCombineHarness<FloatValue, NumericColumnSummary<Float>, ValueSummaryAggregator.FloatValueSummaryAggregator>() {

			@Override
			protected void compareResults(NumericColumnSummary<Float> left, NumericColumnSummary<Float> right) {
				final float exact = 0.0f;
				final double looseTolerance = 1e-10d;
				final double varianceTolerance = 1e-9d;

				Assert.assertEquals(left.getMin(), right.getMin(), exact);
				Assert.assertEquals(left.getMax(), right.getMax(), exact);
				Assert.assertEquals(left.getMean(), right.getMean(), looseTolerance);
				Assert.assertEquals(left.getVariance(), right.getVariance(), varianceTolerance);
				Assert.assertEquals(left.getStandardDeviation(), right.getStandardDeviation(), looseTolerance);
			}

		};

	return harness.summarize(wrapped);
}
 
Example 5
Source Project: Flink-CEPplus   Source File: FloatSummaryAggregatorTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks mean, variance and standard deviation for two x-value sets from Anscombe's Quartet.
 *
 * <p>The data was chosen only because its means and variances are known.
 *
 * <p>https://en.wikipedia.org/wiki/Anscombe%27s_quartet
 */
@Test
public void testAnscomesQuartetXValues() throws Exception {

	final double expectedMean = 9.0;
	final double expectedVariance = 11.0;
	final double expectedStdDev = Math.sqrt(11.0f);
	final double tolerance = 1e-10d;

	final Float[] setOneX = { 10.0f, 8.0f, 13.0f, 9.0f, 11.0f, 14.0f, 6.0f, 4.0f, 12.0f, 7.0f, 5.0f };
	final Float[] setFourX = { 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 19.0f, 8.0f, 8.0f, 8.0f };

	final NumericColumnSummary<Float> summaryOne = summarize(setOneX);
	final NumericColumnSummary<Float> summaryFour = summarize(setFourX);

	// the means are expected to match exactly
	Assert.assertEquals(expectedMean, summaryOne.getMean().doubleValue(), 0.0f);
	Assert.assertEquals(expectedMean, summaryFour.getMean().doubleValue(), 0.0f);

	Assert.assertEquals(expectedVariance, summaryOne.getVariance().doubleValue(), tolerance);
	Assert.assertEquals(expectedVariance, summaryFour.getVariance().doubleValue(), tolerance);

	Assert.assertEquals(expectedStdDev, summaryOne.getStandardDeviation().doubleValue(), tolerance);
	Assert.assertEquals(expectedStdDev, summaryFour.getStandardDeviation().doubleValue(), tolerance);
}
 
Example 6
Source Project: Flink-CEPplus   Source File: FloatSummaryAggregatorTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks mean and variance for the four y-value sets from Anscombe's Quartet.
 *
 * <p>The data was chosen only because its means and variances are known.
 *
 * <p>https://en.wikipedia.org/wiki/Anscombe%27s_quartet
 */
@Test
public void testAnscomesQuartetYValues() throws Exception {
	final double expectedMean = 7.5;
	final double meanTolerance = 0.001;
	final double expectedVariance = 4.12;
	final double varianceTolerance = 0.01;

	final Float[] setOneY = { 8.04f, 6.95f, 7.58f, 8.81f, 8.33f, 9.96f, 7.24f, 4.26f, 10.84f, 4.82f, 5.68f };
	final Float[] setTwoY = { 9.14f, 8.14f, 8.74f, 8.77f, 9.26f, 8.1f, 6.13f, 3.1f, 9.13f, 7.26f, 4.74f };
	final Float[] setThreeY = { 7.46f, 6.77f, 12.74f, 7.11f, 7.81f, 8.84f, 6.08f, 5.39f, 8.15f, 6.42f, 5.73f };
	final Float[] setFourY = { 6.58f, 5.76f, 7.71f, 8.84f, 8.47f, 7.04f, 5.25f, 12.5f, 5.56f, 7.91f, 6.89f };

	final NumericColumnSummary<Float> summaryOne = summarize(setOneY);
	final NumericColumnSummary<Float> summaryTwo = summarize(setTwoY);
	final NumericColumnSummary<Float> summaryThree = summarize(setThreeY);
	final NumericColumnSummary<Float> summaryFour = summarize(setFourY);

	// the y values have less precisely matching means and variances, hence the wider tolerances

	Assert.assertEquals(expectedMean, summaryOne.getMean().doubleValue(), meanTolerance);
	Assert.assertEquals(expectedMean, summaryTwo.getMean().doubleValue(), meanTolerance);
	Assert.assertEquals(expectedMean, summaryThree.getMean().doubleValue(), meanTolerance);
	Assert.assertEquals(expectedMean, summaryFour.getMean().doubleValue(), meanTolerance);

	Assert.assertEquals(expectedVariance, summaryOne.getVariance().doubleValue(), varianceTolerance);
	Assert.assertEquals(expectedVariance, summaryTwo.getVariance().doubleValue(), varianceTolerance);
	Assert.assertEquals(expectedVariance, summaryThree.getVariance().doubleValue(), varianceTolerance);
	Assert.assertEquals(expectedVariance, summaryFour.getVariance().doubleValue(), varianceTolerance);
}
 
Example 7
Source Project: Flink-CEPplus   Source File: FloatSummaryAggregatorTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Summarizes the given values through an anonymous {@link AggregateCombineHarness}.
 *
 * <p>The harness's {@code compareResults} override asserts that two summaries have equal min
 * and max and that mean, variance and standard deviation match within a small tolerance.
 */
protected NumericColumnSummary<Float> summarize(Float... values) {

	final AggregateCombineHarness<Float, NumericColumnSummary<Float>, FloatSummaryAggregator> harness =
		new AggregateCombineHarness<Float, NumericColumnSummary<Float>, FloatSummaryAggregator>() {

			@Override
			protected void compareResults(NumericColumnSummary<Float> left, NumericColumnSummary<Float> right) {
				final float exact = 0.0f;
				final double tightTolerance = 1e-12d;
				final double varianceTolerance = 1e-9d;

				Assert.assertEquals(left.getMin(), right.getMin(), exact);
				Assert.assertEquals(left.getMax(), right.getMax(), exact);
				Assert.assertEquals(left.getMean(), right.getMean(), tightTolerance);
				Assert.assertEquals(left.getVariance(), right.getVariance(), varianceTolerance);
				Assert.assertEquals(left.getStandardDeviation(), right.getStandardDeviation(), tightTolerance);
			}

		};

	return harness.summarize(values);
}
 
Example 8
Source Project: Flink-CEPplus   Source File: DoubleSummaryAggregatorTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks mean, variance and standard deviation for two x-value sets from Anscombe's Quartet.
 *
 * <p>The data was chosen only because its means and variances are known.
 *
 * <p>https://en.wikipedia.org/wiki/Anscombe%27s_quartet
 */
@Test
public void testAnscomesQuartetXValues() throws Exception {

	final double expectedMean = 9.0;
	final double expectedVariance = 11.0;
	final double expectedStdDev = Math.sqrt(11.0);
	final double tolerance = 1e-10d;

	final Double[] setOneX = { 10.0, 8.0, 13.0, 9.0, 11.0, 14.0, 6.0, 4.0, 12.0, 7.0, 5.0 };
	final Double[] setFourX = { 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 19.0, 8.0, 8.0, 8.0 };

	final NumericColumnSummary<Double> summaryOne = summarize(setOneX);
	final NumericColumnSummary<Double> summaryFour = summarize(setFourX);

	// the means are expected to match exactly
	Assert.assertEquals(expectedMean, summaryOne.getMean().doubleValue(), 0.0);
	Assert.assertEquals(expectedMean, summaryFour.getMean().doubleValue(), 0.0);

	Assert.assertEquals(expectedVariance, summaryOne.getVariance().doubleValue(), tolerance);
	Assert.assertEquals(expectedVariance, summaryFour.getVariance().doubleValue(), tolerance);

	Assert.assertEquals(expectedStdDev, summaryOne.getStandardDeviation().doubleValue(), tolerance);
	Assert.assertEquals(expectedStdDev, summaryFour.getStandardDeviation().doubleValue(), tolerance);
}
 
Example 9
Source Project: Flink-CEPplus   Source File: DoubleSummaryAggregatorTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks mean and variance for the four y-value sets from Anscombe's Quartet.
 *
 * <p>The data was chosen only because its means and variances are known.
 *
 * <p>https://en.wikipedia.org/wiki/Anscombe%27s_quartet
 */
@Test
public void testAnscomesQuartetYValues() throws Exception {
	final double expectedMean = 7.5;
	final double meanTolerance = 0.001;
	final double expectedVariance = 4.12;
	final double varianceTolerance = 0.01;

	final Double[] setOneY = { 8.04, 6.95, 7.58, 8.81, 8.33, 9.96, 7.24, 4.26, 10.84, 4.82, 5.68 };
	final Double[] setTwoY = { 9.14, 8.14, 8.74, 8.77, 9.26, 8.1, 6.13, 3.1, 9.13, 7.26, 4.74 };
	final Double[] setThreeY = { 7.46, 6.77, 12.74, 7.11, 7.81, 8.84, 6.08, 5.39, 8.15, 6.42, 5.73 };
	final Double[] setFourY = { 6.58, 5.76, 7.71, 8.84, 8.47, 7.04, 5.25, 12.5, 5.56, 7.91, 6.89 };

	final NumericColumnSummary<Double> summaryOne = summarize(setOneY);
	final NumericColumnSummary<Double> summaryTwo = summarize(setTwoY);
	final NumericColumnSummary<Double> summaryThree = summarize(setThreeY);
	final NumericColumnSummary<Double> summaryFour = summarize(setFourY);

	// the y values have less precisely matching means and variances, hence the wider tolerances

	Assert.assertEquals(expectedMean, summaryOne.getMean().doubleValue(), meanTolerance);
	Assert.assertEquals(expectedMean, summaryTwo.getMean().doubleValue(), meanTolerance);
	Assert.assertEquals(expectedMean, summaryThree.getMean().doubleValue(), meanTolerance);
	Assert.assertEquals(expectedMean, summaryFour.getMean().doubleValue(), meanTolerance);

	Assert.assertEquals(expectedVariance, summaryOne.getVariance().doubleValue(), varianceTolerance);
	Assert.assertEquals(expectedVariance, summaryTwo.getVariance().doubleValue(), varianceTolerance);
	Assert.assertEquals(expectedVariance, summaryThree.getVariance().doubleValue(), varianceTolerance);
	Assert.assertEquals(expectedVariance, summaryFour.getVariance().doubleValue(), varianceTolerance);
}
 
Example 10
Source Project: flink   Source File: NumericSummaryAggregator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds the column summary from the counters and running aggregates of this aggregator.
 *
 * <p>Min, max, sum and mean are passed as {@code null} when {@code nonMissingCount} is zero.
 * Variance and standard deviation are passed as {@code null} unless {@code nonMissingCount}
 * is greater than one.
 */
@Override
public NumericColumnSummary<T> result() {

	final boolean noValues = nonMissingCount == 0;

	// the divisor is (count - 1), so at least two values are needed for a variance
	final Double variance = nonMissingCount > 1 ? m2.value() / (nonMissingCount - 1) : null;

	Double standardDeviation = null;
	if (variance != null) {
		standardDeviation = Math.sqrt(variance);
	}

	return new NumericColumnSummary<T>(
		nonMissingCount,
		nullCount,
		nanCount,
		infinityCount,
		noValues ? null : min.result(),
		noValues ? null : max.result(),
		noValues ? null : sum.result(),
		noValues ? null : mean.value(),
		variance,
		standardDeviation
	);
}
 
Example 11
Source Project: flink   Source File: LongSummaryAggregatorTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Summarizes the given values through an anonymous {@link AggregateCombineHarness}.
 *
 * <p>The harness's {@code compareResults} override asserts that two summaries agree on all
 * counts, on the null/non-null flags and on min, max, sum, mean, variance and standard
 * deviation.
 */
protected NumericColumnSummary<Long> summarize(Long... values) {
	final AggregateCombineHarness<Long, NumericColumnSummary<Long>, LongSummaryAggregator> harness =
		new AggregateCombineHarness<Long, NumericColumnSummary<Long>, LongSummaryAggregator>() {

			@Override
			protected void compareResults(NumericColumnSummary<Long> left, NumericColumnSummary<Long> right) {
				final double tightTolerance = 1e-12d;
				final double varianceTolerance = 1e-9d;

				// counts
				Assert.assertEquals(left.getTotalCount(), right.getTotalCount());
				Assert.assertEquals(left.getNullCount(), right.getNullCount());
				Assert.assertEquals(left.getMissingCount(), right.getMissingCount());
				Assert.assertEquals(left.getNonMissingCount(), right.getNonMissingCount());
				Assert.assertEquals(left.getInfinityCount(), right.getInfinityCount());
				Assert.assertEquals(left.getNanCount(), right.getNanCount());

				// flags
				Assert.assertEquals(left.containsNull(), right.containsNull());
				Assert.assertEquals(left.containsNonNull(), right.containsNonNull());

				// statistics
				Assert.assertEquals(left.getMin().longValue(), right.getMin().longValue());
				Assert.assertEquals(left.getMax().longValue(), right.getMax().longValue());
				Assert.assertEquals(left.getSum().longValue(), right.getSum().longValue());
				Assert.assertEquals(left.getMean().doubleValue(), right.getMean().doubleValue(), tightTolerance);
				Assert.assertEquals(left.getVariance().doubleValue(), right.getVariance().doubleValue(), varianceTolerance);
				Assert.assertEquals(left.getStandardDeviation().doubleValue(), right.getStandardDeviation().doubleValue(), tightTolerance);
			}
		};

	return harness.summarize(values);
}
 
Example 12
Source Project: flink   Source File: DoubleValueSummaryAggregatorTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Summarizes the given values after wrapping each non-null entry in a {@link DoubleValue}.
 *
 * <p>{@code null} entries stay {@code null} in the wrapped array. The harness's
 * {@code compareResults} override asserts that two summaries have equal min and max and that
 * mean, variance and standard deviation match within a small tolerance.
 */
protected NumericColumnSummary<Double> summarize(Double... values) {

	final int count = values.length;
	final DoubleValue[] wrapped = new DoubleValue[count];
	for (int idx = 0; idx < count; idx++) {
		final Double raw = values[idx];
		if (raw == null) {
			continue; // keep the slot null
		}
		wrapped[idx] = new DoubleValue(raw);
	}

	final AggregateCombineHarness<DoubleValue, NumericColumnSummary<Double>, ValueSummaryAggregator.DoubleValueSummaryAggregator> harness =
		new AggregateCombineHarness<DoubleValue, NumericColumnSummary<Double>, ValueSummaryAggregator.DoubleValueSummaryAggregator>() {

			@Override
			protected void compareResults(NumericColumnSummary<Double> left, NumericColumnSummary<Double> right) {
				final double exact = 0.0;
				final double tightTolerance = 1e-12d;
				final double varianceTolerance = 1e-9d;

				Assert.assertEquals(left.getMin(), right.getMin(), exact);
				Assert.assertEquals(left.getMax(), right.getMax(), exact);
				Assert.assertEquals(left.getMean(), right.getMean(), tightTolerance);
				Assert.assertEquals(left.getVariance(), right.getVariance(), varianceTolerance);
				Assert.assertEquals(left.getStandardDeviation(), right.getStandardDeviation(), tightTolerance);
			}

		};

	return harness.summarize(wrapped);
}
 
Example 13
Source Project: flink   Source File: FloatValueSummaryAggregatorTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Summarizes the given values after wrapping each non-null entry in a {@link FloatValue}.
 *
 * <p>{@code null} entries stay {@code null} in the wrapped array. The harness's
 * {@code compareResults} override asserts that two summaries have equal min and max and that
 * mean, variance and standard deviation match within a small tolerance.
 */
@Override
protected NumericColumnSummary<Float> summarize(Float... values) {

	final int count = values.length;
	final FloatValue[] wrapped = new FloatValue[count];
	for (int idx = 0; idx < count; idx++) {
		final Float raw = values[idx];
		if (raw == null) {
			continue; // keep the slot null
		}
		wrapped[idx] = new FloatValue(raw);
	}

	final AggregateCombineHarness<FloatValue, NumericColumnSummary<Float>, ValueSummaryAggregator.FloatValueSummaryAggregator> harness =
		new AggregateCombineHarness<FloatValue, NumericColumnSummary<Float>, ValueSummaryAggregator.FloatValueSummaryAggregator>() {

			@Override
			protected void compareResults(NumericColumnSummary<Float> left, NumericColumnSummary<Float> right) {
				final float exact = 0.0f;
				final double looseTolerance = 1e-10d;
				final double varianceTolerance = 1e-9d;

				Assert.assertEquals(left.getMin(), right.getMin(), exact);
				Assert.assertEquals(left.getMax(), right.getMax(), exact);
				Assert.assertEquals(left.getMean(), right.getMean(), looseTolerance);
				Assert.assertEquals(left.getVariance(), right.getVariance(), varianceTolerance);
				Assert.assertEquals(left.getStandardDeviation(), right.getStandardDeviation(), looseTolerance);
			}

		};

	return harness.summarize(wrapped);
}
 
Example 14
Source Project: flink   Source File: FloatSummaryAggregatorTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks mean, variance and standard deviation for two x-value sets from Anscombe's Quartet.
 *
 * <p>The data was chosen only because its means and variances are known.
 *
 * <p>https://en.wikipedia.org/wiki/Anscombe%27s_quartet
 */
@Test
public void testAnscomesQuartetXValues() throws Exception {

	final double expectedMean = 9.0;
	final double expectedVariance = 11.0;
	final double expectedStdDev = Math.sqrt(11.0f);
	final double tolerance = 1e-10d;

	final Float[] setOneX = { 10.0f, 8.0f, 13.0f, 9.0f, 11.0f, 14.0f, 6.0f, 4.0f, 12.0f, 7.0f, 5.0f };
	final Float[] setFourX = { 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 19.0f, 8.0f, 8.0f, 8.0f };

	final NumericColumnSummary<Float> summaryOne = summarize(setOneX);
	final NumericColumnSummary<Float> summaryFour = summarize(setFourX);

	// the means are expected to match exactly
	Assert.assertEquals(expectedMean, summaryOne.getMean().doubleValue(), 0.0f);
	Assert.assertEquals(expectedMean, summaryFour.getMean().doubleValue(), 0.0f);

	Assert.assertEquals(expectedVariance, summaryOne.getVariance().doubleValue(), tolerance);
	Assert.assertEquals(expectedVariance, summaryFour.getVariance().doubleValue(), tolerance);

	Assert.assertEquals(expectedStdDev, summaryOne.getStandardDeviation().doubleValue(), tolerance);
	Assert.assertEquals(expectedStdDev, summaryFour.getStandardDeviation().doubleValue(), tolerance);
}
 
Example 15
Source Project: flink   Source File: FloatSummaryAggregatorTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks mean and variance for the four y-value sets from Anscombe's Quartet.
 *
 * <p>The data was chosen only because its means and variances are known.
 *
 * <p>https://en.wikipedia.org/wiki/Anscombe%27s_quartet
 */
@Test
public void testAnscomesQuartetYValues() throws Exception {
	final double expectedMean = 7.5;
	final double meanTolerance = 0.001;
	final double expectedVariance = 4.12;
	final double varianceTolerance = 0.01;

	final Float[] setOneY = { 8.04f, 6.95f, 7.58f, 8.81f, 8.33f, 9.96f, 7.24f, 4.26f, 10.84f, 4.82f, 5.68f };
	final Float[] setTwoY = { 9.14f, 8.14f, 8.74f, 8.77f, 9.26f, 8.1f, 6.13f, 3.1f, 9.13f, 7.26f, 4.74f };
	final Float[] setThreeY = { 7.46f, 6.77f, 12.74f, 7.11f, 7.81f, 8.84f, 6.08f, 5.39f, 8.15f, 6.42f, 5.73f };
	final Float[] setFourY = { 6.58f, 5.76f, 7.71f, 8.84f, 8.47f, 7.04f, 5.25f, 12.5f, 5.56f, 7.91f, 6.89f };

	final NumericColumnSummary<Float> summaryOne = summarize(setOneY);
	final NumericColumnSummary<Float> summaryTwo = summarize(setTwoY);
	final NumericColumnSummary<Float> summaryThree = summarize(setThreeY);
	final NumericColumnSummary<Float> summaryFour = summarize(setFourY);

	// the y values have less precisely matching means and variances, hence the wider tolerances

	Assert.assertEquals(expectedMean, summaryOne.getMean().doubleValue(), meanTolerance);
	Assert.assertEquals(expectedMean, summaryTwo.getMean().doubleValue(), meanTolerance);
	Assert.assertEquals(expectedMean, summaryThree.getMean().doubleValue(), meanTolerance);
	Assert.assertEquals(expectedMean, summaryFour.getMean().doubleValue(), meanTolerance);

	Assert.assertEquals(expectedVariance, summaryOne.getVariance().doubleValue(), varianceTolerance);
	Assert.assertEquals(expectedVariance, summaryTwo.getVariance().doubleValue(), varianceTolerance);
	Assert.assertEquals(expectedVariance, summaryThree.getVariance().doubleValue(), varianceTolerance);
	Assert.assertEquals(expectedVariance, summaryFour.getVariance().doubleValue(), varianceTolerance);
}
 
Example 16
Source Project: flink   Source File: FloatSummaryAggregatorTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Summarizes the given values through an anonymous {@link AggregateCombineHarness}.
 *
 * <p>The harness's {@code compareResults} override asserts that two summaries have equal min
 * and max and that mean, variance and standard deviation match within a small tolerance.
 */
protected NumericColumnSummary<Float> summarize(Float... values) {

	final AggregateCombineHarness<Float, NumericColumnSummary<Float>, FloatSummaryAggregator> harness =
		new AggregateCombineHarness<Float, NumericColumnSummary<Float>, FloatSummaryAggregator>() {

			@Override
			protected void compareResults(NumericColumnSummary<Float> left, NumericColumnSummary<Float> right) {
				final float exact = 0.0f;
				final double tightTolerance = 1e-12d;
				final double varianceTolerance = 1e-9d;

				Assert.assertEquals(left.getMin(), right.getMin(), exact);
				Assert.assertEquals(left.getMax(), right.getMax(), exact);
				Assert.assertEquals(left.getMean(), right.getMean(), tightTolerance);
				Assert.assertEquals(left.getVariance(), right.getVariance(), varianceTolerance);
				Assert.assertEquals(left.getStandardDeviation(), right.getStandardDeviation(), tightTolerance);
			}

		};

	return harness.summarize(values);
}
 
Example 17
Source Project: flink   Source File: DoubleSummaryAggregatorTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks mean, variance and standard deviation for two x-value sets from Anscombe's Quartet.
 *
 * <p>The data was chosen only because its means and variances are known.
 *
 * <p>https://en.wikipedia.org/wiki/Anscombe%27s_quartet
 */
@Test
public void testAnscomesQuartetXValues() throws Exception {

	final double expectedMean = 9.0;
	final double expectedVariance = 11.0;
	final double expectedStdDev = Math.sqrt(11.0);
	final double tolerance = 1e-10d;

	final Double[] setOneX = { 10.0, 8.0, 13.0, 9.0, 11.0, 14.0, 6.0, 4.0, 12.0, 7.0, 5.0 };
	final Double[] setFourX = { 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 19.0, 8.0, 8.0, 8.0 };

	final NumericColumnSummary<Double> summaryOne = summarize(setOneX);
	final NumericColumnSummary<Double> summaryFour = summarize(setFourX);

	// the means are expected to match exactly
	Assert.assertEquals(expectedMean, summaryOne.getMean().doubleValue(), 0.0);
	Assert.assertEquals(expectedMean, summaryFour.getMean().doubleValue(), 0.0);

	Assert.assertEquals(expectedVariance, summaryOne.getVariance().doubleValue(), tolerance);
	Assert.assertEquals(expectedVariance, summaryFour.getVariance().doubleValue(), tolerance);

	Assert.assertEquals(expectedStdDev, summaryOne.getStandardDeviation().doubleValue(), tolerance);
	Assert.assertEquals(expectedStdDev, summaryFour.getStandardDeviation().doubleValue(), tolerance);
}
 
Example 18
Source Project: flink   Source File: DoubleSummaryAggregatorTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks mean and variance for the four y-value sets from Anscombe's Quartet.
 *
 * <p>The data was chosen only because its means and variances are known.
 *
 * <p>https://en.wikipedia.org/wiki/Anscombe%27s_quartet
 */
@Test
public void testAnscomesQuartetYValues() throws Exception {
	final double expectedMean = 7.5;
	final double meanTolerance = 0.001;
	final double expectedVariance = 4.12;
	final double varianceTolerance = 0.01;

	final Double[] setOneY = { 8.04, 6.95, 7.58, 8.81, 8.33, 9.96, 7.24, 4.26, 10.84, 4.82, 5.68 };
	final Double[] setTwoY = { 9.14, 8.14, 8.74, 8.77, 9.26, 8.1, 6.13, 3.1, 9.13, 7.26, 4.74 };
	final Double[] setThreeY = { 7.46, 6.77, 12.74, 7.11, 7.81, 8.84, 6.08, 5.39, 8.15, 6.42, 5.73 };
	final Double[] setFourY = { 6.58, 5.76, 7.71, 8.84, 8.47, 7.04, 5.25, 12.5, 5.56, 7.91, 6.89 };

	final NumericColumnSummary<Double> summaryOne = summarize(setOneY);
	final NumericColumnSummary<Double> summaryTwo = summarize(setTwoY);
	final NumericColumnSummary<Double> summaryThree = summarize(setThreeY);
	final NumericColumnSummary<Double> summaryFour = summarize(setFourY);

	// the y values have less precisely matching means and variances, hence the wider tolerances

	Assert.assertEquals(expectedMean, summaryOne.getMean().doubleValue(), meanTolerance);
	Assert.assertEquals(expectedMean, summaryTwo.getMean().doubleValue(), meanTolerance);
	Assert.assertEquals(expectedMean, summaryThree.getMean().doubleValue(), meanTolerance);
	Assert.assertEquals(expectedMean, summaryFour.getMean().doubleValue(), meanTolerance);

	Assert.assertEquals(expectedVariance, summaryOne.getVariance().doubleValue(), varianceTolerance);
	Assert.assertEquals(expectedVariance, summaryTwo.getVariance().doubleValue(), varianceTolerance);
	Assert.assertEquals(expectedVariance, summaryThree.getVariance().doubleValue(), varianceTolerance);
	Assert.assertEquals(expectedVariance, summaryFour.getVariance().doubleValue(), varianceTolerance);
}
 
Example 19
Source Project: flink   Source File: NumericSummaryAggregator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds the column summary from the counters and running aggregates of this aggregator.
 *
 * <p>Min, max, sum and mean are passed as {@code null} when {@code nonMissingCount} is zero.
 * Variance and standard deviation are passed as {@code null} unless {@code nonMissingCount}
 * is greater than one.
 */
@Override
public NumericColumnSummary<T> result() {

	final boolean noValues = nonMissingCount == 0;

	// the divisor is (count - 1), so at least two values are needed for a variance
	final Double variance = nonMissingCount > 1 ? m2.value() / (nonMissingCount - 1) : null;

	Double standardDeviation = null;
	if (variance != null) {
		standardDeviation = Math.sqrt(variance);
	}

	return new NumericColumnSummary<T>(
		nonMissingCount,
		nullCount,
		nanCount,
		infinityCount,
		noValues ? null : min.result(),
		noValues ? null : max.result(),
		noValues ? null : sum.result(),
		noValues ? null : mean.value(),
		variance,
		standardDeviation
	);
}
 
Example 20
Source Project: flink   Source File: LongSummaryAggregatorTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Summarizes the given values through an anonymous {@link AggregateCombineHarness}.
 *
 * <p>The harness's {@code compareResults} override asserts that two summaries agree on all
 * counts, on the null/non-null flags and on min, max, sum, mean, variance and standard
 * deviation.
 */
protected NumericColumnSummary<Long> summarize(Long... values) {
	final AggregateCombineHarness<Long, NumericColumnSummary<Long>, LongSummaryAggregator> harness =
		new AggregateCombineHarness<Long, NumericColumnSummary<Long>, LongSummaryAggregator>() {

			@Override
			protected void compareResults(NumericColumnSummary<Long> left, NumericColumnSummary<Long> right) {
				final double tightTolerance = 1e-12d;
				final double varianceTolerance = 1e-9d;

				// counts
				Assert.assertEquals(left.getTotalCount(), right.getTotalCount());
				Assert.assertEquals(left.getNullCount(), right.getNullCount());
				Assert.assertEquals(left.getMissingCount(), right.getMissingCount());
				Assert.assertEquals(left.getNonMissingCount(), right.getNonMissingCount());
				Assert.assertEquals(left.getInfinityCount(), right.getInfinityCount());
				Assert.assertEquals(left.getNanCount(), right.getNanCount());

				// flags
				Assert.assertEquals(left.containsNull(), right.containsNull());
				Assert.assertEquals(left.containsNonNull(), right.containsNonNull());

				// statistics
				Assert.assertEquals(left.getMin().longValue(), right.getMin().longValue());
				Assert.assertEquals(left.getMax().longValue(), right.getMax().longValue());
				Assert.assertEquals(left.getSum().longValue(), right.getSum().longValue());
				Assert.assertEquals(left.getMean().doubleValue(), right.getMean().doubleValue(), tightTolerance);
				Assert.assertEquals(left.getVariance().doubleValue(), right.getVariance().doubleValue(), varianceTolerance);
				Assert.assertEquals(left.getStandardDeviation().doubleValue(), right.getStandardDeviation().doubleValue(), tightTolerance);
			}
		};

	return harness.summarize(values);
}
 
Example 21
Source Project: flink   Source File: DoubleValueSummaryAggregatorTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Summarizes the given values after wrapping each non-null entry in a {@link DoubleValue}.
 *
 * <p>{@code null} entries stay {@code null} in the wrapped array. The harness's
 * {@code compareResults} override asserts that two summaries have equal min and max and that
 * mean, variance and standard deviation match within a small tolerance.
 */
protected NumericColumnSummary<Double> summarize(Double... values) {

	final int count = values.length;
	final DoubleValue[] wrapped = new DoubleValue[count];
	for (int idx = 0; idx < count; idx++) {
		final Double raw = values[idx];
		if (raw == null) {
			continue; // keep the slot null
		}
		wrapped[idx] = new DoubleValue(raw);
	}

	final AggregateCombineHarness<DoubleValue, NumericColumnSummary<Double>, ValueSummaryAggregator.DoubleValueSummaryAggregator> harness =
		new AggregateCombineHarness<DoubleValue, NumericColumnSummary<Double>, ValueSummaryAggregator.DoubleValueSummaryAggregator>() {

			@Override
			protected void compareResults(NumericColumnSummary<Double> left, NumericColumnSummary<Double> right) {
				final double exact = 0.0;
				final double tightTolerance = 1e-12d;
				final double varianceTolerance = 1e-9d;

				Assert.assertEquals(left.getMin(), right.getMin(), exact);
				Assert.assertEquals(left.getMax(), right.getMax(), exact);
				Assert.assertEquals(left.getMean(), right.getMean(), tightTolerance);
				Assert.assertEquals(left.getVariance(), right.getVariance(), varianceTolerance);
				Assert.assertEquals(left.getStandardDeviation(), right.getStandardDeviation(), tightTolerance);
			}

		};

	return harness.summarize(wrapped);
}
 
Example 22
Source Project: flink   Source File: FloatValueSummaryAggregatorTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Summarizes the given values after wrapping each non-null entry in a {@link FloatValue}.
 *
 * <p>{@code null} entries stay {@code null} in the wrapped array. The harness's
 * {@code compareResults} override asserts that two summaries have equal min and max and that
 * mean, variance and standard deviation match within a small tolerance.
 */
@Override
protected NumericColumnSummary<Float> summarize(Float... values) {

	final int count = values.length;
	final FloatValue[] wrapped = new FloatValue[count];
	for (int idx = 0; idx < count; idx++) {
		final Float raw = values[idx];
		if (raw == null) {
			continue; // keep the slot null
		}
		wrapped[idx] = new FloatValue(raw);
	}

	final AggregateCombineHarness<FloatValue, NumericColumnSummary<Float>, ValueSummaryAggregator.FloatValueSummaryAggregator> harness =
		new AggregateCombineHarness<FloatValue, NumericColumnSummary<Float>, ValueSummaryAggregator.FloatValueSummaryAggregator>() {

			@Override
			protected void compareResults(NumericColumnSummary<Float> left, NumericColumnSummary<Float> right) {
				final float exact = 0.0f;
				final double looseTolerance = 1e-10d;
				final double varianceTolerance = 1e-9d;

				Assert.assertEquals(left.getMin(), right.getMin(), exact);
				Assert.assertEquals(left.getMax(), right.getMax(), exact);
				Assert.assertEquals(left.getMean(), right.getMean(), looseTolerance);
				Assert.assertEquals(left.getVariance(), right.getVariance(), varianceTolerance);
				Assert.assertEquals(left.getStandardDeviation(), right.getStandardDeviation(), looseTolerance);
			}

		};

	return harness.summarize(wrapped);
}
 
Example 23
Source Project: flink   Source File: FloatSummaryAggregatorTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks mean, variance and standard deviation for two x-value sets from Anscombe's Quartet.
 *
 * <p>The data was chosen only because its means and variances are known.
 *
 * <p>https://en.wikipedia.org/wiki/Anscombe%27s_quartet
 */
@Test
public void testAnscomesQuartetXValues() throws Exception {

	final double expectedMean = 9.0;
	final double expectedVariance = 11.0;
	final double expectedStdDev = Math.sqrt(11.0f);
	final double tolerance = 1e-10d;

	final Float[] setOneX = { 10.0f, 8.0f, 13.0f, 9.0f, 11.0f, 14.0f, 6.0f, 4.0f, 12.0f, 7.0f, 5.0f };
	final Float[] setFourX = { 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 19.0f, 8.0f, 8.0f, 8.0f };

	final NumericColumnSummary<Float> summaryOne = summarize(setOneX);
	final NumericColumnSummary<Float> summaryFour = summarize(setFourX);

	// the means are expected to match exactly
	Assert.assertEquals(expectedMean, summaryOne.getMean().doubleValue(), 0.0f);
	Assert.assertEquals(expectedMean, summaryFour.getMean().doubleValue(), 0.0f);

	Assert.assertEquals(expectedVariance, summaryOne.getVariance().doubleValue(), tolerance);
	Assert.assertEquals(expectedVariance, summaryFour.getVariance().doubleValue(), tolerance);

	Assert.assertEquals(expectedStdDev, summaryOne.getStandardDeviation().doubleValue(), tolerance);
	Assert.assertEquals(expectedStdDev, summaryFour.getStandardDeviation().doubleValue(), tolerance);
}
 
Example 24
Source Project: flink   Source File: FloatSummaryAggregatorTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks mean and variance for the four y-value sets from Anscombe's Quartet.
 *
 * <p>The data was chosen only because its means and variances are known.
 *
 * <p>https://en.wikipedia.org/wiki/Anscombe%27s_quartet
 */
@Test
public void testAnscomesQuartetYValues() throws Exception {
	final double expectedMean = 7.5;
	final double meanTolerance = 0.001;
	final double expectedVariance = 4.12;
	final double varianceTolerance = 0.01;

	final Float[] setOneY = { 8.04f, 6.95f, 7.58f, 8.81f, 8.33f, 9.96f, 7.24f, 4.26f, 10.84f, 4.82f, 5.68f };
	final Float[] setTwoY = { 9.14f, 8.14f, 8.74f, 8.77f, 9.26f, 8.1f, 6.13f, 3.1f, 9.13f, 7.26f, 4.74f };
	final Float[] setThreeY = { 7.46f, 6.77f, 12.74f, 7.11f, 7.81f, 8.84f, 6.08f, 5.39f, 8.15f, 6.42f, 5.73f };
	final Float[] setFourY = { 6.58f, 5.76f, 7.71f, 8.84f, 8.47f, 7.04f, 5.25f, 12.5f, 5.56f, 7.91f, 6.89f };

	final NumericColumnSummary<Float> summaryOne = summarize(setOneY);
	final NumericColumnSummary<Float> summaryTwo = summarize(setTwoY);
	final NumericColumnSummary<Float> summaryThree = summarize(setThreeY);
	final NumericColumnSummary<Float> summaryFour = summarize(setFourY);

	// the y values have less precisely matching means and variances, hence the wider tolerances

	Assert.assertEquals(expectedMean, summaryOne.getMean().doubleValue(), meanTolerance);
	Assert.assertEquals(expectedMean, summaryTwo.getMean().doubleValue(), meanTolerance);
	Assert.assertEquals(expectedMean, summaryThree.getMean().doubleValue(), meanTolerance);
	Assert.assertEquals(expectedMean, summaryFour.getMean().doubleValue(), meanTolerance);

	Assert.assertEquals(expectedVariance, summaryOne.getVariance().doubleValue(), varianceTolerance);
	Assert.assertEquals(expectedVariance, summaryTwo.getVariance().doubleValue(), varianceTolerance);
	Assert.assertEquals(expectedVariance, summaryThree.getVariance().doubleValue(), varianceTolerance);
	Assert.assertEquals(expectedVariance, summaryFour.getVariance().doubleValue(), varianceTolerance);
}
 
Example 25
Source Project: flink   Source File: FloatSummaryAggregatorTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Summarizes the given values through an anonymous {@link AggregateCombineHarness}.
 *
 * <p>The harness's {@code compareResults} override asserts that two summaries have equal min
 * and max and that mean, variance and standard deviation match within a small tolerance.
 */
protected NumericColumnSummary<Float> summarize(Float... values) {

	final AggregateCombineHarness<Float, NumericColumnSummary<Float>, FloatSummaryAggregator> harness =
		new AggregateCombineHarness<Float, NumericColumnSummary<Float>, FloatSummaryAggregator>() {

			@Override
			protected void compareResults(NumericColumnSummary<Float> left, NumericColumnSummary<Float> right) {
				final float exact = 0.0f;
				final double tightTolerance = 1e-12d;
				final double varianceTolerance = 1e-9d;

				Assert.assertEquals(left.getMin(), right.getMin(), exact);
				Assert.assertEquals(left.getMax(), right.getMax(), exact);
				Assert.assertEquals(left.getMean(), right.getMean(), tightTolerance);
				Assert.assertEquals(left.getVariance(), right.getVariance(), varianceTolerance);
				Assert.assertEquals(left.getStandardDeviation(), right.getStandardDeviation(), tightTolerance);
			}

		};

	return harness.summarize(values);
}
 
Example 26
Source Project: flink   Source File: DoubleSummaryAggregatorTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks mean, variance and standard deviation for two x-value sets from Anscombe's Quartet.
 *
 * <p>The data was chosen only because its means and variances are known.
 *
 * <p>https://en.wikipedia.org/wiki/Anscombe%27s_quartet
 */
@Test
public void testAnscomesQuartetXValues() throws Exception {

	final double expectedMean = 9.0;
	final double expectedVariance = 11.0;
	final double expectedStdDev = Math.sqrt(11.0);
	final double tolerance = 1e-10d;

	final Double[] setOneX = { 10.0, 8.0, 13.0, 9.0, 11.0, 14.0, 6.0, 4.0, 12.0, 7.0, 5.0 };
	final Double[] setFourX = { 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 19.0, 8.0, 8.0, 8.0 };

	final NumericColumnSummary<Double> summaryOne = summarize(setOneX);
	final NumericColumnSummary<Double> summaryFour = summarize(setFourX);

	// the means are expected to match exactly
	Assert.assertEquals(expectedMean, summaryOne.getMean().doubleValue(), 0.0);
	Assert.assertEquals(expectedMean, summaryFour.getMean().doubleValue(), 0.0);

	Assert.assertEquals(expectedVariance, summaryOne.getVariance().doubleValue(), tolerance);
	Assert.assertEquals(expectedVariance, summaryFour.getVariance().doubleValue(), tolerance);

	Assert.assertEquals(expectedStdDev, summaryOne.getStandardDeviation().doubleValue(), tolerance);
	Assert.assertEquals(expectedStdDev, summaryFour.getStandardDeviation().doubleValue(), tolerance);
}
 
Example 27
Source Project: flink   Source File: DoubleSummaryAggregatorTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks mean and variance against the y values of Anscombe's Quartet.
 *
 * <p>The data set was not picked for any particular reason other than that
 * its means and variance are known.
 *
 * <p>https://en.wikipedia.org/wiki/Anscombe%27s_quartet
 */
@Test
public void testAnscomesQuartetYValues() throws Exception {

	// the y values have less precisely matching means and variances,
	// so they are compared with comparatively loose tolerances
	final double expectedMean = 7.5;
	final double meanTolerance = 0.001;
	final double expectedVariance = 4.12;
	final double varianceTolerance = 0.01;

	final Double[] firstY = { 8.04, 6.95, 7.58, 8.81, 8.33, 9.96, 7.24, 4.26, 10.84, 4.82, 5.68 };
	final Double[] secondY = { 9.14, 8.14, 8.74, 8.77, 9.26, 8.1, 6.13, 3.1, 9.13, 7.26, 4.74 };
	final Double[] thirdY = { 7.46, 6.77, 12.74, 7.11, 7.81, 8.84, 6.08, 5.39, 8.15, 6.42, 5.73 };
	final Double[] fourthY = { 6.58, 5.76, 7.71, 8.84, 8.47, 7.04, 5.25, 12.5, 5.56, 7.91, 6.89 };

	NumericColumnSummary<Double> first = summarize(firstY);
	NumericColumnSummary<Double> second = summarize(secondY);
	NumericColumnSummary<Double> third = summarize(thirdY);
	NumericColumnSummary<Double> fourth = summarize(fourthY);

	Assert.assertEquals(expectedMean, first.getMean().doubleValue(), meanTolerance);
	Assert.assertEquals(expectedMean, second.getMean().doubleValue(), meanTolerance);
	Assert.assertEquals(expectedMean, third.getMean().doubleValue(), meanTolerance);
	Assert.assertEquals(expectedMean, fourth.getMean().doubleValue(), meanTolerance);

	Assert.assertEquals(expectedVariance, first.getVariance().doubleValue(), varianceTolerance);
	Assert.assertEquals(expectedVariance, second.getVariance().doubleValue(), varianceTolerance);
	Assert.assertEquals(expectedVariance, third.getVariance().doubleValue(), varianceTolerance);
	Assert.assertEquals(expectedVariance, fourth.getVariance().doubleValue(), varianceTolerance);
}
 
Example 28
Source Project: Flink-CEPplus   Source File: NumericSummaryAggregator.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Merges the state of another aggregation into this one.
 *
 * <p>The null, NaN and infinity counts are always added up. For the remaining
 * statistics there are three cases:
 * <ul>
 *   <li>this aggregator has no non-missing values: the other aggregator's
 *       count, min, max, sum, mean and m2 are taken over as they are;</li>
 *   <li>the other aggregator has no non-missing values: nothing else changes;</li>
 *   <li>both have non-missing values: min, max and sum are combined, the mean
 *       is shifted by the count-weighted difference of the two means, and m2
 *       becomes the sum of both m2 values plus
 *       {@code deltaMean^2 * countThis * countOther / combinedCount}.</li>
 * </ul>
 *
 * @param otherSameType the aggregation to merge in; it is cast to
 *                      {@code NumericSummaryAggregator<T>}
 */
@Override
public void combine(Aggregator<T, NumericColumnSummary<T>> otherSameType) {
	final NumericSummaryAggregator<T> that = (NumericSummaryAggregator<T>) otherSameType;

	// the counters for missing values are plainly additive
	nullCount += that.nullCount;
	nanCount += that.nanCount;
	infinityCount += that.infinityCount;

	if (nonMissingCount == 0) {
		// nothing was aggregated on this side, so adopt the other side's state
		nonMissingCount = that.nonMissingCount;

		min = that.min;
		max = that.max;

		sum = that.sum;
		mean = that.mean;
		m2 = that.m2;
		return;
	}

	if (that.nonMissingCount == 0) {
		// the other side contributes no values
		return;
	}

	final long totalCount = nonMissingCount + that.nonMissingCount;

	min.combine(that.min);
	max.combine(that.max);

	sum.combine(that.sum);

	// the difference is taken before the mean is reassigned; the m2 update
	// below relies on nonMissingCount still holding this side's old count
	final double deltaMean = that.mean.value() - mean.value();
	mean = mean.add(deltaMean * that.nonMissingCount / totalCount);
	m2 = m2.add(that.m2).add(deltaMean * deltaMean * nonMissingCount * that.nonMissingCount / totalCount);

	nonMissingCount = totalCount;
}
 
Example 29
Source Project: Flink-CEPplus   Source File: ShortSummaryAggregatorTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Summarizes the given values after narrowing them to {@code Short}.
 *
 * <p>Each non-null {@code Integer} is converted with {@code shortValue()};
 * null entries stay null. The converted values are then run through an
 * {@link AggregateCombineHarness} backed by a {@code ShortSummaryAggregator}.
 *
 * @param values the values to summarize, given as integers for convenience
 * @return the summary returned by the harness
 */
protected NumericColumnSummary<Short> summarize(Integer... values) {

	// narrow every entry to a short, keeping nulls in place
	final Short[] narrowed = new Short[values.length];
	for (int idx = 0; idx < narrowed.length; idx++) {
		final Integer boxed = values[idx];
		if (boxed == null) {
			continue;
		}
		narrowed[idx] = boxed.shortValue();
	}

	final AggregateCombineHarness<Short, NumericColumnSummary<Short>, ShortSummaryAggregator> harness =
		new AggregateCombineHarness<Short, NumericColumnSummary<Short>, ShortSummaryAggregator>() {

			@Override
			protected void compareResults(NumericColumnSummary<Short> first, NumericColumnSummary<Short> second) {

				// counters
				Assert.assertEquals(first.getTotalCount(), second.getTotalCount());
				Assert.assertEquals(first.getNullCount(), second.getNullCount());
				Assert.assertEquals(first.getMissingCount(), second.getMissingCount());
				Assert.assertEquals(first.getNonMissingCount(), second.getNonMissingCount());
				Assert.assertEquals(first.getInfinityCount(), second.getInfinityCount());
				Assert.assertEquals(first.getNanCount(), second.getNanCount());

				// null / non-null flags
				Assert.assertEquals(first.containsNull(), second.containsNull());
				Assert.assertEquals(first.containsNonNull(), second.containsNonNull());

				// min, max and sum are compared exactly; the derived statistics within a tolerance
				Assert.assertEquals(first.getMin().shortValue(), second.getMin().shortValue());
				Assert.assertEquals(first.getMax().shortValue(), second.getMax().shortValue());
				Assert.assertEquals(first.getSum().shortValue(), second.getSum().shortValue());
				Assert.assertEquals(first.getMean().doubleValue(), second.getMean().doubleValue(), 1e-12d);
				Assert.assertEquals(first.getVariance().doubleValue(), second.getVariance().doubleValue(), 1e-9d);
				Assert.assertEquals(first.getStandardDeviation().doubleValue(), second.getStandardDeviation().doubleValue(), 1e-12d);
			}
		};

	return harness.summarize(narrowed);
}
 
Example 30
/**
 * Summarizes the given values after wrapping them in {@code ShortValue}.
 *
 * <p>Each non-null {@code Integer} is narrowed with {@code shortValue()} and
 * wrapped in a new {@code ShortValue}; null entries stay null. The wrapped
 * values are then run through an {@link AggregateCombineHarness} backed by a
 * {@code ValueSummaryAggregator.ShortValueSummaryAggregator}.
 *
 * @param values the values to summarize, given as integers for convenience
 * @return the summary returned by the harness
 */
protected NumericColumnSummary<Short> summarize(Integer... values) {

	// wrap every entry, keeping nulls in place
	final ShortValue[] wrapped = new ShortValue[values.length];
	for (int idx = 0; idx < wrapped.length; idx++) {
		final Integer boxed = values[idx];
		if (boxed == null) {
			continue;
		}
		wrapped[idx] = new ShortValue(boxed.shortValue());
	}

	final AggregateCombineHarness<ShortValue, NumericColumnSummary<Short>, ValueSummaryAggregator.ShortValueSummaryAggregator> harness =
		new AggregateCombineHarness<ShortValue, NumericColumnSummary<Short>, ValueSummaryAggregator.ShortValueSummaryAggregator>() {

			@Override
			protected void compareResults(NumericColumnSummary<Short> first, NumericColumnSummary<Short> second) {

				// counters
				Assert.assertEquals(first.getTotalCount(), second.getTotalCount());
				Assert.assertEquals(first.getNullCount(), second.getNullCount());
				Assert.assertEquals(first.getMissingCount(), second.getMissingCount());
				Assert.assertEquals(first.getNonMissingCount(), second.getNonMissingCount());
				Assert.assertEquals(first.getInfinityCount(), second.getInfinityCount());
				Assert.assertEquals(first.getNanCount(), second.getNanCount());

				// null / non-null flags
				Assert.assertEquals(first.containsNull(), second.containsNull());
				Assert.assertEquals(first.containsNonNull(), second.containsNonNull());

				// min, max and sum are compared exactly; the derived statistics within a tolerance
				Assert.assertEquals(first.getMin().shortValue(), second.getMin().shortValue());
				Assert.assertEquals(first.getMax().shortValue(), second.getMax().shortValue());
				Assert.assertEquals(first.getSum().shortValue(), second.getSum().shortValue());
				Assert.assertEquals(first.getMean().doubleValue(), second.getMean().doubleValue(), 1e-12d);
				Assert.assertEquals(first.getVariance().doubleValue(), second.getVariance().doubleValue(), 1e-9d);
				Assert.assertEquals(first.getStandardDeviation().doubleValue(), second.getStandardDeviation().doubleValue(), 1e-12d);
			}
		};

	return harness.summarize(wrapped);
}