org.apache.flink.api.java.aggregation.Aggregations Java Examples

The following examples show how to use org.apache.flink.api.java.aggregation.Aggregations. Each example is drawn from an open-source project; the source file, project, and license are noted above each snippet.
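Before the project examples, here is a minimal, self-contained sketch of the pattern most of them follow. The class name and the small inline data set are illustrative and not taken from any of the projects below.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.aggregation.Aggregations;
import org.apache.flink.api.java.tuple.Tuple2;

public class AggregationsQuickStart {
	public static void main(String[] args) throws Exception {
		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// a small (key, value) tuple data set
		DataSet<Tuple2<Integer, Integer>> data = env.fromElements(
				new Tuple2<>(1, 10), new Tuple2<>(1, 20), new Tuple2<>(2, 5));

		// sum field 1 per key; expected output: (1,30), (2,5)
		data.groupBy(0)
				.aggregate(Aggregations.SUM, 1)
				.print();
	}
}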
Example #1
Source File: AggregateITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testNestedAggregate() throws Exception {
	/*
	 * Nested Aggregate
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple1<Integer>> aggregateDs = ds.groupBy(1)
			.aggregate(Aggregations.MIN, 0)
			.aggregate(Aggregations.MIN, 0)
			.project(0);

	List<Tuple1<Integer>> result = aggregateDs.collect();

	String expected = "1\n";

	compareResultAsTuples(result, expected);
}
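Note that the second aggregate above runs on the output of the grouped aggregation, so the test computes the global minimum over the per-group minima; that is why a single tuple ("1") is expected.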
 
Example #2
Source File: AggregateOperator.java    From flink with Apache License 2.0
public AggregateOperator<IN> and(Aggregations function, int field) {
	Preconditions.checkNotNull(function);

	TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) getType();

	if (field < 0 || field >= inType.getArity()) {
		throw new IllegalArgumentException("Aggregation field position is out of range.");
	}

	AggregationFunctionFactory factory = function.getFactory();
	AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass());

	this.aggregationFunctions.add(aggFunct);
	this.fields.add(field);

	return this;
}
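For orientation, and(...) appends a further aggregation to an existing aggregate(...) call so that several aggregations are evaluated in a single pass. A typical call chain, with illustrative field positions:

// one operator computing SUM of field 0 and MAX of field 1 together
ds.aggregate(Aggregations.SUM, 0)
		.and(Aggregations.MAX, 1);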
 
Example #3
Source File: AggregateITCase.java    From flink with Apache License 2.0
@Test
public void testGroupedAggregate() throws Exception {
	/*
	 * Grouped Aggregate
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple2<Long, Integer>> aggregateDs = ds.groupBy(1)
			.aggregate(Aggregations.SUM, 0)
			.project(1, 0);

	List<Tuple2<Long, Integer>> result = aggregateDs.collect();

	String expected = "1,1\n" +
			"2,5\n" +
			"3,15\n" +
			"4,34\n" +
			"5,65\n" +
			"6,111\n";

	compareResultAsTuples(result, expected);
}
 
Example #4
Source File: AggregateOperatorTest.java    From flink with Apache License 2.0
@Test
public void testAggregationTypes() {
	try {
		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs = env.fromCollection(emptyTupleData, tupleTypeInfo);

		// should work: multiple aggregates
		tupleDs.aggregate(Aggregations.SUM, 0).and(Aggregations.MIN, 4);

		// should work: nested aggregates
		tupleDs.aggregate(Aggregations.MIN, 2).aggregate(Aggregations.SUM, 1);

		// should not work: sum on a String field
		try {
			tupleDs.aggregate(Aggregations.SUM, 2);
			Assert.fail();
		} catch (UnsupportedAggregationTypeException iae) {
			// we're good here
		}
	}
	catch (Exception e) {
		System.err.println(e.getMessage());
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
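SUM is only defined for numeric field types, so requesting it on the String field at position 2 makes the aggregation-function factory throw UnsupportedAggregationTypeException, which is exactly what this test asserts.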
 
Example #5
Source File: AggregateOperator.java    From Flink-CEPplus with Apache License 2.0
/**
 * Grouped aggregation.
 *
 * @param input the grouping on which to aggregate
 * @param function the aggregation to apply
 * @param field the position of the tuple field to aggregate
 * @param aggregateLocationName the invocation location name used when naming the operator
 */
public AggregateOperator(Grouping<IN> input, Aggregations function, int field, String aggregateLocationName) {
	super(Preconditions.checkNotNull(input).getInputDataSet(), input.getInputDataSet().getType());
	Preconditions.checkNotNull(function);

	this.aggregateLocationName = aggregateLocationName;

	if (!input.getInputDataSet().getType().isTupleType()) {
		throw new InvalidProgramException("Aggregating on field positions is only possible on tuple data types.");
	}

	TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) input.getInputDataSet().getType();

	if (field < 0 || field >= inType.getArity()) {
		throw new IllegalArgumentException("Aggregation field position is out of range.");
	}

	AggregationFunctionFactory factory = function.getFactory();
	AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass());

	// set the aggregation fields
	this.aggregationFunctions.add(aggFunct);
	this.fields.add(field);
	this.grouping = input;
}
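User code does not normally invoke this constructor directly; it is created when aggregate (or one of the sum/min/max shortcuts) is called on a grouping. A sketch of the equivalent entry points, assuming a tuple DataSet ds:

// both lines construct a grouped AggregateOperator under the hood
ds.groupBy(1).aggregate(Aggregations.SUM, 0);
ds.groupBy(1).sum(0); // shortcut for the line above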
 
Example #6
Source File: AggregateITCase.java    From flink with Apache License 2.0
@Test
public void testNestedAggregateOfMutableValueTypes() throws Exception {
	/*
	 * Nested Aggregate of mutable value types
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<IntValue, LongValue, StringValue>> ds = ValueCollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple1<IntValue>> aggregateDs = ds.groupBy(1)
			.aggregate(Aggregations.MIN, 0)
			.aggregate(Aggregations.MIN, 0)
			.project(0);

	List<Tuple1<IntValue>> result = aggregateDs.collect();

	String expected = "1\n";

	compareResultAsTuples(result, expected);
}
 
Example #7
Source File: AggregateITCase.java    From flink with Apache License 2.0
@Test
public void testFullAggregateOfMutableValueTypes() throws Exception {
	/*
	 * Full Aggregate of mutable value types
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<IntValue, LongValue, StringValue>> ds = ValueCollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple2<IntValue, LongValue>> aggregateDs = ds
			.aggregate(Aggregations.SUM, 0)
			.and(Aggregations.MAX, 1)
			.project(0, 1);

	List<Tuple2<IntValue, LongValue>> result = aggregateDs.collect();

	String expected = "231,6\n";

	compareResultAsTuples(result, expected);
}
 
Example #8
Source File: TPCDSQuery55Parquet.java    From parquet-flinktacular with Apache License 2.0
public static void main(String[] args) throws Exception {

		long startTime = System.currentTimeMillis();

		if (!parseParameters(args)) {
			return;
		}

		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// get input data
		DataSet<DataDim> dataDims = getDataDimDataSet(env).map(new MapDataDim());
		DataSet<Item> item = getItemDataSet(env).map(new MapItem());
		DataSet<StoreSales> storeSales = getStoreSalesDataSet(env).map(new MapStoreSales());

		dataDims.join(storeSales).where(0).equalTo(0).with(new DataDimAndStoreSales())
			.join(item).where(1).equalTo(0).with(new DataDimAndStoreSalesAndItems())
			.groupBy(1, 0).aggregate(Aggregations.SUM, 2)
			.print();

		// execute program
		env.execute("TPC-DS Query 55 Example with Parquet input");

		System.out.println("Execution time: " + (System.currentTimeMillis() - startTime));
	}
 
Example #9
Source File: ScalaAggregateOperator.java    From flink with Apache License 2.0
/**
 * Grouped aggregation.
 *
 * @param input the grouping on which to aggregate
 * @param function the aggregation to apply
 * @param field the position of the tuple field to aggregate
 */
public ScalaAggregateOperator(Grouping<IN> input, Aggregations function, int field) {
	super(Preconditions.checkNotNull(input).getInputDataSet(), input.getInputDataSet().getType());

	Preconditions.checkNotNull(function);

	if (!input.getInputDataSet().getType().isTupleType()) {
		throw new InvalidProgramException("Aggregating on field positions is only possible on tuple data types.");
	}

	TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) input.getInputDataSet().getType();

	if (field < 0 || field >= inType.getArity()) {
		throw new IllegalArgumentException("Aggregation field position is out of range.");
	}

	AggregationFunctionFactory factory = function.getFactory();
	AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass());

	// set the aggregation fields
	this.aggregationFunctions.add(aggFunct);
	this.fields.add(field);
	this.grouping = input;
}
 
Example #10
Source File: AggregateITCase.java    From flink with Apache License 2.0
@Test
public void testGroupedAggregateOfMutableValueTypes() throws Exception {
	/*
	 * Grouped Aggregate of mutable value types
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<IntValue, LongValue, StringValue>> ds = ValueCollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple2<IntValue, LongValue>> aggregateDs = ds.groupBy(1)
			.aggregate(Aggregations.SUM, 0)
			.project(1, 0);

	List<Tuple2<IntValue, LongValue>> result = aggregateDs.collect();

	String expected = "1,1\n" +
			"2,5\n" +
			"3,15\n" +
			"4,34\n" +
			"5,65\n" +
			"6,111\n";

	compareResultAsTuples(result, expected);
}
 
Example #11
Source File: AggregateITCase.java    From flink with Apache License 2.0
@Test
public void testFullAggregate() throws Exception {
	/*
	 * Full Aggregate
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple2<Integer, Long>> aggregateDs = ds
			.aggregate(Aggregations.SUM, 0)
			.and(Aggregations.MAX, 1)
			.project(0, 1);

	List<Tuple2<Integer, Long>> result = aggregateDs.collect();

	String expected = "231,6\n";

	compareResultAsTuples(result, expected);
}
 
Example #12
Source File: ScalaAggregateOperator.java    From flink with Apache License 2.0
/**
 * Non-grouped aggregation.
 */
public ScalaAggregateOperator(org.apache.flink.api.java.DataSet<IN> input, Aggregations function, int field) {
	super(Preconditions.checkNotNull(input), input.getType());

	Preconditions.checkNotNull(function);

	if (!input.getType().isTupleType()) {
		throw new InvalidProgramException("Aggregating on field positions is only possible on tuple data types.");
	}

	TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) input.getType();

	if (field < 0 || field >= inType.getArity()) {
		throw new IllegalArgumentException("Aggregation field position is out of range.");
	}

	AggregationFunctionFactory factory = function.getFactory();
	AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass());

	// this is the first aggregation operator after a regular data set (non grouped aggregation)
	this.aggregationFunctions.add(aggFunct);
	this.fields.add(field);
	this.grouping = null;
}
 
Example #13
Source File: AggregateITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testGroupedAggregateOfMutableValueTypes() throws Exception {
	/*
	 * Grouped Aggregate of mutable value types
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<IntValue, LongValue, StringValue>> ds = ValueCollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple2<IntValue, LongValue>> aggregateDs = ds.groupBy(1)
			.aggregate(Aggregations.SUM, 0)
			.project(1, 0);

	List<Tuple2<IntValue, LongValue>> result = aggregateDs.collect();

	String expected = "1,1\n" +
			"2,5\n" +
			"3,15\n" +
			"4,34\n" +
			"5,65\n" +
			"6,111\n";

	compareResultAsTuples(result, expected);
}
 
Example #14
Source File: AggregateITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testGroupedAggregate() throws Exception {
	/*
	 * Grouped Aggregate
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple2<Long, Integer>> aggregateDs = ds.groupBy(1)
			.aggregate(Aggregations.SUM, 0)
			.project(1, 0);

	List<Tuple2<Long, Integer>> result = aggregateDs.collect();

	String expected = "1,1\n" +
			"2,5\n" +
			"3,15\n" +
			"4,34\n" +
			"5,65\n" +
			"6,111\n";

	compareResultAsTuples(result, expected);
}
 
Example #15
Source File: AggregateOperator.java    From flink with Apache License 2.0
/**
 * Non-grouped aggregation.
 */
public AggregateOperator(DataSet<IN> input, Aggregations function, int field, String aggregateLocationName) {
	super(Preconditions.checkNotNull(input), input.getType());
	Preconditions.checkNotNull(function);

	this.aggregateLocationName = aggregateLocationName;

	if (!input.getType().isTupleType()) {
		throw new InvalidProgramException("Aggregating on field positions is only possible on tuple data types.");
	}

	TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) input.getType();

	if (field < 0 || field >= inType.getArity()) {
		throw new IllegalArgumentException("Aggregation field position is out of range.");
	}

	AggregationFunctionFactory factory = function.getFactory();
	AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass());

	// this is the first aggregation operator after a regular data set (non grouped aggregation)
	this.aggregationFunctions.add(aggFunct);
	this.fields.add(field);
	this.grouping = null;
}
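Like the grouped constructor in Example #5, this one is reached through DataSet-level calls rather than invoked directly. A sketch, again assuming a tuple DataSet ds:

// both lines construct a non-grouped AggregateOperator over the whole data set
ds.aggregate(Aggregations.SUM, 0);
ds.sum(0); // shortcut for the line above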
 
Example #16
Source File: ScalaAggregateOperator.java    From flink with Apache License 2.0
public ScalaAggregateOperator<IN> and(Aggregations function, int field) {
	Preconditions.checkNotNull(function);

	TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) getType();

	if (field < 0 || field >= inType.getArity()) {
		throw new IllegalArgumentException("Aggregation field position is out of range.");
	}

	AggregationFunctionFactory factory = function.getFactory();
	AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass());

	this.aggregationFunctions.add(aggFunct);
	this.fields.add(field);

	return this;
}
 
Example #17
Source File: AggregateOperatorTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testAggregationTypes() {
	try {
		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs = env.fromCollection(emptyTupleData, tupleTypeInfo);

		// should work: multiple aggregates
		tupleDs.aggregate(Aggregations.SUM, 0).and(Aggregations.MIN, 4);

		// should work: nested aggregates
		tupleDs.aggregate(Aggregations.MIN, 2).aggregate(Aggregations.SUM, 1);

		// should not work: sum on a String field
		try {
			tupleDs.aggregate(Aggregations.SUM, 2);
			Assert.fail();
		} catch (UnsupportedAggregationTypeException iae) {
			// we're good here
		}
	}
	catch (Exception e) {
		System.err.println(e.getMessage());
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example #18
Source File: ScalaAggregateOperator.java    From Flink-CEPplus with Apache License 2.0
public ScalaAggregateOperator<IN> and(Aggregations function, int field) {
	Preconditions.checkNotNull(function);

	TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) getType();

	if (field < 0 || field >= inType.getArity()) {
		throw new IllegalArgumentException("Aggregation field position is out of range.");
	}

	AggregationFunctionFactory factory = function.getFactory();
	AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass());

	this.aggregationFunctions.add(aggFunct);
	this.fields.add(field);

	return this;
}
 
Example #19
Source File: AggregateOperator.java    From flink with Apache License 2.0
/**
 * Grouped aggregation.
 *
 * @param input the grouping on which to aggregate
 * @param function the aggregation to apply
 * @param field the position of the tuple field to aggregate
 * @param aggregateLocationName the invocation location name used when naming the operator
 */
public AggregateOperator(Grouping<IN> input, Aggregations function, int field, String aggregateLocationName) {
	super(Preconditions.checkNotNull(input).getInputDataSet(), input.getInputDataSet().getType());
	Preconditions.checkNotNull(function);

	this.aggregateLocationName = aggregateLocationName;

	if (!input.getInputDataSet().getType().isTupleType()) {
		throw new InvalidProgramException("Aggregating on field positions is only possible on tuple data types.");
	}

	TupleTypeInfoBase<?> inType = (TupleTypeInfoBase<?>) input.getInputDataSet().getType();

	if (field < 0 || field >= inType.getArity()) {
		throw new IllegalArgumentException("Aggregation field position is out of range.");
	}

	AggregationFunctionFactory factory = function.getFactory();
	AggregationFunction<?> aggFunct = factory.createAggregationFunction(inType.getTypeAt(field).getTypeClass());

	// set the aggregation fields
	this.aggregationFunctions.add(aggFunct);
	this.fields.add(field);
	this.grouping = input;
}
 
Example #20
Source File: WordCount.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {

		if (!parseParameters(args)) {
			return;
		}

		// set up the execution environment
		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// get input data
		DataSet<String> text = getTextDataSet(env);

		DataSet<Tuple2<String, Integer>> counts =
				// split up the lines in pairs (2-tuples) containing: (word,1)
				text.flatMap(new Tokenizer())
				// group by the tuple field "0" and sum up tuple field "1"
				.groupBy(0)
				.aggregate(Aggregations.SUM, 1);

		// emit result
		if (fileOutput) {
			counts.writeAsCsv(outputPath, "\n", " ");
			// execute program
			env.execute("WordCount Example");
		} else {
			counts.print();
		}
	}
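The Tokenizer used above is not part of this snippet. In Flink's WordCount examples it is typically a FlatMapFunction along the following lines; this is a sketch (shown as a top-level class for self-containment), and the original file may differ in details:

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

public final class Tokenizer implements FlatMapFunction<String, Tuple2<String, Integer>> {
	@Override
	public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
		// normalize the line and split it into words
		String[] tokens = value.toLowerCase().split("\\W+");
		for (String token : tokens) {
			if (token.length() > 0) {
				out.collect(new Tuple2<>(token, 1));
			}
		}
	}
}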
 
Example #21
Source File: HighParallelismIterationsTestProgram.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().disableSysoutLogging();

	// read vertex and edge data
	DataSet<Long> vertices = ConnectedComponentsData.getDefaultVertexDataSet(env)
			.rebalance();

	DataSet<Tuple2<Long, Long>> edges = ConnectedComponentsData.getDefaultEdgeDataSet(env)
			.rebalance()
			.flatMap(new ConnectedComponents.UndirectEdge());

	// assign the initial components (equal to the vertex id)
	DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices
			.map(new ConnectedComponents.DuplicateValue<>());

	// open a delta iteration
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

	// apply the step logic: join with the edges, select the minimum neighbor,
	// update if the component of the candidate is smaller
	DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges)
			.where(0).equalTo(0)
			.with(new ConnectedComponents.NeighborWithComponentIDJoin())

			.groupBy(0).aggregate(Aggregations.MIN, 1)

			.join(iteration.getSolutionSet())
			.where(0).equalTo(0)
			.with(new ConnectedComponents.ComponentIdFilter());

	// close the delta iteration (delta and new workset are identical)
	DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

	result.output(new DiscardingOutputFormat<>());

	env.execute();
}
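The ConnectedComponents helpers referenced above (UndirectEdge, DuplicateValue, NeighborWithComponentIDJoin, ComponentIdFilter) come from Flink's ConnectedComponents example and are not shown here. As one illustration, DuplicateValue initializes each vertex's component id to its own id; a sketch, shown as a top-level class:

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;

// maps a vertex id to an initial (vertexId, componentId) pair
public final class DuplicateValue<T> implements MapFunction<T, Tuple2<T, T>> {
	@Override
	public Tuple2<T, T> map(T vertex) {
		return new Tuple2<>(vertex, vertex);
	}
}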
 
Example #22
Source File: HighParallelismIterationsTestProgram.java    From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().disableSysoutLogging();

	// read vertex and edge data
	DataSet<Long> vertices = ConnectedComponentsData.getDefaultVertexDataSet(env)
			.rebalance();

	DataSet<Tuple2<Long, Long>> edges = ConnectedComponentsData.getDefaultEdgeDataSet(env)
			.rebalance()
			.flatMap(new ConnectedComponents.UndirectEdge());

	// assign the initial components (equal to the vertex id)
	DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices
			.map(new ConnectedComponents.DuplicateValue<>());

	// open a delta iteration
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

	// apply the step logic: join with the edges, select the minimum neighbor,
	// update if the component of the candidate is smaller
	DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges)
			.where(0).equalTo(0)
			.with(new ConnectedComponents.NeighborWithComponentIDJoin())

			.groupBy(0).aggregate(Aggregations.MIN, 1)

			.join(iteration.getSolutionSet())
			.where(0).equalTo(0)
			.with(new ConnectedComponents.ComponentIdFilter());

	// close the delta iteration (delta and new workset are identical)
	DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

	result.output(new DiscardingOutputFormat<>());

	env.execute();
}
 
Example #23
Source File: ConnectedComponentsWithSolutionSetFirstITCase.java    From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	// set up execution environment
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// read vertex and edge data
	DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

	DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
			.flatMap(new ConnectedComponents.UndirectEdge());

	// assign the initial components (equal to the vertex id)
	DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

	// open a delta iteration
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

	// apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
	DataSet<Tuple2<Long, Long>> minNeighbor = iteration.getWorkset()
			.join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
			.groupBy(0).aggregate(Aggregations.MIN, 1);

	DataSet<Tuple2<Long, Long>> updatedIds = iteration.getSolutionSet()
			.join(minNeighbor).where(0).equalTo(0).with(new UpdateComponentIdMatchMirrored());

	// close the delta iteration (delta and new workset are identical)
	DataSet<Tuple2<Long, Long>> result = iteration.closeWith(updatedIds, updatedIds);

	result.writeAsCsv(resultPath, "\n", " ");

	// execute program
	env.execute("Connected Components Example");
}
 
Example #24
Source File: ConnectedComponentsWithObjectMapITCase.java    From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	// set up execution environment
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// read vertex and edge data
	DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

	DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
											.flatMap(new UndirectEdge());

	// assign the initial components (equal to the vertex id)
	DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

	// open a delta iteration
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);
	iteration.setSolutionSetUnManaged(true);

	// apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
	DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
			.groupBy(0).aggregate(Aggregations.MIN, 1)
			.join(iteration.getSolutionSet()).where(0).equalTo(0)
			.with(new ComponentIdFilter());

	// close the delta iteration (delta and new workset are identical)
	DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

	result.writeAsCsv(resultPath, "\n", " ");

	// execute program
	env.execute("Connected Components Example");
}