Java Code Examples for org.apache.flink.api.java.operators.DeltaIteration

The following examples show how to use org.apache.flink.api.java.operators.DeltaIteration. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: Flink-CEPplus   Source File: ScatterGatherIteration.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Configures the given delta iteration for the scatter-gather computation.
 *
 * <p>If a configuration is present, the iteration receives the name returned by the
 * configuration (which is handed the default name), the configured parallelism, the
 * solution-set memory flag and every aggregator held by the configuration.
 * Otherwise only the default name is applied.
 *
 * @param iteration the delta iteration to configure
 */
private void setUpIteration(DeltaIteration<?, ?> iteration) {

	if (this.configuration == null) {
		// no configuration provided; set default name
		iteration.name("Scatter-gather iteration (" + gatherFunction + " | " + scatterFunction + ")");
		return;
	}

	// set up the iteration operator; the default name is handed to the configuration
	final String defaultName = "Scatter-gather iteration (" + gatherFunction + " | " + scatterFunction + ")";
	iteration.name(this.configuration.getName(defaultName));
	iteration.parallelism(this.configuration.getParallelism());
	iteration.setSolutionSetUnManaged(this.configuration.isSolutionSetUnmanagedMemory());

	// register all aggregators
	for (Map.Entry<String, Aggregator<?>> aggregator : this.configuration.getAggregators().entrySet()) {
		iteration.registerAggregator(aggregator.getKey(), aggregator.getValue());
	}
}
 
Example 2
Source Project: flink   Source File: ScatterGatherIteration.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Configures the given delta iteration for the scatter-gather computation.
 *
 * <p>If a configuration is present, the iteration receives the name returned by the
 * configuration (which is handed the default name), the configured parallelism, the
 * solution-set memory flag and every aggregator held by the configuration.
 * Otherwise only the default name is applied.
 *
 * @param iteration the delta iteration to configure
 */
private void setUpIteration(DeltaIteration<?, ?> iteration) {

	if (this.configuration == null) {
		// no configuration provided; set default name
		iteration.name("Scatter-gather iteration (" + gatherFunction + " | " + scatterFunction + ")");
		return;
	}

	// set up the iteration operator; the default name is handed to the configuration
	final String defaultName = "Scatter-gather iteration (" + gatherFunction + " | " + scatterFunction + ")";
	iteration.name(this.configuration.getName(defaultName));
	iteration.parallelism(this.configuration.getParallelism());
	iteration.setSolutionSetUnManaged(this.configuration.isSolutionSetUnmanagedMemory());

	// register all aggregators
	for (Map.Entry<String, Aggregator<?>> aggregator : this.configuration.getAggregators().entrySet()) {
		iteration.registerAggregator(aggregator.getKey(), aggregator.getValue());
	}
}
 
Example 3
/**
 * Runs a delta iteration over the sequence 0..9 with parallelism 1 and collects the
 * iteration result into {@code result}. Any exception is printed and fails the test.
 */
@Override
protected void testProgram() throws Exception {
	try {
		final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
		environment.setParallelism(1);

		final DataSet<Long> sequence = environment.generateSequence(0, 9);
		final DataSet<Tuple2<Long, Long>> input = sequence.map(new Duplicator<Long>());

		final DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> deltaIteration =
				input.iterateDelta(input, 5, 1);

		// first argument: the untouched workset; second argument: the workset mapped by TestMapper
		final DataSet<Tuple2<Long, Long>> untouchedWorkset = deltaIteration.getWorkset();
		final DataSet<Tuple2<Long, Long>> mappedWorkset = deltaIteration.getWorkset().map(new TestMapper());

		deltaIteration.closeWith(untouchedWorkset, mappedWorkset)
				.output(new LocalCollectionOutputFormat<Tuple2<Long, Long>>(result));

		environment.execute();
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 4
Source Project: flink   Source File: VertexCentricIteration.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Configures the given delta iteration for the vertex-centric computation.
 *
 * <p>If a configuration is present, the iteration receives the name returned by the
 * configuration (which is handed the default name), the configured parallelism, the
 * solution-set memory flag and every aggregator held by the configuration.
 * Otherwise only the default name is applied.
 *
 * @param iteration the delta iteration to configure
 */
private void setUpIteration(DeltaIteration<?, ?> iteration) {

	if (this.configuration == null) {
		// no configuration provided; set default name
		iteration.name("Vertex-centric iteration (" + computeFunction + ")");
		return;
	}

	// set up the iteration operator; the default name is handed to the configuration
	final String defaultName = "Vertex-centric iteration (" + computeFunction + ")";
	iteration.name(this.configuration.getName(defaultName));
	iteration.parallelism(this.configuration.getParallelism());
	iteration.setSolutionSetUnManaged(this.configuration.isSolutionSetUnmanagedMemory());

	// register all aggregators
	for (Map.Entry<String, Aggregator<?>> aggregator : this.configuration.getAggregators().entrySet()) {
		iteration.registerAggregator(aggregator.getKey(), aggregator.getValue());
	}
}
 
Example 5
Source Project: flink   Source File: VertexCentricIteration.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Configures the given delta iteration for the vertex-centric computation.
 *
 * <p>If a configuration is present, the iteration receives the name returned by the
 * configuration (which is handed the default name), the configured parallelism, the
 * solution-set memory flag and every aggregator held by the configuration.
 * Otherwise only the default name is applied.
 *
 * @param iteration the delta iteration to configure
 */
private void setUpIteration(DeltaIteration<?, ?> iteration) {

	if (this.configuration == null) {
		// no configuration provided; set default name
		iteration.name("Vertex-centric iteration (" + computeFunction + ")");
		return;
	}

	// set up the iteration operator; the default name is handed to the configuration
	final String defaultName = "Vertex-centric iteration (" + computeFunction + ")";
	iteration.name(this.configuration.getName(defaultName));
	iteration.parallelism(this.configuration.getParallelism());
	iteration.setSolutionSetUnManaged(this.configuration.isSolutionSetUnmanagedMemory());

	// register all aggregators
	for (Map.Entry<String, Aggregator<?>> aggregator : this.configuration.getAggregators().entrySet()) {
		iteration.registerAggregator(aggregator.getKey(), aggregator.getValue());
	}
}
 
Example 6
Source Project: flink   Source File: ScatterGatherIteration.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Configures the given delta iteration for the scatter-gather computation.
 *
 * <p>If a configuration is present, the iteration receives the name returned by the
 * configuration (which is handed the default name), the configured parallelism, the
 * solution-set memory flag and every aggregator held by the configuration.
 * Otherwise only the default name is applied.
 *
 * @param iteration the delta iteration to configure
 */
private void setUpIteration(DeltaIteration<?, ?> iteration) {

	if (this.configuration == null) {
		// no configuration provided; set default name
		iteration.name("Scatter-gather iteration (" + gatherFunction + " | " + scatterFunction + ")");
		return;
	}

	// set up the iteration operator; the default name is handed to the configuration
	final String defaultName = "Scatter-gather iteration (" + gatherFunction + " | " + scatterFunction + ")";
	iteration.name(this.configuration.getName(defaultName));
	iteration.parallelism(this.configuration.getParallelism());
	iteration.setSolutionSetUnManaged(this.configuration.isSolutionSetUnmanagedMemory());

	// register all aggregators
	for (Map.Entry<String, Aggregator<?>> aggregator : this.configuration.getAggregators().entrySet()) {
		iteration.registerAggregator(aggregator.getKey(), aggregator.getValue());
	}
}
 
Example 7
/**
 * Builds and executes the "Workset Connected Components" program: vertices and edges are
 * read from CSV files, every vertex is paired with its own id, and a delta iteration joins
 * the workset with the edges and coGroups the candidates with the solution set.
 * The iteration result is written to {@code resultPath}.
 */
@Override
protected void testProgram() throws Exception {

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// read vertices (one Long field) and edges (two Long fields), fields separated by a space
	DataSet<Tuple1<Long>> vertexIds = env.readCsvFile(verticesPath)
			.fieldDelimiter(" ")
			.types(Long.class)
			.name("Vertices");

	DataSet<Tuple2<Long, Long>> edgePairs = env.readCsvFile(edgesPath)
			.fieldDelimiter(" ")
			.types(Long.class, Long.class)
			.name("Edges");

	// pair every vertex id with itself
	MapFunction<Tuple1<Long>, Tuple2<Long, Long>> duplicateId = new MapFunction<Tuple1<Long>, Tuple2<Long, Long>>() {
		@Override
		public Tuple2<Long, Long> map(Tuple1<Long> vertex) throws Exception {
			return new Tuple2<>(vertex.f0, vertex.f0);
		}
	};
	DataSet<Tuple2<Long, Long>> verticesWithId = vertexIds.map(duplicateId).name("Assign Vertex Ids");

	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> deltaIteration =
			verticesWithId.iterateDelta(verticesWithId, MAX_ITERATIONS, 0);

	// emit (edge target, second field of the workset element) for every matching edge
	JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> toNeighbor =
			new JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
				@Override
				public Tuple2<Long, Long> join(Tuple2<Long, Long> candidate, Tuple2<Long, Long> edge) throws Exception {
					return new Tuple2<>(edge.f1, candidate.f1);
				}
			};

	JoinOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> candidates = deltaIteration.getWorkset()
			.join(edgePairs).where(0).equalTo(0)
			.with(toNeighbor)
			.name("Join Candidate Id With Neighbor");

	CoGroupOperator<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> updates = candidates
			.coGroup(deltaIteration.getSolutionSet()).where(0).equalTo(0)
			.with(new MinIdAndUpdate())
			.name("min Id and Update");

	// the same data set serves as solution set delta and as next workset
	deltaIteration.closeWith(updates, updates)
			.writeAsCsv(resultPath, "\n", " ")
			.name("Result");

	env.execute("Workset Connected Components");
}
 
Example 8
Source Project: Flink-CEPplus   Source File: ConnectedComponentsITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds and executes the "Connected Components Example" program with a delta iteration
 * and writes the result as CSV to {@code resultPath}.
 */
@Override
protected void testProgram() throws Exception {
	// set up execution environment
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// read vertex and edge data
	final DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

	final DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath)
			.fieldDelimiter(" ")
			.types(Long.class, Long.class)
			.flatMap(new ConnectedComponents.UndirectEdge());

	// assign the initial components (equal to the vertex id)
	final DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>());

	// open a delta iteration
	final DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

	// step logic, part 1: join with the edges and select the minimum neighbor
	final DataSet<Tuple2<Long, Long>> minNeighbor = iteration.getWorkset()
			.join(edges).where(0).equalTo(0)
			.with(new ConnectedComponents.NeighborWithComponentIDJoin())
			.groupBy(0).aggregate(Aggregations.MIN, 1);

	// step logic, part 2: update if the component of the candidate is smaller
	final DataSet<Tuple2<Long, Long>> changes = minNeighbor
			.join(iteration.getSolutionSet()).where(0).equalTo(0)
			.with(new ConnectedComponents.ComponentIdFilter());

	// close the delta iteration (delta and new workset are identical) and write the result
	iteration.closeWith(changes, changes).writeAsCsv(resultPath, "\n", " ");

	// execute program
	env.execute("Connected Components Example");
}
 
Example 9
/**
 * Builds and executes a connected-components delta iteration on generated vertex and edge
 * data, collecting the result tuples into a local list.
 */
@Override
protected void testProgram() throws Exception {

	// set up execution environment
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// read vertex data, one element per line of the generated string
	final String[] vertexLines = ConnectedComponentsData.getEnumeratingVertices(NUM_VERTICES).split("\n");
	final DataSet<Long> vertices = env.fromElements(vertexLines).map(new VertexParser());

	// read edge data, one element per line of the generated string
	final String[] edgeLines =
			ConnectedComponentsData.getRandomOddEvenEdges(NUM_EDGES, NUM_VERTICES, SEED).split("\n");
	final DataSet<Tuple2<Long, Long>> edges = env.fromElements(edgeLines).flatMap(new EdgeParser());

	// assign the initial components (equal to the vertex id)
	final DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>());

	// open a delta iteration
	final DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

	// apply the step logic: join with the edges, then coGroup the candidates with the solution set
	final DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset()
			.join(edges).where(0).equalTo(0)
			.with(new NeighborWithComponentIDJoin())
			.coGroup(iteration.getSolutionSet()).where(0).equalTo(0)
			.with(new MinIdAndUpdate());

	// close the delta iteration (delta and new workset are identical)
	final DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

	// emit result
	final List<Tuple2<Long, Long>> resultTuples = new ArrayList<>();
	result.output(new LocalCollectionOutputFormat<>(resultTuples));

	env.execute();
}
 
Example 10
/**
 * Builds and executes the "Connected Components Example" program where the solution set is
 * the first input of the update join, and writes the result as CSV to {@code resultPath}.
 */
@Override
protected void testProgram() throws Exception {
	// set up execution environment
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// read vertex and edge data
	final DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

	final DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath)
			.fieldDelimiter(" ")
			.types(Long.class, Long.class)
			.flatMap(new ConnectedComponents.UndirectEdge());

	// assign the initial components (equal to the vertex id)
	final DataSet<Tuple2<Long, Long>> verticesWithInitialId =
			vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

	// open a delta iteration
	final DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

	// step logic, part 1: join with the edges and select the minimum neighbor
	final DataSet<Tuple2<Long, Long>> minNeighbor = iteration.getWorkset()
			.join(edges).where(0).equalTo(0)
			.with(new ConnectedComponents.NeighborWithComponentIDJoin())
			.groupBy(0).aggregate(Aggregations.MIN, 1);

	// step logic, part 2: join the solution set (first input) with the minimum neighbors
	final DataSet<Tuple2<Long, Long>> updatedIds = iteration.getSolutionSet()
			.join(minNeighbor).where(0).equalTo(0)
			.with(new UpdateComponentIdMatchMirrored());

	// close the delta iteration (delta and new workset are identical) and write the result
	iteration.closeWith(updatedIds, updatedIds).writeAsCsv(resultPath, "\n", " ");

	// execute program
	env.execute("Connected Components Example");
}
 
Example 11
Source Project: Flink-CEPplus   Source File: AggregatorConvergenceITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs a connected-components delta iteration with an aggregator-based convergence
 * criterion and compares the sorted result against {@code expectedResult}.
 */
@Test
public void testDeltaConnectedComponentsWithParametrizableConvergence() throws Exception {

	// name of the aggregator that checks for convergence
	final String updatedElements = "updated.elements.aggr";

	// the iteration stops if less than this number of elements change value
	final long convergenceThreshold = 3;

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSet<Tuple2<Long, Long>> initialSolutionSet = env.fromCollection(verticesInput);
	final DataSet<Tuple2<Long, Long>> edges = env.fromCollection(edgesInput);

	final DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
		initialSolutionSet.iterateDelta(initialSolutionSet, 10, 0);

	// register the convergence criterion
	iteration.registerAggregationConvergenceCriterion(
		updatedElements,
		new LongSumAggregator(),
		new UpdatedElementsConvergenceCriterion(convergenceThreshold));

	// per key (field 0), keep the minimum of field 1 among the joined candidates
	final DataSet<Tuple2<Long, Long>> verticesWithNewComponents = iteration.getWorkset()
		.join(edges).where(0).equalTo(0)
		.with(new NeighborWithComponentIDJoin())
		.groupBy(0).min(1);

	// join with the solution set; the filter is given the aggregator name
	final DataSet<Tuple2<Long, Long>> updatedComponentId = verticesWithNewComponents
		.join(iteration.getSolutionSet()).where(0).equalTo(0)
		.flatMap(new MinimumIdFilter(updatedElements));

	// the same data set serves as solution set delta and as next workset
	final DataSet<Tuple2<Long, Long>> closed = iteration.closeWith(updatedComponentId, updatedComponentId);

	final List<Tuple2<Long, Long>> result = closed.collect();
	Collections.sort(result, new TestBaseUtils.TupleComparator<Tuple2<Long, Long>>());

	assertEquals(expectedResult, result);
}
 
Example 12
Source Project: flink   Source File: ScatterGatherIteration.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds the scatter function as a coGroup of the edges with the iteration workset, for
 * vertices whose value is a {@code Tuple3<VV, LongValue, LongValue>} (value plus degree
 * information).
 *
 * <p>The resulting operator is named "Messaging". If a configuration is present, its scatter
 * broadcast variables are attached and, when {@code isOptNumVertices()} is set, the given
 * number of vertices is attached as the broadcast set "number of vertices".
 *
 * @param iteration the delta iteration whose workset is coGrouped with the edges
 * @param messageTypeInfo type information for the messages, handed to the scatter UDF
 * @param whereArg the argument for the where within the coGroup
 * @param equalToArg the argument for the equalTo within the coGroup
 * @param numberOfVertices data set broadcast as "number of vertices" when the configuration asks for it
 * @return the scatter function
 */
private CoGroupOperator<?, ?, Tuple2<K, Message>> buildScatterFunctionVerticesWithDegrees(
		DeltaIteration<Vertex<K, Tuple3<VV, LongValue, LongValue>>, Vertex<K, Tuple3<VV, LongValue, LongValue>>> iteration,
		TypeInformation<Tuple2<K, Message>> messageTypeInfo, int whereArg, int equalToArg,
		DataSet<LongValue> numberOfVertices) {

	ScatterUdfWithEdgeValues<K, Tuple3<VV, LongValue, LongValue>, VV, Message, EV> scatterUdf =
			new ScatterUdfWithEVsVVWithDegrees<>(scatterFunction, messageTypeInfo);

	// build the scatter function (co group) and give it its name
	CoGroupOperator<?, ?, Tuple2<K, Message>> messages = this.edgesWithValue
			.coGroup(iteration.getWorkset())
			.where(whereArg)
			.equalTo(equalToArg)
			.with(scatterUdf);
	messages = messages.name("Messaging");

	// without a configuration there are no broadcast variables to attach
	if (this.configuration == null) {
		return messages;
	}

	// configure coGroup message function with broadcast variables
	for (Tuple2<String, DataSet<?>> broadcastVar : this.configuration.getScatterBcastVars()) {
		messages = messages.withBroadcastSet(broadcastVar.f1, broadcastVar.f0);
	}
	if (this.configuration.isOptNumVertices()) {
		messages = messages.withBroadcastSet(numberOfVertices, "number of vertices");
	}

	return messages;
}
 
Example 13
Source Project: Flink-CEPplus   Source File: AggregatorsITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Test aggregator with parameter for iterateDelta.
 *
 * <p>Runs a delta iteration with a {@code LongSumAggregatorWithParameter(4)} registered under
 * {@code NEGATIVE_ELEMENTS_AGGR} and compares the sorted second fields of the result with the
 * expected values.
 */
@Test
public void testAggregatorWithParameterForIterateDelta() throws Exception {

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);

	DataSet<Tuple2<Integer, Integer>> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap());

	DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration = initialSolutionSet.iterateDelta(
			initialSolutionSet, MAX_ITERATIONS, 0);

	// register aggregator
	LongSumAggregator aggr = new LongSumAggregatorWithParameter(4);
	iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

	DataSet<Tuple2<Integer, Integer>> updatedDs = iteration.getWorkset().map(new AggregateMapDelta());

	DataSet<Tuple2<Integer, Integer>> newElements = updatedDs.join(iteration.getSolutionSet())
			.where(0).equalTo(0).flatMap(new UpdateFilter());

	// the same data set serves as solution set delta and as next workset
	DataSet<Tuple2<Integer, Integer>> iterationRes = iteration.closeWith(newElements, newElements);
	List<Integer> result = iterationRes.map(new ProjectSecondMapper()).collect();
	Collections.sort(result);

	List<Integer> expected = Arrays.asList(1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5);

	// expected value first, actual value second, so that failure messages are reported correctly
	assertEquals(expected, result);
}
 
Example 14
Source Project: Flink-CEPplus   Source File: AggregatorsITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Test convergence criterion with parameter for iterate delta.
 *
 * <p>Registers a {@code LongSumAggregator} and a
 * {@code NegativeElementsConvergenceCriterionWithParam(3)} under {@code NEGATIVE_ELEMENTS_AGGR}
 * and compares the sorted second fields of the iteration result with the expected values.
 */
@Test
public void testConvergenceCriterionWithParameterForIterateDelta() throws Exception {

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);

	final DataSet<Tuple2<Integer, Integer>> initialSolutionSet =
			CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap());

	final DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration =
			initialSolutionSet.iterateDelta(initialSolutionSet, MAX_ITERATIONS, 0);

	// register aggregator
	final LongSumAggregator aggr = new LongSumAggregator();
	iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

	// register convergence criterion
	iteration.registerAggregationConvergenceCriterion(
			NEGATIVE_ELEMENTS_AGGR,
			aggr,
			new NegativeElementsConvergenceCriterionWithParam(3));

	final DataSet<Tuple2<Integer, Integer>> updatedDs =
			iteration.getWorkset().map(new AggregateAndSubtractOneDelta());

	// keep fields 0 and 1 of the first join input
	final DataSet<Tuple2<Integer, Integer>> newElements = updatedDs
			.join(iteration.getSolutionSet())
			.where(0).equalTo(0)
			.projectFirst(0, 1);

	// the same data set serves as solution set delta and as next workset
	final DataSet<Tuple2<Integer, Integer>> iterationRes = iteration.closeWith(newElements, newElements);

	final List<Integer> result = iterationRes.map(new ProjectSecondMapper()).collect();
	Collections.sort(result);

	final List<Integer> expected = Arrays.asList(-3, -2, -2, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1, 1);

	assertEquals(expected, result);
}
 
Example 15
/**
 * Builds and executes the "Connected Components Example" program with
 * {@code setSolutionSetUnManaged(true)} on the delta iteration, and writes the result as CSV
 * to {@code resultPath}.
 */
@Override
protected void testProgram() throws Exception {
	// set up execution environment
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// read vertex and edge data
	final DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

	final DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath)
			.fieldDelimiter(" ")
			.types(Long.class, Long.class)
			.flatMap(new UndirectEdge());

	// assign the initial components (equal to the vertex id)
	final DataSet<Tuple2<Long, Long>> verticesWithInitialId =
			vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

	// open a delta iteration
	final DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);
	iteration.setSolutionSetUnManaged(true);

	// step logic, part 1: join with the edges and select the minimum neighbor
	final DataSet<Tuple2<Long, Long>> minNeighbor = iteration.getWorkset()
			.join(edges).where(0).equalTo(0)
			.with(new NeighborWithComponentIDJoin())
			.groupBy(0).aggregate(Aggregations.MIN, 1);

	// step logic, part 2: update if the component of the candidate is smaller
	final DataSet<Tuple2<Long, Long>> changes = minNeighbor
			.join(iteration.getSolutionSet()).where(0).equalTo(0)
			.with(new ComponentIdFilter());

	// close the delta iteration (delta and new workset are identical) and write the result
	iteration.closeWith(changes, changes).writeAsCsv(resultPath, "\n", " ");

	// execute program
	env.execute("Connected Components Example");
}
 
Example 16
Source Project: flink   Source File: AggregatorsITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Test aggregator without parameter for iterateDelta.
 *
 * <p>Runs a delta iteration with a plain {@code LongSumAggregator} registered under
 * {@code NEGATIVE_ELEMENTS_AGGR} and compares the sorted second fields of the result with the
 * expected values.
 */
@Test
public void testAggregatorWithoutParameterForIterateDelta() throws Exception {

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);

	final DataSet<Tuple2<Integer, Integer>> initialSolutionSet =
			CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap());

	final DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration =
			initialSolutionSet.iterateDelta(initialSolutionSet, MAX_ITERATIONS, 0);

	// register aggregator
	final LongSumAggregator aggr = new LongSumAggregator();
	iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

	final DataSet<Tuple2<Integer, Integer>> updatedDs = iteration.getWorkset().map(new AggregateMapDelta());

	final DataSet<Tuple2<Integer, Integer>> newElements = updatedDs
			.join(iteration.getSolutionSet())
			.where(0).equalTo(0)
			.flatMap(new UpdateFilter());

	// the same data set serves as solution set delta and as next workset
	final DataSet<Tuple2<Integer, Integer>> iterationRes = iteration.closeWith(newElements, newElements);

	final List<Integer> result = iterationRes.map(new ProjectSecondMapper()).collect();
	Collections.sort(result);

	final List<Integer> expected = Arrays.asList(1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5);

	assertEquals(expected, result);
}
 
Example 17
/**
 * Builds and executes a connected-components delta iteration on generated vertex and edge
 * data, collecting the result tuples into a local list.
 */
@Override
protected void testProgram() throws Exception {

	// set up execution environment
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// read vertex data, one element per line of the generated string
	final String[] vertexLines = ConnectedComponentsData.getEnumeratingVertices(NUM_VERTICES).split("\n");
	final DataSet<Long> vertices = env.fromElements(vertexLines).map(new VertexParser());

	// read edge data, one element per line of the generated string
	final String[] edgeLines =
			ConnectedComponentsData.getRandomOddEvenEdges(NUM_EDGES, NUM_VERTICES, SEED).split("\n");
	final DataSet<Tuple2<Long, Long>> edges = env.fromElements(edgeLines).flatMap(new EdgeParser());

	// assign the initial components (equal to the vertex id)
	final DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>());

	// open a delta iteration
	final DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

	// apply the step logic: join with the edges, then coGroup the candidates with the solution set
	final DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset()
			.join(edges).where(0).equalTo(0)
			.with(new NeighborWithComponentIDJoin())
			.coGroup(iteration.getSolutionSet()).where(0).equalTo(0)
			.with(new MinIdAndUpdate());

	// close the delta iteration (delta and new workset are identical)
	final DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

	// emit result
	final List<Tuple2<Long, Long>> resultTuples = new ArrayList<>();
	result.output(new LocalCollectionOutputFormat<>(resultTuples));

	env.execute();
}
 
Example 18
Source Project: flink   Source File: AggregatorConvergenceITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs a connected-components delta iteration with an aggregator-based convergence
 * criterion and compares the sorted result against {@code expectedResult}.
 */
@Test
public void testDeltaConnectedComponentsWithParametrizableConvergence() throws Exception {

	// name of the aggregator that checks for convergence
	final String updatedElements = "updated.elements.aggr";

	// the iteration stops if less than this number of elements change value
	final long convergenceThreshold = 3;

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSet<Tuple2<Long, Long>> initialSolutionSet = env.fromCollection(verticesInput);
	final DataSet<Tuple2<Long, Long>> edges = env.fromCollection(edgesInput);

	final DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
		initialSolutionSet.iterateDelta(initialSolutionSet, 10, 0);

	// register the convergence criterion
	iteration.registerAggregationConvergenceCriterion(
		updatedElements,
		new LongSumAggregator(),
		new UpdatedElementsConvergenceCriterion(convergenceThreshold));

	// per key (field 0), keep the minimum of field 1 among the joined candidates
	final DataSet<Tuple2<Long, Long>> verticesWithNewComponents = iteration.getWorkset()
		.join(edges).where(0).equalTo(0)
		.with(new NeighborWithComponentIDJoin())
		.groupBy(0).min(1);

	// join with the solution set; the filter is given the aggregator name
	final DataSet<Tuple2<Long, Long>> updatedComponentId = verticesWithNewComponents
		.join(iteration.getSolutionSet()).where(0).equalTo(0)
		.flatMap(new MinimumIdFilter(updatedElements));

	// the same data set serves as solution set delta and as next workset
	final DataSet<Tuple2<Long, Long>> closed = iteration.closeWith(updatedComponentId, updatedComponentId);

	final List<Tuple2<Long, Long>> result = closed.collect();
	Collections.sort(result, new TestBaseUtils.TupleComparator<Tuple2<Long, Long>>());

	assertEquals(expectedResult, result);
}
 
Example 19
Source Project: flink   Source File: PartitionITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that a program using range partitioning inside a delta iteration fails with an
 * {@link InvalidProgramException}.
 *
 * <p>In {@code COLLECTION} mode the expected exception is thrown up front, because the
 * check does not apply there.
 */
@Test(expected = InvalidProgramException.class)
public void testRangePartitionInIteration() throws Exception {

	// does not apply for collection execution
	if (super.mode == TestExecutionMode.COLLECTION) {
		throw new InvalidProgramException("Does not apply for collection execution");
	}

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	final DataSource<Long> numbers = env.generateSequence(0, 10000);

	// pair every number with its decimal string representation
	final MapFunction<Long, Tuple2<Long, String>> withString = new MapFunction<Long, Tuple2<Long, String>>() {
		@Override
		public Tuple2<Long, String> map(Long number) throws Exception {
			return new Tuple2<>(number, Long.toString(number));
		}
	};
	final DataSet<Tuple2<Long, String>> tuples = numbers.map(withString);

	final DeltaIteration<Tuple2<Long, String>, Tuple2<Long, String>> deltaIteration =
		tuples.iterateDelta(tuples, 10, 0);

	final DataSet<Tuple2<Long, String>> stepResult = deltaIteration.getWorkset()
		.partitionByRange(1) // Verify that range partition is not allowed in iteration
		.join(deltaIteration.getSolutionSet())
		.where(0).equalTo(0)
		.projectFirst(0).projectSecond(1);

	// the same data set serves as solution set delta and as next workset
	final DataSet<Tuple2<Long, String>> result = deltaIteration.closeWith(stepResult, stepResult);

	result.collect(); // should fail
}
 
Example 20
/**
 * Builds and executes the "Connected Components Example" program where the solution set is
 * the first input of the update join, and writes the result as CSV to {@code resultPath}.
 */
@Override
protected void testProgram() throws Exception {
	// set up execution environment
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// read vertex and edge data
	final DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

	final DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath)
			.fieldDelimiter(" ")
			.types(Long.class, Long.class)
			.flatMap(new ConnectedComponents.UndirectEdge());

	// assign the initial components (equal to the vertex id)
	final DataSet<Tuple2<Long, Long>> verticesWithInitialId =
			vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

	// open a delta iteration
	final DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

	// step logic, part 1: join with the edges and select the minimum neighbor
	final DataSet<Tuple2<Long, Long>> minNeighbor = iteration.getWorkset()
			.join(edges).where(0).equalTo(0)
			.with(new ConnectedComponents.NeighborWithComponentIDJoin())
			.groupBy(0).aggregate(Aggregations.MIN, 1);

	// step logic, part 2: join the solution set (first input) with the minimum neighbors
	final DataSet<Tuple2<Long, Long>> updatedIds = iteration.getSolutionSet()
			.join(minNeighbor).where(0).equalTo(0)
			.with(new UpdateComponentIdMatchMirrored());

	// close the delta iteration (delta and new workset are identical) and write the result
	iteration.closeWith(updatedIds, updatedIds).writeAsCsv(resultPath, "\n", " ");

	// execute program
	env.execute("Connected Components Example");
}
 
Example 21
Source Project: flink   Source File: ConnectedComponentsITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds and executes the "Connected Components Example" program with a delta iteration
 * and writes the result as CSV to {@code resultPath}.
 */
@Override
protected void testProgram() throws Exception {
	// set up execution environment
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// read vertex and edge data
	final DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

	final DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath)
			.fieldDelimiter(" ")
			.types(Long.class, Long.class)
			.flatMap(new ConnectedComponents.UndirectEdge());

	// assign the initial components (equal to the vertex id)
	final DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>());

	// open a delta iteration
	final DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

	// step logic, part 1: join with the edges and select the minimum neighbor
	final DataSet<Tuple2<Long, Long>> minNeighbor = iteration.getWorkset()
			.join(edges).where(0).equalTo(0)
			.with(new ConnectedComponents.NeighborWithComponentIDJoin())
			.groupBy(0).aggregate(Aggregations.MIN, 1);

	// step logic, part 2: update if the component of the candidate is smaller
	final DataSet<Tuple2<Long, Long>> changes = minNeighbor
			.join(iteration.getSolutionSet()).where(0).equalTo(0)
			.with(new ConnectedComponents.ComponentIdFilter());

	// close the delta iteration (delta and new workset are identical) and write the result
	iteration.closeWith(changes, changes).writeAsCsv(resultPath, "\n", " ");

	// execute program
	env.execute("Connected Components Example");
}
 
Example 22
/**
 * Builds and executes a connected-components delta iteration on the default vertex and edge
 * data sets; the result is sent to a {@code DiscardingOutputFormat}.
 *
 * @param args command-line arguments (not used)
 * @throws Exception if building or executing the program fails
 */
public static void main(String[] args) throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().disableSysoutLogging();

	// read vertex and edge data
	final DataSet<Long> vertices = ConnectedComponentsData.getDefaultVertexDataSet(env).rebalance();

	final DataSet<Tuple2<Long, Long>> edges = ConnectedComponentsData.getDefaultEdgeDataSet(env)
			.rebalance()
			.flatMap(new ConnectedComponents.UndirectEdge());

	// assign the initial components (equal to the vertex id)
	final DataSet<Tuple2<Long, Long>> verticesWithInitialId =
			vertices.map(new ConnectedComponents.DuplicateValue<>());

	// open a delta iteration
	final DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

	// apply the step logic: join with the edges, select the minimum neighbor,
	// update if the component of the candidate is smaller
	final DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset()
			.join(edges).where(0).equalTo(0)
			.with(new ConnectedComponents.NeighborWithComponentIDJoin())
			.groupBy(0).aggregate(Aggregations.MIN, 1)
			.join(iteration.getSolutionSet()).where(0).equalTo(0)
			.with(new ConnectedComponents.ComponentIdFilter());

	// close the delta iteration (delta and new workset are identical) and discard the output
	iteration.closeWith(changes, changes).output(new DiscardingOutputFormat<>());

	env.execute();
}
 
Example 23
Source Project: Flink-CEPplus   Source File: IterationCompilerTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Compiles a delta iteration whose solution set delta and next workset are both the
 * untouched workset, all the way down to a job graph. Any exception is printed and fails
 * the test.
 */
@Test
public void testEmptyWorksetIteration() {
	try {
		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(43);

		// the mapper is never executed here; the program is only compiled
		final MapFunction<Long, Tuple2<Long, Long>> nullMapper = new MapFunction<Long, Tuple2<Long, Long>>() {
			@Override
			public Tuple2<Long, Long> map(Long value){ return null; }
		};
		final DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 20).map(nullMapper);

		final DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = input.iterateDelta(input, 100, 0);
		iter.closeWith(iter.getWorkset(), iter.getWorkset())
			.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

		// program plan -> optimized plan -> job graph
		final Plan plan = env.createProgramPlan();
		final OptimizedPlan optimizedPlan = compileNoStats(plan);
		new JobGraphGenerator().compileJobGraph(optimizedPlan);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 24
Source Project: Flink-CEPplus   Source File: TempInIterationsTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Compiles a delta iteration to a job graph and asserts that input 0 of the job vertex
 * named "SolutionSet Delta" is asynchronously materialized, and that such a vertex exists.
 */
@Test
public void testTempInIterationTest() throws Exception {

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSet<Tuple2<Long, Long>> input =
			env.readCsvFile("file:///does/not/exist").types(Long.class, Long.class);

	final DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = input.iterateDelta(input, 1, 0);

	final DataSet<Tuple2<Long, Long>> update = iteration.getWorkset()
			.join(iteration.getSolutionSet()).where(0).equalTo(0)
			.with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

	// the same data set serves as solution set delta and as next workset
	iteration.closeWith(update, update)
			.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

	// program plan -> optimized plan -> job graph
	final Plan plan = env.createProgramPlan();
	final OptimizedPlan optimizedPlan = (new Optimizer(new Configuration())).compile(plan);
	final JobGraph jobGraph = new JobGraphGenerator().compileJobGraph(optimizedPlan);

	boolean solutionSetUpdateChecked = false;
	for (JobVertex vertex : jobGraph.getVertices()) {
		if (!vertex.getName().equals("SolutionSet Delta")) {
			continue;
		}

		// check if input of solution set delta is temped
		final TaskConfig taskConfig = new TaskConfig(vertex.getConfiguration());
		assertTrue(taskConfig.isInputAsynchronouslyMaterialized(0));
		solutionSetUpdateChecked = true;
	}
	assertTrue(solutionSetUpdateChecked);

}
 
Example 25
Source Project: Flink-CEPplus   Source File: IterationsCompilerTest.java    License: Apache License 2.0    Votes: 5
/**
 * Builds a delta iteration in which the workset is only consumed as a broadcast set, and checks
 * that both the JSON plan dumper and the job graph generator accept the optimized plan.
 */
@Test
public void testSolutionSetDeltaDependsOnBroadcastVariable() {
	try {
		final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

		final DataSet<Tuple2<Long, Long>> iterationInput =
				environment.generateSequence(1, 1000).map(new DuplicateValueScalar<Long>());

		final DataSet<Tuple2<Long, Long>> staticInput =
				environment.generateSequence(1, 1000).map(new DuplicateValueScalar<Long>());

		// iteration from here
		final DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> deltaIteration =
				iterationInput.iterateDelta(iterationInput, 1000, 1);

		// the workset enters the step function only through the broadcast set "bc data"
		final DataSet<Tuple2<Long, Long>> nextWorkset = staticInput
				.map(new IdentityMapper<Tuple2<Long, Long>>())
				.withBroadcastSet(deltaIteration.getWorkset(), "bc data")
				.join(deltaIteration.getSolutionSet())
				.where(0)
				.equalTo(1)
				.projectFirst(1)
				.projectSecond(1);

		final DataSet<Tuple2<Long, Long>> solutionSetDelta =
				nextWorkset.map(new IdentityMapper<Tuple2<Long, Long>>());

		deltaIteration.closeWith(solutionSetDelta, nextWorkset)
				.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

		final OptimizedPlan optimizedPlan = compileNoStats(environment.createProgramPlan());

		// check that the JSON generator accepts this plan
		new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(optimizedPlan);

		// check that the JobGraphGenerator accepts the plan
		new JobGraphGenerator().compileJobGraph(optimizedPlan);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 26
Source Project: flink   Source File: IterationCompilerTest.java    License: Apache License 2.0    Votes: 5
/**
 * Verifies that a delta iteration which is closed directly over its own workset (no step
 * operators in between) can be optimized and translated into a job graph.
 */
@Test
public void testEmptyWorksetIteration() {
	try {
		final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
		environment.setParallelism(43);

		// stub mapper: this test only builds and compiles the plan, it never calls execute()
		final MapFunction<Long, Tuple2<Long, Long>> stubMapper = new MapFunction<Long, Tuple2<Long, Long>>() {
			@Override
			public Tuple2<Long, Long> map(Long value) {
				return null;
			}
		};
		final DataSet<Tuple2<Long, Long>> tuples = environment.generateSequence(1, 20).map(stubMapper);

		// the same data set serves as initial solution set and initial workset
		final DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> deltaIteration =
				tuples.iterateDelta(tuples, 100, 0);

		// hand the workset straight back as both the solution set delta and the next workset
		final DataSet<Tuple2<Long, Long>> workset = deltaIteration.getWorkset();
		deltaIteration.closeWith(workset, workset)
				.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

		final Plan programPlan = environment.createProgramPlan();
		final OptimizedPlan optimizedPlan = compileNoStats(programPlan);

		new JobGraphGenerator().compileJobGraph(optimizedPlan);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 27
Source Project: flink   Source File: ScatterGatherIteration.java    License: Apache License 2.0    Votes: 5
/**
 * Builds the scatter step for a simple vertex (one whose value does not carry degrees) as a
 * coGroup of the edges with the iteration workset. The resulting operator is named
 * "Messaging" and, when a configuration is present, receives the configured broadcast sets.
 *
 * @param iteration the delta iteration whose workset is co-grouped with the edges
 * @param messageTypeInfo type information passed to the scatter UDF wrapper
 * @param whereArg the argument for the where within the coGroup
 * @param equalToArg the argument for the equalTo within the coGroup
 * @param numberOfVertices data set broadcast as "number of vertices" when the configuration
 *        has the number-of-vertices option enabled
 * @return the scatter function
 */
private CoGroupOperator<?, ?, Tuple2<K, Message>> buildScatterFunction(
		DeltaIteration<Vertex<K, VV>, Vertex<K, VV>> iteration,
		TypeInformation<Tuple2<K, Message>> messageTypeInfo, int whereArg, int equalToArg,
		DataSet<LongValue> numberOfVertices) {

	final ScatterUdfWithEdgeValues<K, VV, VV, Message, EV> scatterUdf =
			new ScatterUdfWithEVsSimpleVV<>(scatterFunction, messageTypeInfo);

	// build the scatter function (co group) and give it its name
	CoGroupOperator<?, ?, Tuple2<K, Message>> scatter = this.edgesWithValue
			.coGroup(iteration.getWorkset())
			.where(whereArg)
			.equalTo(equalToArg)
			.with(scatterUdf)
			.name("Messaging");

	// without a configuration there are no broadcast variables to attach
	if (this.configuration == null) {
		return scatter;
	}

	// each entry is (broadcast name, data set): f0 is the name, f1 the data set
	for (Tuple2<String, DataSet<?>> broadcast : this.configuration.getScatterBcastVars()) {
		scatter = scatter.withBroadcastSet(broadcast.f1, broadcast.f0);
	}
	if (this.configuration.isOptNumVertices()) {
		scatter = scatter.withBroadcastSet(numberOfVertices, "number of vertices");
	}

	return scatter;
}
 
Example 28
/**
 * Assembles the delta iteration program used by the tests: the workset is joined with an
 * invariant input, the result is joined with the solution set, and a group reduce on that
 * second join produces the next workset.
 *
 * @param joinPreservesSolutionSet if true, the join with the solution set is annotated with
 *        {@code withForwardedFieldsFirst("*")}
 * @param mapBeforeSolutionDelta if true, an identity mapper is placed between the solution set
 *        join and the solution set delta; otherwise the join itself is the delta
 * @return the program plan created from the environment
 */
private Plan getTestPlan(boolean joinPreservesSolutionSet, boolean mapBeforeSolutionDelta) {

	// construct the plan
	final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
	environment.setParallelism(DEFAULT_PARALLELISM);

	final DataSet<Tuple2<Long, Long>> solutionSetSource =
			environment.readCsvFile("/tmp/sol.csv").types(Long.class, Long.class).name("Solution Set");
	final DataSet<Tuple2<Long, Long>> worksetSource =
			environment.readCsvFile("/tmp/sol.csv").types(Long.class, Long.class).name("Workset");
	final DataSet<Tuple2<Long, Long>> invariantSource =
			environment.readCsvFile("/tmp/sol.csv").types(Long.class, Long.class).name("Invariant Input");

	final DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			solutionSetSource.iterateDelta(worksetSource, 100, 0).name(ITERATION_NAME);

	final DataSet<Tuple2<Long, Long>> invariantJoin = iteration.getWorkset()
			.join(invariantSource)
			.where(0)
			.equalTo(0)
			.with(new IdentityJoiner<Tuple2<Long, Long>>())
			.withForwardedFieldsFirst("*")
			.name(JOIN_WITH_INVARIANT_NAME);

	final DataSet<Tuple2<Long, Long>> solutionSetJoin = iteration.getSolutionSet()
			.join(invariantJoin)
			.where(0)
			.equalTo(0)
			.with(new IdentityJoiner<Tuple2<Long, Long>>())
			.name(JOIN_WITH_SOLUTION_SET);

	if (joinPreservesSolutionSet) {
		// the annotation is applied to the operator in place; the returned reference is not needed
		((JoinOperator<?, ?, ?>) solutionSetJoin).withForwardedFieldsFirst("*");
	}

	final DataSet<Tuple2<Long, Long>> nextWorkset = solutionSetJoin
			.groupBy(0)
			.reduceGroup(new IdentityGroupReducer<Tuple2<Long, Long>>())
			.withForwardedFields("*")
			.name(NEXT_WORKSET_REDUCER_NAME);

	// the solution set delta is either the join itself or an identity mapper on top of it
	DataSet<Tuple2<Long, Long>> solutionDelta = solutionSetJoin;
	if (mapBeforeSolutionDelta) {
		solutionDelta = solutionSetJoin
				.map(new IdentityMapper<Tuple2<Long, Long>>())
				.withForwardedFields("*")
				.name(SOLUTION_DELTA_MAPPER_NAME);
	}

	iteration.closeWith(solutionDelta, nextWorkset)
			.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

	return environment.createProgramPlan();
}
 
Example 29
/**
 * Compiles a delta iteration whose step function reads only the workset, and checks that the
 * solution set plan node of the optimized iteration has no outgoing channels.
 */
@Test
public void testWorksetIterationNotDependingOnSolutionSet() {
	try {
		final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

		final DataSet<Tuple2<Long, Long>> source =
				environment.generateSequence(1, 100).map(new Duplicator<Long>());

		final DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> deltaIteration =
				source.iterateDelta(source, 100, 1);

		// the step function never touches the solution set
		final DataSet<Tuple2<Long, Long>> mappedWorkset =
				deltaIteration.getWorkset().map(new TestMapper<Tuple2<Long, Long>>());
		deltaIteration.closeWith(mappedWorkset, mappedWorkset)
				.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

		final Plan programPlan = environment.createProgramPlan();
		final OptimizedPlan optimizedPlan = compileNoStats(programPlan);

		// the first data sink's input source is expected to be the iteration node
		final WorksetIterationPlanNode iterationNode = (WorksetIterationPlanNode)
				optimizedPlan.getDataSinks().iterator().next().getInput().getSource();
		assertTrue(iterationNode.getSolutionSetPlanNode().getOutgoingChannels().isEmpty());

		new JobGraphGenerator().compileJobGraph(optimizedPlan);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 30
Source Project: Flink-CEPplus   Source File: BranchingPlansCompilerTest.java    License: Apache License 2.0    Votes: 5
/**
 * Compiles a branching plan in which a delta iteration shares its inputs with other sinks and
 * uses a data set from outside the iteration inside its step function.
 *
 * <pre>
 *       (SRC A)         (SRC B)          (SRC C)
 *      /       \       /                /       \
 *  (SINK 1) (DELTA ITERATION)          |     (SINK 2)
 *             /    |   \               /
 *         (SINK 3) |   (CROSS => NEXT WORKSET)
 *                  |             |
 *                (JOIN => SOLUTION SET DELTA)
 * </pre>
 */
@Test
public void testClosureDeltaIteration() {
	final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
	environment.setParallelism(DEFAULT_PARALLELISM);

	final DataSet<Tuple2<Long, Long>> sourceA = environment.generateSequence(0, 1).map(new Duplicator<Long>());
	final DataSet<Tuple2<Long, Long>> sourceB = environment.generateSequence(0, 1).map(new Duplicator<Long>());
	final DataSet<Tuple2<Long, Long>> sourceC = environment.generateSequence(0, 1).map(new Duplicator<Long>());

	// sinks 1 and 2 hang directly off sources A and C
	sourceA.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
	sourceC.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

	// source A is the initial solution set, source B the initial workset
	final DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> deltaIteration =
			sourceA.iterateDelta(sourceB, 10, 0);

	// NOTE(review): the diagram draws SRC C feeding the cross, but the code crosses with
	// sourceB - confirm which one is intended
	final DataSet<Tuple2<Long, Long>> nextWorkset = deltaIteration.getWorkset()
			.cross(sourceB)
			.with(new IdentityCrosser<Tuple2<Long, Long>>())
			.name("Next work set");

	final DataSet<Tuple2<Long, Long>> solutionSetDelta = nextWorkset
			.join(deltaIteration.getSolutionSet())
			.where(0)
			.equalTo(0)
			.with(new IdentityJoiner<Tuple2<Long, Long>>())
			.name("Solution set delta");

	// sink 3 consumes the iteration result
	final DataSet<Tuple2<Long, Long>> iterationResult =
			deltaIteration.closeWith(solutionSetDelta, nextWorkset);
	iterationResult.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

	final Plan programPlan = environment.createProgramPlan();

	try {
		compileNoStats(programPlan);
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}