Java Code Examples for org.apache.flink.api.java.operators.DeltaIteration#closeWith()

The following examples show how to use org.apache.flink.api.java.operators.DeltaIteration#closeWith(). You can go to the original project or source file by following the links above each example, and you may check out the related API usage on the sidebar.
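
Before turning to the project examples, here is a minimal, self-contained sketch of how closeWith(solutionSetDelta, newWorkset) is typically wired up. The class name and the countdown logic below are purely illustrative; the first argument is merged into the solution set by the iteration key, while the second argument becomes the workset of the next superstep, and the iteration terminates once that workset is empty or the maximum number of iterations is reached.

import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DeltaIteration;
import org.apache.flink.api.java.tuple.Tuple2;

public class CloseWithSketch {

	public static void main(String[] args) throws Exception {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// initial solution set and initial workset: (id, countdown) pairs
		DataSet<Tuple2<Long, Long>> initial =
				env.fromElements(Tuple2.of(1L, 3L), Tuple2.of(2L, 1L));

		// open a delta iteration keyed on field 0, with at most 10 supersteps
		DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
				initial.iterateDelta(initial, 10, 0);

		// step logic: decrement the countdown of every workset element
		DataSet<Tuple2<Long, Long>> decremented = iteration.getWorkset()
				.map(new MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {
					@Override
					public Tuple2<Long, Long> map(Tuple2<Long, Long> value) {
						return Tuple2.of(value.f0, value.f1 - 1);
					}
				});

		// only elements whose countdown is still positive stay in the next
		// workset, so the workset shrinks and the iteration terminates
		DataSet<Tuple2<Long, Long>> nextWorkset = decremented
				.filter(new FilterFunction<Tuple2<Long, Long>>() {
					@Override
					public boolean filter(Tuple2<Long, Long> value) {
						return value.f1 > 0;
					}
				});

		// closeWith: the decremented elements update the solution set,
		// the filtered set drives the next superstep
		DataSet<Tuple2<Long, Long>> result = iteration.closeWith(decremented, nextWorkset);

		result.print();
	}
}

Most of the examples below pass the same DataSet for both arguments, which is the common pattern when every changed element should also be re-examined in the next superstep.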
Example 1
Source File: HighParallelismIterationsTestProgram.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// read vertex and edge data
	DataSet<Long> vertices = ConnectedComponentsData.getDefaultVertexDataSet(env)
			.rebalance();

	DataSet<Tuple2<Long, Long>> edges = ConnectedComponentsData.getDefaultEdgeDataSet(env)
			.rebalance()
			.flatMap(new ConnectedComponents.UndirectEdge());

	// assign the initial components (equal to the vertex id)
	DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices
			.map(new ConnectedComponents.DuplicateValue<>());

	// open a delta iteration
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

	// apply the step logic: join with the edges, select the minimum neighbor,
	// update if the component of the candidate is smaller
	DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges)
			.where(0).equalTo(0)
			.with(new ConnectedComponents.NeighborWithComponentIDJoin())

			.groupBy(0).aggregate(Aggregations.MIN, 1)

			.join(iteration.getSolutionSet())
			.where(0).equalTo(0)
			.with(new ConnectedComponents.ComponentIdFilter());

	// close the delta iteration (delta and new workset are identical)
	DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

	result.output(new DiscardingOutputFormat<>());

	env.execute();
}
 
Example 2
Source File: ConnectedComponentsITCase.java    From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	// set up execution environment
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// read vertex and edge data
	DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

	DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
			.flatMap(new ConnectedComponents.UndirectEdge());

	// assign the initial components (equal to the vertex id)
	DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>());

	// open a delta iteration
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

	// apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
	DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
			.groupBy(0).aggregate(Aggregations.MIN, 1)
			.join(iteration.getSolutionSet()).where(0).equalTo(0)
			.with(new ConnectedComponents.ComponentIdFilter());

	// close the delta iteration (delta and new workset are identical)
	DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

	result.writeAsCsv(resultPath, "\n", " ");

	// execute program
	env.execute("Connected Components Example");
}
 
Example 3
Source File: BranchingPlansCompilerTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * <pre>
 *       (SRC A)         (SRC B)          (SRC C)
 *      /       \       /                /       \
 *  (SINK 1) (DELTA ITERATION)          |     (SINK 2)
 *             /    |   \               /
 *         (SINK 3) |   (CROSS => NEXT WORKSET)
 *                  |             |
 *                (JOIN => SOLUTION SET DELTA)
 * </pre>
 */
@Test
public void testClosureDeltaIteration() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);
	DataSet<Tuple2<Long, Long>> sourceA = env.generateSequence(0,1).map(new Duplicator<Long>());
	DataSet<Tuple2<Long, Long>> sourceB = env.generateSequence(0,1).map(new Duplicator<Long>());
	DataSet<Tuple2<Long, Long>> sourceC = env.generateSequence(0,1).map(new Duplicator<Long>());

	sourceA.output(new DiscardingOutputFormat<Tuple2<Long,Long>>());
	sourceC.output(new DiscardingOutputFormat<Tuple2<Long,Long>>());

	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> loop = sourceA.iterateDelta(sourceB, 10, 0);

	DataSet<Tuple2<Long, Long>> workset = loop.getWorkset().cross(sourceB).with(new IdentityCrosser<Tuple2<Long, Long>>()).name("Next work set");
	DataSet<Tuple2<Long, Long>> delta = workset.join(loop.getSolutionSet()).where(0).equalTo(0).with(new IdentityJoiner<Tuple2<Long, Long>>()).name("Solution set delta");

	DataSet<Tuple2<Long, Long>> result = loop.closeWith(delta, workset);
	result.output(new DiscardingOutputFormat<Tuple2<Long,Long>>());

	Plan plan = env.createProgramPlan();

	try {
		compileNoStats(plan);
	} catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example 4
Source File: BranchingPlansCompilerTest.java    From flink with Apache License 2.0
/**
 * <pre>
 *       (SRC A)         (SRC B)          (SRC C)
 *      /       \       /                /       \
 *  (SINK 1) (DELTA ITERATION)          |     (SINK 2)
 *             /    |   \               /
 *         (SINK 3) |   (CROSS => NEXT WORKSET)
 *                  |             |
 *                (JOIN => SOLUTION SET DELTA)
 * </pre>
 */
@Test
public void testClosureDeltaIteration() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);
	DataSet<Tuple2<Long, Long>> sourceA = env.generateSequence(0,1).map(new Duplicator<Long>());
	DataSet<Tuple2<Long, Long>> sourceB = env.generateSequence(0,1).map(new Duplicator<Long>());
	DataSet<Tuple2<Long, Long>> sourceC = env.generateSequence(0,1).map(new Duplicator<Long>());

	sourceA.output(new DiscardingOutputFormat<Tuple2<Long,Long>>());
	sourceC.output(new DiscardingOutputFormat<Tuple2<Long,Long>>());

	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> loop = sourceA.iterateDelta(sourceB, 10, 0);

	DataSet<Tuple2<Long, Long>> workset = loop.getWorkset().cross(sourceB).with(new IdentityCrosser<Tuple2<Long, Long>>()).name("Next work set");
	DataSet<Tuple2<Long, Long>> delta = workset.join(loop.getSolutionSet()).where(0).equalTo(0).with(new IdentityJoiner<Tuple2<Long, Long>>()).name("Solution set delta");

	DataSet<Tuple2<Long, Long>> result = loop.closeWith(delta, workset);
	result.output(new DiscardingOutputFormat<Tuple2<Long,Long>>());

	Plan plan = env.createProgramPlan();

	try {
		compileNoStats(plan);
	} catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example 5
Source File: ConnectedComponentsWithObjectMapITCase.java    From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	// set up execution environment
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// read vertex and edge data
	DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

	DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
											.flatMap(new UndirectEdge());

	// assign the initial components (equal to the vertex id)
	DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

	// open a delta iteration
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);
	iteration.setSolutionSetUnManaged(true);

	// apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
	DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
			.groupBy(0).aggregate(Aggregations.MIN, 1)
			.join(iteration.getSolutionSet()).where(0).equalTo(0)
			.with(new ComponentIdFilter());

	// close the delta iteration (delta and new workset are identical)
	DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

	result.writeAsCsv(resultPath, "\n", " ");

	// execute program
	env.execute("Connected Components Example");
}
 
Example 6
Source File: CoGroupConnectedComponentsSecondITCase.java    From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {

	// set up execution environment
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// read vertex and edge data
	DataSet<Long> vertices = env.fromElements(ConnectedComponentsData.getEnumeratingVertices(NUM_VERTICES).split("\n"))
			.map(new VertexParser());

	DataSet<Tuple2<Long, Long>> edges = env.fromElements(ConnectedComponentsData.getRandomOddEvenEdges(NUM_EDGES, NUM_VERTICES, SEED).split("\n"))
			.flatMap(new EdgeParser());

	// assign the initial components (equal to the vertex id)
	DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>());

	// open a delta iteration
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

	// apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
	DataSet<Tuple2<Long, Long>> changes = iteration
			.getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
			.coGroup(iteration.getSolutionSet()).where(0).equalTo(0)
			.with(new MinIdAndUpdate());

	// close the delta iteration (delta and new workset are identical)
	DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

	// emit result
	List<Tuple2<Long, Long>> resultTuples = new ArrayList<>();
	result.output(new LocalCollectionOutputFormat<>(resultTuples));

	env.execute();
}
 
Example 7
Source File: ConnectedComponentsWithObjectMapITCase.java    From Flink-CEPplus with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	// set up execution environment
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// read vertex and edge data
	DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

	DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
											.flatMap(new UndirectEdge());

	// assign the initial components (equal to the vertex id)
	DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

	// open a delta iteration
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);
	iteration.setSolutionSetUnManaged(true);

	// apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
	DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
			.groupBy(0).aggregate(Aggregations.MIN, 1)
			.join(iteration.getSolutionSet()).where(0).equalTo(0)
			.with(new ComponentIdFilter());

	// close the delta iteration (delta and new workset are identical)
	DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

	result.writeAsCsv(resultPath, "\n", " ");

	// execute program
	env.execute("Connected Components Example");
}
 
Example 8
Source File: ConnectedComponentsWithSolutionSetFirstITCase.java    From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	// set up execution environment
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// read vertex and edge data
	DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

	DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
			.flatMap(new ConnectedComponents.UndirectEdge());

	// assign the initial components (equal to the vertex id)
	DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

	// open a delta iteration
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

	// apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
	DataSet<Tuple2<Long, Long>> minNeighbor = iteration.getWorkset()
			.join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
			.groupBy(0).aggregate(Aggregations.MIN, 1);

	DataSet<Tuple2<Long, Long>> updatedIds = iteration.getSolutionSet()
			.join(minNeighbor).where(0).equalTo(0).with(new UpdateComponentIdMatchMirrored());

	// close the delta iteration (delta and new workset are identical)
	DataSet<Tuple2<Long, Long>> result = iteration.closeWith(updatedIds, updatedIds);

	result.writeAsCsv(resultPath, "\n", " ");

	// execute program
	env.execute("Connected Components Example");
}
 
Example 9
Source File: AggregatorsITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testAggregatorWithoutParameterForIterateDelta() throws Exception {
	/*
	 * Test aggregator without parameter for iterateDelta
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);

	DataSet<Tuple2<Integer, Integer>> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap());

	DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration = initialSolutionSet.iterateDelta(
			initialSolutionSet, MAX_ITERATIONS, 0);

	// register aggregator
	LongSumAggregator aggr = new LongSumAggregator();
	iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

	DataSet<Tuple2<Integer, Integer>> updatedDs = iteration.getWorkset().map(new AggregateMapDelta());

	DataSet<Tuple2<Integer, Integer>> newElements = updatedDs.join(iteration.getSolutionSet())
			.where(0).equalTo(0).flatMap(new UpdateFilter());

	DataSet<Tuple2<Integer, Integer>> iterationRes = iteration.closeWith(newElements, newElements);
	List<Integer> result = iterationRes.map(new ProjectSecondMapper()).collect();
	Collections.sort(result);

	List<Integer> expected = Arrays.asList(1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5);

	assertEquals(expected, result);
}
 
Example 10
Source File: SuccessAfterNetworkBuffersFailureITCase.java    From flink with Apache License 2.0
private static void runConnectedComponents(ExecutionEnvironment env) throws Exception {

		env.setParallelism(PARALLELISM);

		// read vertex and edge data
		DataSet<Long> vertices = ConnectedComponentsData.getDefaultVertexDataSet(env)
				.rebalance();

		DataSet<Tuple2<Long, Long>> edges = ConnectedComponentsData.getDefaultEdgeDataSet(env)
				.rebalance()
				.flatMap(new ConnectedComponents.UndirectEdge());

		// assign the initial components (equal to the vertex id)
		DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices
				.map(new ConnectedComponents.DuplicateValue<Long>());

		// open a delta iteration
		DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
				verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

		// apply the step logic: join with the edges, select the minimum neighbor,
		// update if the component of the candidate is smaller
		DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges)
				.where(0).equalTo(0)
				.with(new ConnectedComponents.NeighborWithComponentIDJoin())

				.groupBy(0).aggregate(Aggregations.MIN, 1)

				.join(iteration.getSolutionSet())
				.where(0).equalTo(0)
				.with(new ConnectedComponents.ComponentIdFilter());

		// close the delta iteration (delta and new workset are identical)
		DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

		result.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

		env.execute();
	}
 
Example 11
Source File: AggregatorsITCase.java    From flink with Apache License 2.0
@Test
public void testAggregatorWithoutParameterForIterateDelta() throws Exception {
	/*
	 * Test aggregator without parameter for iterateDelta
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);

	DataSet<Tuple2<Integer, Integer>> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap());

	DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration = initialSolutionSet.iterateDelta(
			initialSolutionSet, MAX_ITERATIONS, 0);

	// register aggregator
	LongSumAggregator aggr = new LongSumAggregator();
	iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr);

	DataSet<Tuple2<Integer, Integer>> updatedDs = iteration.getWorkset().map(new AggregateMapDelta());

	DataSet<Tuple2<Integer, Integer>> newElements = updatedDs.join(iteration.getSolutionSet())
			.where(0).equalTo(0).flatMap(new UpdateFilter());

	DataSet<Tuple2<Integer, Integer>> iterationRes = iteration.closeWith(newElements, newElements);
	List<Integer> result = iterationRes.map(new ProjectSecondMapper()).collect();
	Collections.sort(result);

	List<Integer> expected = Arrays.asList(1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5);

	assertEquals(expected, result);
}
 
Example 12
Source File: ConnectedComponents.java    From flink with Apache License 2.0
public static void main(String... args) throws Exception {

		// Checking input parameters
		final ParameterTool params = ParameterTool.fromArgs(args);

		// set up execution environment
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		final int maxIterations = params.getInt("iterations", 10);

		// make parameters available in the web interface
		env.getConfig().setGlobalJobParameters(params);

		// read vertex and edge data
		DataSet<Long> vertices = getVertexDataSet(env, params);
		DataSet<Tuple2<Long, Long>> edges = getEdgeDataSet(env, params).flatMap(new UndirectEdge());

		// assign the initial components (equal to the vertex id)
		DataSet<Tuple2<Long, Long>> verticesWithInitialId =
			vertices.map(new DuplicateValue<Long>());

		// open a delta iteration
		DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
				verticesWithInitialId.iterateDelta(verticesWithInitialId, maxIterations, 0);

		// apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
		DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
				.groupBy(0).aggregate(Aggregations.MIN, 1)
				.join(iteration.getSolutionSet()).where(0).equalTo(0)
				.with(new ComponentIdFilter());

		// close the delta iteration (delta and new workset are identical)
		DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

		// emit result
		if (params.has("output")) {
			result.writeAsCsv(params.get("output"), "\n", " ");
			// execute program
			env.execute("Connected Components Example");
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			result.print();
		}
	}
 
Example 13
Source File: VertexCentricIteration.java    From flink with Apache License 2.0
/**
 * Creates the operator that represents this vertex-centric graph computation.
 *
 * <p>The Pregel iteration is mapped to delta iteration as follows.
 * The solution set consists of the set of active vertices, and the workset contains the set of messages
 * sent to vertices during the previous superstep. Initially, the workset contains a null message for each vertex.
 * At the beginning of a superstep, the solution set is joined with the workset to produce
 * a dataset containing tuples of vertex state and messages (the vertex inbox).
 * The superstep compute UDF is realized with a coGroup between the vertices with their inbox and the graph edges.
 * The output of the compute UDF contains both the new vertex values and the new messages produced.
 * These are directed to the solution set delta and new workset, respectively, with subsequent flatMaps.
 *
 * @return The operator that represents this vertex-centric graph computation.
 */
@Override
public DataSet<Vertex<K, VV>> createResult() {
	if (this.initialVertices == null) {
		throw new IllegalStateException("The input data set has not been set.");
	}

	// prepare the type information
	TypeInformation<K> keyType = ((TupleTypeInfo<?>) initialVertices.getType()).getTypeAt(0);
	TypeInformation<Tuple2<K, Message>> messageTypeInfo =
		new TupleTypeInfo<>(keyType, messageType);
	TypeInformation<Vertex<K, VV>> vertexType = initialVertices.getType();
	TypeInformation<Either<Vertex<K, VV>, Tuple2<K, Message>>> intermediateTypeInfo =
		new EitherTypeInfo<>(vertexType, messageTypeInfo);
	TypeInformation<Either<NullValue, Message>> nullableMsgTypeInfo =
		new EitherTypeInfo<>(TypeExtractor.getForClass(NullValue.class), messageType);
	TypeInformation<Tuple2<K, Either<NullValue, Message>>> workSetTypeInfo =
		new TupleTypeInfo<>(keyType, nullableMsgTypeInfo);

	DataSet<Tuple2<K, Either<NullValue, Message>>> initialWorkSet = initialVertices.map(
			new InitializeWorkSet<K, VV, Message>()).returns(workSetTypeInfo);

	final DeltaIteration<Vertex<K, VV>, Tuple2<K, Either<NullValue, Message>>> iteration =
			initialVertices.iterateDelta(initialWorkSet, this.maximumNumberOfIterations, 0);
	setUpIteration(iteration);

	// join with the current state to get vertex values
	DataSet<Tuple2<Vertex<K, VV>, Either<NullValue, Message>>> verticesWithMsgs =
			iteration.getSolutionSet().join(iteration.getWorkset())
			.where(0).equalTo(0)
			.with(new AppendVertexState<>())
			.returns(new TupleTypeInfo<>(
				vertexType, nullableMsgTypeInfo));

	VertexComputeUdf<K, VV, EV, Message> vertexUdf =
		new VertexComputeUdf<>(computeFunction, intermediateTypeInfo);

	CoGroupOperator<?, ?, Either<Vertex<K, VV>, Tuple2<K, Message>>> superstepComputation =
			verticesWithMsgs.coGroup(edgesWithValue)
			.where("f0.f0").equalTo(0)
			.with(vertexUdf);

	// compute the solution set delta
	DataSet<Vertex<K, VV>> solutionSetDelta = superstepComputation.flatMap(
		new ProjectNewVertexValue<>()).returns(vertexType);

	// compute the inbox of each vertex for the next superstep (new workset)
	DataSet<Tuple2<K, Either<NullValue, Message>>> allMessages = superstepComputation.flatMap(
		new ProjectMessages<>()).returns(workSetTypeInfo);

	DataSet<Tuple2<K, Either<NullValue, Message>>> newWorkSet = allMessages;

	// check if a combiner has been provided
	if (combineFunction != null) {

		MessageCombinerUdf<K, Message> combinerUdf =
			new MessageCombinerUdf<>(combineFunction, workSetTypeInfo);

		DataSet<Tuple2<K, Either<NullValue, Message>>> combinedMessages = allMessages
				.groupBy(0).reduceGroup(combinerUdf)
				.setCombinable(true);

		newWorkSet = combinedMessages;
	}

	// configure the compute function
	superstepComputation = superstepComputation.name("Compute Function");
	if (this.configuration != null) {
		for (Tuple2<String, DataSet<?>> e : this.configuration.getBcastVars()) {
			superstepComputation = superstepComputation.withBroadcastSet(e.f1, e.f0);
		}
	}

	return iteration.closeWith(solutionSetDelta, newWorkSet);
}
 
Example 14
Source File: IterationsCompilerTest.java    From flink with Apache License 2.0
public static DataSet<Tuple2<Long, Long>> doDeltaIteration(DataSet<Tuple2<Long, Long>> vertices, DataSet<Tuple2<Long, Long>> edges) {

	// open a delta iteration on the vertices, keyed on field 0
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> depIteration = vertices.iterateDelta(vertices, 100, 0);

	// collect the neighbor ids of all workset vertices
	DataSet<Tuple1<Long>> candidates = depIteration.getWorkset().join(edges).where(0).equalTo(0)
			.projectSecond(1);

	DataSet<Tuple1<Long>> grouped = candidates.groupBy(0).reduceGroup(new Reduce101());

	DataSet<Tuple2<Long, Long>> candidatesDependencies =
			grouped.join(edges).where(0).equalTo(1).projectSecond(0, 1);

	// join the candidates with the solution set and keep the minimum value per key
	DataSet<Tuple2<Long, Long>> verticesWithNewComponents =
			candidatesDependencies.join(depIteration.getSolutionSet()).where(0).equalTo(0)
			.with(new Join222())
			.groupBy(0).aggregate(Aggregations.MIN, 1);

	DataSet<Tuple2<Long, Long>> updatedComponentId =
			verticesWithNewComponents.join(depIteration.getSolutionSet()).where(0).equalTo(0)
			.flatMap(new FlatMapJoin());

	// close the delta iteration (delta and new workset are identical)
	DataSet<Tuple2<Long, Long>> depResult = depIteration.closeWith(updatedComponentId, updatedComponentId);

	return depResult;
}
 
Example 15
Source File: ConnectedComponentsWithDeferredUpdateITCase.java    From flink with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	// set up execution environment
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// read vertex and edge data
	DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

	DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
			.flatMap(new ConnectedComponents.UndirectEdge());

	// assign the initial components (equal to the vertex id)
	DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

	// open a delta iteration
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

	// apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
	DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset()
			.join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
			.groupBy(0).aggregate(Aggregations.MIN, 1)
			.join(iteration.getSolutionSet()).where(0).equalTo(0)
			.with(new UpdateComponentIdMatchNonPreserving());

	DataSet<Tuple2<Long, Long>> delta;
	if (extraMapper) {
		delta = changes.map(
				// ID Mapper
				new MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {
					private static final long serialVersionUID = -3929364091829757322L;

					@Override
					public Tuple2<Long, Long> map(Tuple2<Long, Long> v) throws Exception {
						return v;
					}
				});
	}
	else {
		delta = changes;
	}

	// close the delta iteration (the possibly re-mapped delta updates the solution set; the original changes form the new workset)
	DataSet<Tuple2<Long, Long>> result = iteration.closeWith(delta, changes);

	result.writeAsCsv(resultPath, "\n", " ");

	// execute program
	env.execute("Connected Components Example");
}
 
Example 16
Source File: MultipleJoinsWithSolutionSetCompilerTest.java    From Flink-CEPplus with Apache License 2.0
public static DataSet<Tuple2<Long, Double>> constructPlan(DataSet<Tuple2<Long, Double>> initialData, int numIterations) {

		DeltaIteration<Tuple2<Long, Double>, Tuple2<Long, Double>> iteration = initialData.iterateDelta(initialData, numIterations, 0);

		DataSet<Tuple2<Long, Double>> delta = iteration.getSolutionSet()
				.join(iteration.getWorkset().flatMap(new Duplicator())).where(0).equalTo(0).with(new SummingJoin()).name(JOIN_1)
				.groupBy(0).aggregate(Aggregations.MIN, 1).map(new Expander())
				.join(iteration.getSolutionSet()).where(0).equalTo(0).with(new SummingJoinProject()).name(JOIN_2);

		DataSet<Tuple2<Long, Double>> changes = delta.groupBy(0).aggregate(Aggregations.SUM, 1);

		// close with the join results as the solution set delta and their per-key sums as the new workset
		DataSet<Tuple2<Long, Double>> result = iteration.closeWith(delta, changes);

		return result;
	}
 
Example 17
Source File: MultipleJoinsWithSolutionSetCompilerTest.java    From flink with Apache License 2.0
public static DataSet<Tuple2<Long, Double>> constructPlan(DataSet<Tuple2<Long, Double>> initialData, int numIterations) {

		DeltaIteration<Tuple2<Long, Double>, Tuple2<Long, Double>> iteration = initialData.iterateDelta(initialData, numIterations, 0);

		DataSet<Tuple2<Long, Double>> delta = iteration.getSolutionSet()
				.join(iteration.getWorkset().flatMap(new Duplicator())).where(0).equalTo(0).with(new SummingJoin()).name(JOIN_1)
				.groupBy(0).aggregate(Aggregations.MIN, 1).map(new Expander())
				.join(iteration.getSolutionSet()).where(0).equalTo(0).with(new SummingJoinProject()).name(JOIN_2);

		DataSet<Tuple2<Long, Double>> changes = delta.groupBy(0).aggregate(Aggregations.SUM, 1);

		// close with the join results as the solution set delta and their per-key sums as the new workset
		DataSet<Tuple2<Long, Double>> result = iteration.closeWith(delta, changes);

		return result;
	}
 
Example 18
Source File: NestedIterationsTest.java    From flink with Apache License 2.0
@Test
public void testDeltaIterationInClosure() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		DataSet<Tuple2<Long, Long>> data1 = env.fromElements(new Tuple2<Long, Long>(0L, 0L));
		DataSet<Tuple2<Long, Long>> data2 = env.fromElements(new Tuple2<Long, Long>(0L, 0L));
		
		DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> firstIteration = data1.iterateDelta(data1, 100, 0);
		
		DataSet<Tuple2<Long, Long>> inFirst = firstIteration.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>());
		
		DataSet<Tuple2<Long, Long>> firstResult = firstIteration.closeWith(inFirst, inFirst).map(new IdentityMapper<Tuple2<Long,Long>>());
		
		
		DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> mainIteration = data2.iterateDelta(data2, 100, 0);
		
		DataSet<Tuple2<Long, Long>> joined = mainIteration.getWorkset().join(firstResult).where(0).equalTo(0)
						.projectFirst(0).projectSecond(0);
		
		DataSet<Tuple2<Long, Long>> mainResult = mainIteration.closeWith(joined, joined);
		
		mainResult.output(new DiscardingOutputFormat<Tuple2<Long,Long>>());
		
		Plan p = env.createProgramPlan();
		
		// optimizer should be able to translate this
		OptimizedPlan op = compileNoStats(p);
		
		// job graph generator should be able to translate this
		new JobGraphGenerator().compileJobGraph(op);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 19
Source File: NestedIterationsTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testDeltaIterationInClosure() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		DataSet<Tuple2<Long, Long>> data1 = env.fromElements(new Tuple2<Long, Long>(0L, 0L));
		DataSet<Tuple2<Long, Long>> data2 = env.fromElements(new Tuple2<Long, Long>(0L, 0L));
		
		DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> firstIteration = data1.iterateDelta(data1, 100, 0);
		
		DataSet<Tuple2<Long, Long>> inFirst = firstIteration.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>());
		
		DataSet<Tuple2<Long, Long>> firstResult = firstIteration.closeWith(inFirst, inFirst).map(new IdentityMapper<Tuple2<Long,Long>>());
		
		
		DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> mainIteration = data2.iterateDelta(data2, 100, 0);
		
		DataSet<Tuple2<Long, Long>> joined = mainIteration.getWorkset().join(firstResult).where(0).equalTo(0)
						.projectFirst(0).projectSecond(0);
		
		DataSet<Tuple2<Long, Long>> mainResult = mainIteration.closeWith(joined, joined);
		
		mainResult.output(new DiscardingOutputFormat<Tuple2<Long,Long>>());
		
		Plan p = env.createProgramPlan();
		
		// optimizer should be able to translate this
		OptimizedPlan op = compileNoStats(p);
		
		// job graph generator should be able to translate this
		new JobGraphGenerator().compileJobGraph(op);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}