org.apache.flink.graph.utils.Tuple2ToVertexMap Java Examples

The following examples show how to use org.apache.flink.graph.utils.Tuple2ToVertexMap. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: GraphCsvReader.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Creates a Graph from CSV input without edge values.
 *
 * <p>The vertex values are specified through a vertices input file or a user-defined map function.
 * If no vertices input file is provided, the vertex IDs are automatically created from the edges
 * input file.
 *
 * @param vertexKey the type of the vertex IDs
 * @param vertexValue the type of the vertex values
 * @return a Graph whose vertex IDs and vertex values have the given types and whose edges
 *         carry no value ({@code NullValue})
 * @throws RuntimeException if no edge reader is set, or if neither a vertex reader nor a
 *         mapper is available to supply the vertex values
 */
@SuppressWarnings({ "serial", "unchecked" })
public <K, VV> Graph<K, VV, NullValue> vertexTypes(Class<K> vertexKey, Class<VV> vertexValue) {

	if (edgeReader == null) {
		throw new RuntimeException("The edge input file cannot be null!");
	}

	// both fields of an edge record are parsed with the vertex ID type
	DataSet<Edge<K, NullValue>> edges = edgeReader
		.types(vertexKey, vertexKey)
			.name(GraphCsvReader.class.getName())
		.map(new Tuple2ToEdgeMap<>())
			.name("To Edge");

	// the vertex value can be provided by an input file or a user-defined mapper;
	// the input file takes precedence when both are present
	if (vertexReader != null) {
		DataSet<Vertex<K, VV>> vertices = vertexReader
			.types(vertexKey, vertexValue)
				.name(GraphCsvReader.class.getName())
			.map(new Tuple2ToVertexMap<>())
				.name("Type conversion");

		return Graph.fromDataSet(vertices, edges, executionContext);
	}
	else if (mapper != null) {
		return Graph.fromDataSet(edges, (MapFunction<K, VV>) mapper, executionContext);
	}
	else {
		// the leading space keeps the two concatenated halves of the message apart
		throw new RuntimeException("Vertex values have to be specified through a vertices input file"
				+ " or a user-defined map function.");
	}
}

Example #2

Source File: GraphCsvReader.java From flink with Apache License 2.0

5 votes

/**
 * Creates a Graph from CSV input without edge values.
 *
 * <p>The vertex values are specified through a vertices input file or a user-defined map function.
 * If no vertices input file is provided, the vertex IDs are automatically created from the edges
 * input file.
 *
 * @param vertexKey the type of the vertex IDs
 * @param vertexValue the type of the vertex values
 * @return a Graph whose vertex IDs and vertex values have the given types and whose edges
 *         carry no value ({@code NullValue})
 * @throws RuntimeException if no edge reader is set, or if neither a vertex reader nor a
 *         mapper is available to supply the vertex values
 */
@SuppressWarnings({ "serial", "unchecked" })
public <K, VV> Graph<K, VV, NullValue> vertexTypes(Class<K> vertexKey, Class<VV> vertexValue) {

	if (edgeReader == null) {
		throw new RuntimeException("The edge input file cannot be null!");
	}

	// both fields of an edge record are parsed with the vertex ID type
	DataSet<Edge<K, NullValue>> edges = edgeReader
		.types(vertexKey, vertexKey)
			.name(GraphCsvReader.class.getName())
		.map(new Tuple2ToEdgeMap<>())
			.name("To Edge");

	// the vertex value can be provided by an input file or a user-defined mapper;
	// the input file takes precedence when both are present
	if (vertexReader != null) {
		DataSet<Vertex<K, VV>> vertices = vertexReader
			.types(vertexKey, vertexValue)
				.name(GraphCsvReader.class.getName())
			.map(new Tuple2ToVertexMap<>())
				.name("Type conversion");

		return Graph.fromDataSet(vertices, edges, executionContext);
	}
	else if (mapper != null) {
		return Graph.fromDataSet(edges, (MapFunction<K, VV>) mapper, executionContext);
	}
	else {
		// the leading space keeps the two concatenated halves of the message apart
		throw new RuntimeException("Vertex values have to be specified through a vertices input file"
				+ " or a user-defined map function.");
	}
}

Example #3

Source File: GraphCsvReader.java From flink with Apache License 2.0

5 votes

/**
 * Creates a Graph from CSV input without edge values.
 *
 * <p>The vertex values are specified through a vertices input file or a user-defined map function.
 * If no vertices input file is provided, the vertex IDs are automatically created from the edges
 * input file.
 *
 * @param vertexKey the type of the vertex IDs
 * @param vertexValue the type of the vertex values
 * @return a Graph whose vertex IDs and vertex values have the given types and whose edges
 *         carry no value ({@code NullValue})
 * @throws RuntimeException if no edge reader is set, or if neither a vertex reader nor a
 *         mapper is available to supply the vertex values
 */
@SuppressWarnings({ "serial", "unchecked" })
public <K, VV> Graph<K, VV, NullValue> vertexTypes(Class<K> vertexKey, Class<VV> vertexValue) {

	if (edgeReader == null) {
		throw new RuntimeException("The edge input file cannot be null!");
	}

	// both fields of an edge record are parsed with the vertex ID type
	DataSet<Edge<K, NullValue>> edges = edgeReader
		.types(vertexKey, vertexKey)
			.name(GraphCsvReader.class.getName())
		.map(new Tuple2ToEdgeMap<>())
			.name("To Edge");

	// the vertex value can be provided by an input file or a user-defined mapper;
	// the input file takes precedence when both are present
	if (vertexReader != null) {
		DataSet<Vertex<K, VV>> vertices = vertexReader
			.types(vertexKey, vertexValue)
				.name(GraphCsvReader.class.getName())
			.map(new Tuple2ToVertexMap<>())
				.name("Type conversion");

		return Graph.fromDataSet(vertices, edges, executionContext);
	}
	else if (mapper != null) {
		return Graph.fromDataSet(edges, (MapFunction<K, VV>) mapper, executionContext);
	}
	else {
		// the leading space keeps the two concatenated halves of the message apart
		throw new RuntimeException("Vertex values have to be specified through a vertices input file"
				+ " or a user-defined map function.");
	}
}

Example #4

Source File: SpargelCompilerTest.java From flink with Apache License 2.0

4 votes

/**
 * Compiles a scatter-gather connected-components program whose scatter and gather
 * functions both receive a broadcast set, then checks the ship strategies and
 * parallelism chosen in the optimized plan.
 */
@SuppressWarnings("serial")
@Test
public void testSpargelCompilerWithBroadcastVariable() {
	final String broadcastVariableName = "broadcast variable";

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	// compose test program

	DataSet<Long> bcVar = env.fromElements(1L);

	DataSet<Vertex<Long, Long>> initialVertices = env.fromElements(
		new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L))
		.map(new Tuple2ToVertexMap<>());

	DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple2<>(1L, 2L))
		.map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {

			@Override
			public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) {
				return new Edge<>(edge.f0, edge.f1, NullValue.getInstance());
			}
		});

	Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env);

	ScatterGatherConfiguration parameters = new ScatterGatherConfiguration();
	parameters.addBroadcastSetForScatterFunction(broadcastVariableName, bcVar);
	parameters.addBroadcastSetForGatherFunction(broadcastVariableName, bcVar);

	// hand the configuration to the iteration; without it the broadcast sets registered
	// above never reach the plan and this test would not exercise broadcast variables
	DataSet<Vertex<Long, Long>> result = graph.runScatterGatherIteration(
		new ConnectedComponents.CCMessenger<>(BasicTypeInfo.LONG_TYPE_INFO),
		new ConnectedComponents.CCUpdater<>(), 100, parameters)
		.getVertices();

	result.output(new DiscardingOutputFormat<>());

	Plan p = env.createProgramPlan("Spargel Connected Components");
	OptimizedPlan op = compileNoStats(p);

	// check the sink
	SinkPlanNode sink = op.getDataSinks().iterator().next();
	assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
	assertEquals(DEFAULT_PARALLELISM, sink.getParallelism());

	// check the iteration
	WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism());

	// check the solution set join and the delta
	PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode();
	assertTrue(ssDelta instanceof DualInputPlanNode); // this is only true if the update functions preserves the partitioning

	DualInputPlanNode ssJoin = (DualInputPlanNode) ssDelta;
	assertEquals(DEFAULT_PARALLELISM, ssJoin.getParallelism());
	assertEquals(ShipStrategyType.PARTITION_HASH, ssJoin.getInput1().getShipStrategy());
	assertEquals(new FieldList(0), ssJoin.getInput1().getShipStrategyKeys());

	// check the workset set join
	DualInputPlanNode edgeJoin = (DualInputPlanNode) ssJoin.getInput1().getSource();
	assertEquals(DEFAULT_PARALLELISM, edgeJoin.getParallelism());
	assertEquals(ShipStrategyType.PARTITION_HASH, edgeJoin.getInput1().getShipStrategy());
	assertEquals(ShipStrategyType.FORWARD, edgeJoin.getInput2().getShipStrategy());
	assertTrue(edgeJoin.getInput1().getTempMode().isCached());

	assertEquals(new FieldList(0), edgeJoin.getInput1().getShipStrategyKeys());

	// check that the initial partitioning is pushed out of the loop
	assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy());
	assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput2().getShipStrategy());
	assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys());
	assertEquals(new FieldList(0), iteration.getInput2().getShipStrategyKeys());
}

Example #5

Source File: SpargelCompilerTest.java From flink with Apache License 2.0

4 votes

/**
 * Compiles a scatter-gather connected-components program and checks the ship
 * strategies, parallelism and local strategies found in the optimized plan.
 */
@SuppressWarnings("serial")
@Test
public void testSpargelCompiler() {
	ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
	environment.setParallelism(DEFAULT_PARALLELISM);

	// build the program under test: two vertices connected by one value-less edge
	DataSet<Vertex<Long, Long>> vertexSet = environment
		.fromElements(new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L))
		.map(new Tuple2ToVertexMap<>());

	DataSet<Edge<Long, NullValue>> edgeSet = environment
		.fromElements(new Tuple2<>(1L, 2L))
		.map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {

			@Override
			public Edge<Long, NullValue> map(Tuple2<Long, Long> pair) {
				return new Edge<>(pair.f0, pair.f1, NullValue.getInstance());
			}
		});

	Graph<Long, Long, NullValue> inputGraph = Graph.fromDataSet(vertexSet, edgeSet, environment);

	DataSet<Vertex<Long, Long>> components = inputGraph
		.runScatterGatherIteration(
			new ConnectedComponents.CCMessenger<>(BasicTypeInfo.LONG_TYPE_INFO),
			new ConnectedComponents.CCUpdater<>(),
			100)
		.getVertices();

	components.output(new DiscardingOutputFormat<>());

	Plan plan = environment.createProgramPlan("Spargel Connected Components");
	OptimizedPlan optimizedPlan = compileNoStats(plan);

	// every key-based expectation below refers to field 0
	FieldList firstField = new FieldList(0);

	// the sink is fed by forwarding and runs with the default parallelism
	SinkPlanNode sinkNode = optimizedPlan.getDataSinks().iterator().next();
	assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());
	assertEquals(DEFAULT_PARALLELISM, sinkNode.getParallelism());

	// the sink's source is the workset iteration
	WorksetIterationPlanNode iterationNode =
		(WorksetIterationPlanNode) sinkNode.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, iterationNode.getParallelism());

	// solution set delta: a two-input node only if the update function preserves the partitioning
	PlanNode solutionSetDelta = iterationNode.getSolutionSetDeltaPlanNode();
	assertTrue(solutionSetDelta instanceof DualInputPlanNode);

	DualInputPlanNode solutionSetJoin = (DualInputPlanNode) solutionSetDelta;
	assertEquals(DEFAULT_PARALLELISM, solutionSetJoin.getParallelism());
	assertEquals(ShipStrategyType.PARTITION_HASH, solutionSetJoin.getInput1().getShipStrategy());
	assertEquals(firstField, solutionSetJoin.getInput1().getShipStrategyKeys());

	// the join that feeds the first input of the solution set join
	DualInputPlanNode worksetJoin = (DualInputPlanNode) solutionSetJoin.getInput1().getSource();
	assertEquals(DEFAULT_PARALLELISM, worksetJoin.getParallelism());
	assertEquals(ShipStrategyType.PARTITION_HASH, worksetJoin.getInput1().getShipStrategy());
	assertEquals(ShipStrategyType.FORWARD, worksetJoin.getInput2().getShipStrategy());
	assertTrue(worksetJoin.getInput1().getTempMode().isCached());

	assertEquals(firstField, worksetJoin.getInput1().getShipStrategyKeys());

	// the initial partitioning must have been pushed out of the loop
	assertEquals(ShipStrategyType.PARTITION_HASH, iterationNode.getInput1().getShipStrategy());
	assertEquals(ShipStrategyType.PARTITION_HASH, iterationNode.getInput2().getShipStrategy());
	assertEquals(firstField, iterationNode.getInput1().getShipStrategyKeys());
	assertEquals(firstField, iterationNode.getInput2().getShipStrategyKeys());

	// the initial workset sort must also sit outside the loop
	assertEquals(LocalStrategy.SORT, iterationNode.getInput2().getLocalStrategy());
	assertEquals(firstField, iterationNode.getInput2().getLocalStrategyKeys());
}

Example #6

Source File: PregelCompilerTest.java From flink with Apache License 2.0

4 votes

/**
 * Compiles a vertex-centric connected-components program that uses a message
 * combiner and checks the ship strategies and parallelism in the optimized plan.
 */
@SuppressWarnings("serial")
@Test
public void testPregelWithCombiner() {
	ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
	environment.setParallelism(DEFAULT_PARALLELISM);

	// build the program under test: two vertices connected by one value-less edge
	DataSet<Vertex<Long, Long>> vertexSet = environment
		.fromElements(new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L))
		.map(new Tuple2ToVertexMap<>());

	DataSet<Edge<Long, NullValue>> edgeSet = environment
		.fromElements(new Tuple2<>(1L, 2L))
		.map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {

			@Override
			public Edge<Long, NullValue> map(Tuple2<Long, Long> pair) {
				return new Edge<>(pair.f0, pair.f1, NullValue.getInstance());
			}
		});

	Graph<Long, Long, NullValue> inputGraph = Graph.fromDataSet(vertexSet, edgeSet, environment);

	DataSet<Vertex<Long, Long>> components = inputGraph
		.runVertexCentricIteration(new CCCompute(), new CCCombiner(), 100)
		.getVertices();

	components.output(new DiscardingOutputFormat<>());

	Plan plan = environment.createProgramPlan("Pregel Connected Components");
	OptimizedPlan optimizedPlan = compileNoStats(plan);

	// every key-based expectation below refers to field 0
	FieldList firstField = new FieldList(0);

	// the sink is fed by forwarding and runs with the default parallelism
	SinkPlanNode sinkNode = optimizedPlan.getDataSinks().iterator().next();
	assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());
	assertEquals(DEFAULT_PARALLELISM, sinkNode.getParallelism());

	// the sink's source is the workset iteration
	WorksetIterationPlanNode iterationNode =
		(WorksetIterationPlanNode) sinkNode.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, iterationNode.getParallelism());

	// the combiner sits on the iteration's second input and is fed by forwarding
	SingleInputPlanNode combinerNode = (SingleInputPlanNode) iterationNode.getInput2().getSource();
	assertEquals(ShipStrategyType.FORWARD, combinerNode.getInput().getShipStrategy());

	// the solution set delta is expected to be a single-input node
	PlanNode solutionSetDelta = iterationNode.getSolutionSetDeltaPlanNode();
	assertTrue(solutionSetDelta instanceof SingleInputPlanNode);

	SingleInputPlanNode deltaNode = (SingleInputPlanNode) solutionSetDelta;
	SingleInputPlanNode flatMapNode = (SingleInputPlanNode) deltaNode.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, flatMapNode.getParallelism());
	assertEquals(ShipStrategyType.FORWARD, flatMapNode.getInput().getShipStrategy());

	// the computation coGroup feeds the flat map
	DualInputPlanNode coGroupNode = (DualInputPlanNode) flatMapNode.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, coGroupNode.getParallelism());
	assertEquals(ShipStrategyType.FORWARD, coGroupNode.getInput1().getShipStrategy());
	assertEquals(ShipStrategyType.PARTITION_HASH, coGroupNode.getInput2().getShipStrategy());
	assertTrue(coGroupNode.getInput2().getTempMode().isCached());

	assertEquals(firstField, coGroupNode.getInput2().getShipStrategyKeys());

	// the initial partitioning must have been pushed out of the loop
	assertEquals(ShipStrategyType.PARTITION_HASH, iterationNode.getInput1().getShipStrategy());
	assertEquals(firstField, iterationNode.getInput1().getShipStrategyKeys());
}

Example #7

Source File: PregelCompilerTest.java From flink with Apache License 2.0

4 votes

/**
 * Compiles a vertex-centric connected-components program configured with a
 * broadcast set and checks the ship strategies and parallelism in the optimized plan.
 */
@SuppressWarnings("serial")
@Test
public void testPregelCompilerWithBroadcastVariable() {
	final String broadcastSetName = "broadcast";

	ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
	environment.setParallelism(DEFAULT_PARALLELISM);

	// build the program under test: a broadcast set plus two vertices and one value-less edge
	DataSet<Long> broadcastSet = environment.fromElements(1L);

	DataSet<Vertex<Long, Long>> vertexSet = environment
		.fromElements(new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L))
		.map(new Tuple2ToVertexMap<>());

	DataSet<Edge<Long, NullValue>> edgeSet = environment
		.fromElements(new Tuple2<>(1L, 2L))
		.map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {

			@Override
			public Edge<Long, NullValue> map(Tuple2<Long, Long> pair) {
				return new Edge<>(pair.f0, pair.f1, NullValue.getInstance());
			}
		});

	Graph<Long, Long, NullValue> inputGraph = Graph.fromDataSet(vertexSet, edgeSet, environment);

	VertexCentricConfiguration configuration = new VertexCentricConfiguration();
	configuration.addBroadcastSet(broadcastSetName, broadcastSet);

	DataSet<Vertex<Long, Long>> components = inputGraph
		.runVertexCentricIteration(new CCCompute(), null, 100, configuration)
		.getVertices();

	components.output(new DiscardingOutputFormat<>());

	Plan plan = environment.createProgramPlan("Pregel Connected Components");
	OptimizedPlan optimizedPlan = compileNoStats(plan);

	// every key-based expectation below refers to field 0
	FieldList firstField = new FieldList(0);

	// the sink is fed by forwarding and runs with the default parallelism
	SinkPlanNode sinkNode = optimizedPlan.getDataSinks().iterator().next();
	assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());
	assertEquals(DEFAULT_PARALLELISM, sinkNode.getParallelism());

	// the sink's source is the workset iteration
	WorksetIterationPlanNode iterationNode =
		(WorksetIterationPlanNode) sinkNode.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, iterationNode.getParallelism());

	// the solution set delta is expected to be a single-input node
	PlanNode solutionSetDelta = iterationNode.getSolutionSetDeltaPlanNode();
	assertTrue(solutionSetDelta instanceof SingleInputPlanNode);

	SingleInputPlanNode deltaNode = (SingleInputPlanNode) solutionSetDelta;
	SingleInputPlanNode flatMapNode = (SingleInputPlanNode) deltaNode.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, flatMapNode.getParallelism());
	assertEquals(ShipStrategyType.FORWARD, flatMapNode.getInput().getShipStrategy());

	// the computation coGroup feeds the flat map
	DualInputPlanNode coGroupNode = (DualInputPlanNode) flatMapNode.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, coGroupNode.getParallelism());
	assertEquals(ShipStrategyType.FORWARD, coGroupNode.getInput1().getShipStrategy());
	assertEquals(ShipStrategyType.PARTITION_HASH, coGroupNode.getInput2().getShipStrategy());
	assertTrue(coGroupNode.getInput2().getTempMode().isCached());

	assertEquals(firstField, coGroupNode.getInput2().getShipStrategyKeys());

	// the initial partitioning must have been pushed out of the loop
	assertEquals(ShipStrategyType.PARTITION_HASH, iterationNode.getInput1().getShipStrategy());
	assertEquals(firstField, iterationNode.getInput1().getShipStrategyKeys());
}

Example #8

Source File: PregelCompilerTest.java From flink with Apache License 2.0

4 votes

/**
 * Compiles a vertex-centric connected-components program without a combiner and
 * checks the ship strategies and parallelism in the optimized plan.
 */
@SuppressWarnings("serial")
@Test
public void testPregelCompiler() {
	ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
	environment.setParallelism(DEFAULT_PARALLELISM);

	// build the program under test: two vertices connected by one value-less edge
	DataSet<Vertex<Long, Long>> vertexSet = environment
		.fromElements(new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L))
		.map(new Tuple2ToVertexMap<>());

	DataSet<Edge<Long, NullValue>> edgeSet = environment
		.fromElements(new Tuple2<>(1L, 2L))
		.map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {

			@Override
			public Edge<Long, NullValue> map(Tuple2<Long, Long> pair) {
				return new Edge<>(pair.f0, pair.f1, NullValue.getInstance());
			}
		});

	Graph<Long, Long, NullValue> inputGraph = Graph.fromDataSet(vertexSet, edgeSet, environment);

	DataSet<Vertex<Long, Long>> components = inputGraph
		.runVertexCentricIteration(new CCCompute(), null, 100)
		.getVertices();

	components.output(new DiscardingOutputFormat<>());

	Plan plan = environment.createProgramPlan("Pregel Connected Components");
	OptimizedPlan optimizedPlan = compileNoStats(plan);

	// every key-based expectation below refers to field 0
	FieldList firstField = new FieldList(0);

	// the sink is fed by forwarding and runs with the default parallelism
	SinkPlanNode sinkNode = optimizedPlan.getDataSinks().iterator().next();
	assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());
	assertEquals(DEFAULT_PARALLELISM, sinkNode.getParallelism());

	// the sink's source is the workset iteration
	WorksetIterationPlanNode iterationNode =
		(WorksetIterationPlanNode) sinkNode.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, iterationNode.getParallelism());

	// the solution set delta is expected to be a single-input node
	PlanNode solutionSetDelta = iterationNode.getSolutionSetDeltaPlanNode();
	assertTrue(solutionSetDelta instanceof SingleInputPlanNode);

	SingleInputPlanNode deltaNode = (SingleInputPlanNode) solutionSetDelta;
	SingleInputPlanNode flatMapNode = (SingleInputPlanNode) deltaNode.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, flatMapNode.getParallelism());
	assertEquals(ShipStrategyType.FORWARD, flatMapNode.getInput().getShipStrategy());

	// the computation coGroup feeds the flat map
	DualInputPlanNode coGroupNode = (DualInputPlanNode) flatMapNode.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, coGroupNode.getParallelism());
	assertEquals(ShipStrategyType.FORWARD, coGroupNode.getInput1().getShipStrategy());
	assertEquals(ShipStrategyType.PARTITION_HASH, coGroupNode.getInput2().getShipStrategy());
	assertTrue(coGroupNode.getInput2().getTempMode().isCached());

	assertEquals(firstField, coGroupNode.getInput2().getShipStrategyKeys());

	// the initial partitioning must have been pushed out of the loop
	assertEquals(ShipStrategyType.PARTITION_HASH, iterationNode.getInput1().getShipStrategy());
	assertEquals(firstField, iterationNode.getInput1().getShipStrategyKeys());
}

Example #9

Source File: SpargelCompilerTest.java From flink with Apache License 2.0

4 votes

/**
 * Compiles a scatter-gather connected-components program whose scatter and gather
 * functions both receive a broadcast set, then checks the ship strategies and
 * parallelism chosen in the optimized plan.
 */
@SuppressWarnings("serial")
@Test
public void testSpargelCompilerWithBroadcastVariable() {
	final String broadcastVariableName = "broadcast variable";

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	// compose test program

	DataSet<Long> bcVar = env.fromElements(1L);

	DataSet<Vertex<Long, Long>> initialVertices = env.fromElements(
		new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L))
		.map(new Tuple2ToVertexMap<>());

	DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple2<>(1L, 2L))
		.map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {

			@Override
			public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) {
				return new Edge<>(edge.f0, edge.f1, NullValue.getInstance());
			}
		});

	Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env);

	ScatterGatherConfiguration parameters = new ScatterGatherConfiguration();
	parameters.addBroadcastSetForScatterFunction(broadcastVariableName, bcVar);
	parameters.addBroadcastSetForGatherFunction(broadcastVariableName, bcVar);

	// hand the configuration to the iteration; without it the broadcast sets registered
	// above never reach the plan and this test would not exercise broadcast variables
	DataSet<Vertex<Long, Long>> result = graph.runScatterGatherIteration(
		new ConnectedComponents.CCMessenger<>(BasicTypeInfo.LONG_TYPE_INFO),
		new ConnectedComponents.CCUpdater<>(), 100, parameters)
		.getVertices();

	result.output(new DiscardingOutputFormat<>());

	Plan p = env.createProgramPlan("Spargel Connected Components");
	OptimizedPlan op = compileNoStats(p);

	// check the sink
	SinkPlanNode sink = op.getDataSinks().iterator().next();
	assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
	assertEquals(DEFAULT_PARALLELISM, sink.getParallelism());

	// check the iteration
	WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism());

	// check the solution set join and the delta
	PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode();
	assertTrue(ssDelta instanceof DualInputPlanNode); // this is only true if the update functions preserves the partitioning

	DualInputPlanNode ssJoin = (DualInputPlanNode) ssDelta;
	assertEquals(DEFAULT_PARALLELISM, ssJoin.getParallelism());
	assertEquals(ShipStrategyType.PARTITION_HASH, ssJoin.getInput1().getShipStrategy());
	assertEquals(new FieldList(0), ssJoin.getInput1().getShipStrategyKeys());

	// check the workset set join
	DualInputPlanNode edgeJoin = (DualInputPlanNode) ssJoin.getInput1().getSource();
	assertEquals(DEFAULT_PARALLELISM, edgeJoin.getParallelism());
	assertEquals(ShipStrategyType.PARTITION_HASH, edgeJoin.getInput1().getShipStrategy());
	assertEquals(ShipStrategyType.FORWARD, edgeJoin.getInput2().getShipStrategy());
	assertTrue(edgeJoin.getInput1().getTempMode().isCached());

	assertEquals(new FieldList(0), edgeJoin.getInput1().getShipStrategyKeys());

	// check that the initial partitioning is pushed out of the loop
	assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy());
	assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput2().getShipStrategy());
	assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys());
	assertEquals(new FieldList(0), iteration.getInput2().getShipStrategyKeys());
}

Example #10

Source File: SpargelCompilerTest.java From flink with Apache License 2.0

4 votes

/**
 * Compiles a scatter-gather connected-components program and checks the ship
 * strategies, parallelism and local strategies found in the optimized plan.
 */
@SuppressWarnings("serial")
@Test
public void testSpargelCompiler() {
	ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
	environment.setParallelism(DEFAULT_PARALLELISM);

	// build the program under test: two vertices connected by one value-less edge
	DataSet<Vertex<Long, Long>> vertexSet = environment
		.fromElements(new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L))
		.map(new Tuple2ToVertexMap<>());

	DataSet<Edge<Long, NullValue>> edgeSet = environment
		.fromElements(new Tuple2<>(1L, 2L))
		.map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {

			@Override
			public Edge<Long, NullValue> map(Tuple2<Long, Long> pair) {
				return new Edge<>(pair.f0, pair.f1, NullValue.getInstance());
			}
		});

	Graph<Long, Long, NullValue> inputGraph = Graph.fromDataSet(vertexSet, edgeSet, environment);

	DataSet<Vertex<Long, Long>> components = inputGraph
		.runScatterGatherIteration(
			new ConnectedComponents.CCMessenger<>(BasicTypeInfo.LONG_TYPE_INFO),
			new ConnectedComponents.CCUpdater<>(),
			100)
		.getVertices();

	components.output(new DiscardingOutputFormat<>());

	Plan plan = environment.createProgramPlan("Spargel Connected Components");
	OptimizedPlan optimizedPlan = compileNoStats(plan);

	// every key-based expectation below refers to field 0
	FieldList firstField = new FieldList(0);

	// the sink is fed by forwarding and runs with the default parallelism
	SinkPlanNode sinkNode = optimizedPlan.getDataSinks().iterator().next();
	assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());
	assertEquals(DEFAULT_PARALLELISM, sinkNode.getParallelism());

	// the sink's source is the workset iteration
	WorksetIterationPlanNode iterationNode =
		(WorksetIterationPlanNode) sinkNode.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, iterationNode.getParallelism());

	// solution set delta: a two-input node only if the update function preserves the partitioning
	PlanNode solutionSetDelta = iterationNode.getSolutionSetDeltaPlanNode();
	assertTrue(solutionSetDelta instanceof DualInputPlanNode);

	DualInputPlanNode solutionSetJoin = (DualInputPlanNode) solutionSetDelta;
	assertEquals(DEFAULT_PARALLELISM, solutionSetJoin.getParallelism());
	assertEquals(ShipStrategyType.PARTITION_HASH, solutionSetJoin.getInput1().getShipStrategy());
	assertEquals(firstField, solutionSetJoin.getInput1().getShipStrategyKeys());

	// the join that feeds the first input of the solution set join
	DualInputPlanNode worksetJoin = (DualInputPlanNode) solutionSetJoin.getInput1().getSource();
	assertEquals(DEFAULT_PARALLELISM, worksetJoin.getParallelism());
	assertEquals(ShipStrategyType.PARTITION_HASH, worksetJoin.getInput1().getShipStrategy());
	assertEquals(ShipStrategyType.FORWARD, worksetJoin.getInput2().getShipStrategy());
	assertTrue(worksetJoin.getInput1().getTempMode().isCached());

	assertEquals(firstField, worksetJoin.getInput1().getShipStrategyKeys());

	// the initial partitioning must have been pushed out of the loop
	assertEquals(ShipStrategyType.PARTITION_HASH, iterationNode.getInput1().getShipStrategy());
	assertEquals(ShipStrategyType.PARTITION_HASH, iterationNode.getInput2().getShipStrategy());
	assertEquals(firstField, iterationNode.getInput1().getShipStrategyKeys());
	assertEquals(firstField, iterationNode.getInput2().getShipStrategyKeys());

	// the initial workset sort must also sit outside the loop
	assertEquals(LocalStrategy.SORT, iterationNode.getInput2().getLocalStrategy());
	assertEquals(firstField, iterationNode.getInput2().getLocalStrategyKeys());
}

Example #11

Source File: PregelCompilerTest.java From flink with Apache License 2.0

4 votes

/**
 * Compiles a vertex-centric connected-components program that uses a message
 * combiner and checks the ship strategies and parallelism in the optimized plan.
 */
@SuppressWarnings("serial")
@Test
public void testPregelWithCombiner() {
	ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
	environment.setParallelism(DEFAULT_PARALLELISM);

	// build the program under test: two vertices connected by one value-less edge
	DataSet<Vertex<Long, Long>> vertexSet = environment
		.fromElements(new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L))
		.map(new Tuple2ToVertexMap<>());

	DataSet<Edge<Long, NullValue>> edgeSet = environment
		.fromElements(new Tuple2<>(1L, 2L))
		.map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {

			@Override
			public Edge<Long, NullValue> map(Tuple2<Long, Long> pair) {
				return new Edge<>(pair.f0, pair.f1, NullValue.getInstance());
			}
		});

	Graph<Long, Long, NullValue> inputGraph = Graph.fromDataSet(vertexSet, edgeSet, environment);

	DataSet<Vertex<Long, Long>> components = inputGraph
		.runVertexCentricIteration(new CCCompute(), new CCCombiner(), 100)
		.getVertices();

	components.output(new DiscardingOutputFormat<>());

	Plan plan = environment.createProgramPlan("Pregel Connected Components");
	OptimizedPlan optimizedPlan = compileNoStats(plan);

	// every key-based expectation below refers to field 0
	FieldList firstField = new FieldList(0);

	// the sink is fed by forwarding and runs with the default parallelism
	SinkPlanNode sinkNode = optimizedPlan.getDataSinks().iterator().next();
	assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());
	assertEquals(DEFAULT_PARALLELISM, sinkNode.getParallelism());

	// the sink's source is the workset iteration
	WorksetIterationPlanNode iterationNode =
		(WorksetIterationPlanNode) sinkNode.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, iterationNode.getParallelism());

	// the combiner sits on the iteration's second input and is fed by forwarding
	SingleInputPlanNode combinerNode = (SingleInputPlanNode) iterationNode.getInput2().getSource();
	assertEquals(ShipStrategyType.FORWARD, combinerNode.getInput().getShipStrategy());

	// the solution set delta is expected to be a single-input node
	PlanNode solutionSetDelta = iterationNode.getSolutionSetDeltaPlanNode();
	assertTrue(solutionSetDelta instanceof SingleInputPlanNode);

	SingleInputPlanNode deltaNode = (SingleInputPlanNode) solutionSetDelta;
	SingleInputPlanNode flatMapNode = (SingleInputPlanNode) deltaNode.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, flatMapNode.getParallelism());
	assertEquals(ShipStrategyType.FORWARD, flatMapNode.getInput().getShipStrategy());

	// the computation coGroup feeds the flat map
	DualInputPlanNode coGroupNode = (DualInputPlanNode) flatMapNode.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, coGroupNode.getParallelism());
	assertEquals(ShipStrategyType.FORWARD, coGroupNode.getInput1().getShipStrategy());
	assertEquals(ShipStrategyType.PARTITION_HASH, coGroupNode.getInput2().getShipStrategy());
	assertTrue(coGroupNode.getInput2().getTempMode().isCached());

	assertEquals(firstField, coGroupNode.getInput2().getShipStrategyKeys());

	// the initial partitioning must have been pushed out of the loop
	assertEquals(ShipStrategyType.PARTITION_HASH, iterationNode.getInput1().getShipStrategy());
	assertEquals(firstField, iterationNode.getInput1().getShipStrategyKeys());
}

Example #12

Source File: PregelCompilerTest.java From flink with Apache License 2.0

4 votes

/**
 * Compiles a vertex-centric connected-components program configured with a
 * broadcast set and checks the ship strategies and parallelism in the optimized plan.
 */
@SuppressWarnings("serial")
@Test
public void testPregelCompilerWithBroadcastVariable() {
	final String broadcastSetName = "broadcast";

	ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
	environment.setParallelism(DEFAULT_PARALLELISM);

	// build the program under test: a broadcast set plus two vertices and one value-less edge
	DataSet<Long> broadcastSet = environment.fromElements(1L);

	DataSet<Vertex<Long, Long>> vertexSet = environment
		.fromElements(new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L))
		.map(new Tuple2ToVertexMap<>());

	DataSet<Edge<Long, NullValue>> edgeSet = environment
		.fromElements(new Tuple2<>(1L, 2L))
		.map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {

			@Override
			public Edge<Long, NullValue> map(Tuple2<Long, Long> pair) {
				return new Edge<>(pair.f0, pair.f1, NullValue.getInstance());
			}
		});

	Graph<Long, Long, NullValue> inputGraph = Graph.fromDataSet(vertexSet, edgeSet, environment);

	VertexCentricConfiguration configuration = new VertexCentricConfiguration();
	configuration.addBroadcastSet(broadcastSetName, broadcastSet);

	DataSet<Vertex<Long, Long>> components = inputGraph
		.runVertexCentricIteration(new CCCompute(), null, 100, configuration)
		.getVertices();

	components.output(new DiscardingOutputFormat<>());

	Plan plan = environment.createProgramPlan("Pregel Connected Components");
	OptimizedPlan optimizedPlan = compileNoStats(plan);

	// every key-based expectation below refers to field 0
	FieldList firstField = new FieldList(0);

	// the sink is fed by forwarding and runs with the default parallelism
	SinkPlanNode sinkNode = optimizedPlan.getDataSinks().iterator().next();
	assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());
	assertEquals(DEFAULT_PARALLELISM, sinkNode.getParallelism());

	// the sink's source is the workset iteration
	WorksetIterationPlanNode iterationNode =
		(WorksetIterationPlanNode) sinkNode.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, iterationNode.getParallelism());

	// the solution set delta is expected to be a single-input node
	PlanNode solutionSetDelta = iterationNode.getSolutionSetDeltaPlanNode();
	assertTrue(solutionSetDelta instanceof SingleInputPlanNode);

	SingleInputPlanNode deltaNode = (SingleInputPlanNode) solutionSetDelta;
	SingleInputPlanNode flatMapNode = (SingleInputPlanNode) deltaNode.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, flatMapNode.getParallelism());
	assertEquals(ShipStrategyType.FORWARD, flatMapNode.getInput().getShipStrategy());

	// the computation coGroup feeds the flat map
	DualInputPlanNode coGroupNode = (DualInputPlanNode) flatMapNode.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, coGroupNode.getParallelism());
	assertEquals(ShipStrategyType.FORWARD, coGroupNode.getInput1().getShipStrategy());
	assertEquals(ShipStrategyType.PARTITION_HASH, coGroupNode.getInput2().getShipStrategy());
	assertTrue(coGroupNode.getInput2().getTempMode().isCached());

	assertEquals(firstField, coGroupNode.getInput2().getShipStrategyKeys());

	// the initial partitioning must have been pushed out of the loop
	assertEquals(ShipStrategyType.PARTITION_HASH, iterationNode.getInput1().getShipStrategy());
	assertEquals(firstField, iterationNode.getInput1().getShipStrategyKeys());
}

Example #13

Source File: PregelCompilerTest.java From flink with Apache License 2.0

4 votes

/**
 * Compiles a vertex-centric connected-components program that uses no message
 * combiner and checks parallelism and ship strategies of the optimized plan.
 */
@SuppressWarnings("serial")
@Test
public void testPregelCompiler() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	// build the program under test: two vertices connected by a single edge
	DataSet<Vertex<Long, Long>> vertexSet = env.fromElements(
		new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L))
		.map(new Tuple2ToVertexMap<>());

	DataSet<Edge<Long, NullValue>> edgeSet = env.fromElements(new Tuple2<>(1L, 2L))
		.map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {

			@Override
			public Edge<Long, NullValue> map(Tuple2<Long, Long> pair) {
				return new Edge<>(pair.f0, pair.f1, NullValue.getInstance());
			}
		});

	Graph<Long, Long, NullValue> inputGraph = Graph.fromDataSet(vertexSet, edgeSet, env);

	// no combiner is supplied (second argument is null)
	DataSet<Vertex<Long, Long>> components = inputGraph
		.runVertexCentricIteration(new CCCompute(), null, 100)
		.getVertices();

	components.output(new DiscardingOutputFormat<>());

	// translate and optimize the program
	Plan plan = env.createProgramPlan("Pregel Connected Components");
	OptimizedPlan optimized = compileNoStats(plan);

	// the sink: forwarded input, default parallelism
	SinkPlanNode sinkNode = optimized.getDataSinks().iterator().next();
	assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());
	assertEquals(DEFAULT_PARALLELISM, sinkNode.getParallelism());

	// the iteration feeding the sink
	WorksetIterationPlanNode iterationNode = (WorksetIterationPlanNode) sinkNode.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, iterationNode.getParallelism());

	// the solution set delta and the single-input node in front of it
	PlanNode deltaNode = iterationNode.getSolutionSetDeltaPlanNode();
	assertTrue(deltaNode instanceof SingleInputPlanNode);

	SingleInputPlanNode deltaSingleInput = (SingleInputPlanNode) deltaNode;
	SingleInputPlanNode deltaFlatMap = (SingleInputPlanNode) deltaSingleInput.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, deltaFlatMap.getParallelism());
	assertEquals(ShipStrategyType.FORWARD, deltaFlatMap.getInput().getShipStrategy());

	// the computation coGroup: first input forwarded, second hash-partitioned on field 0 and cached
	DualInputPlanNode coGroupNode = (DualInputPlanNode) deltaFlatMap.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, coGroupNode.getParallelism());
	assertEquals(ShipStrategyType.FORWARD, coGroupNode.getInput1().getShipStrategy());
	assertEquals(ShipStrategyType.PARTITION_HASH, coGroupNode.getInput2().getShipStrategy());
	assertTrue(coGroupNode.getInput2().getTempMode().isCached());

	assertEquals(new FieldList(0), coGroupNode.getInput2().getShipStrategyKeys());

	// the initial partitioning must be pushed out of the loop
	assertEquals(ShipStrategyType.PARTITION_HASH, iterationNode.getInput1().getShipStrategy());
	assertEquals(new FieldList(0), iterationNode.getInput1().getShipStrategyKeys());
}

Example #14

Source File: SpargelCompilerTest.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Compiles a scatter-gather connected-components program in which the same broadcast
 * data set is registered for both the scatter and the gather function, and checks
 * parallelism and ship strategies of the optimized plan.
 */
@SuppressWarnings("serial")
@Test
public void testSpargelCompilerWithBroadcastVariable() {
	final String broadcastVariableName = "broadcast variable";

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	// compose test program

	DataSet<Long> bcVar = env.fromElements(1L);

	DataSet<Vertex<Long, Long>> initialVertices = env.fromElements(
		new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L))
		.map(new Tuple2ToVertexMap<>());

	DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple2<>(1L, 2L))
		.map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {

			@Override
			public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) {
				return new Edge<>(edge.f0, edge.f1, NullValue.getInstance());
			}
		});

	Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env);

	ScatterGatherConfiguration parameters = new ScatterGatherConfiguration();
	parameters.addBroadcastSetForScatterFunction(broadcastVariableName, bcVar);
	parameters.addBroadcastSetForGatherFunction(broadcastVariableName, bcVar);

	// The configuration must be handed to the iteration; otherwise the broadcast sets
	// registered above never reach the plan and this test would not differ from the
	// variant without broadcast variables.
	DataSet<Vertex<Long, Long>> result = graph.runScatterGatherIteration(
		new ConnectedComponents.CCMessenger<>(BasicTypeInfo.LONG_TYPE_INFO),
		new ConnectedComponents.CCUpdater<>(), 100, parameters)
		.getVertices();

	result.output(new DiscardingOutputFormat<>());

	Plan p = env.createProgramPlan("Spargel Connected Components");
	OptimizedPlan op = compileNoStats(p);

	// check the sink
	SinkPlanNode sink = op.getDataSinks().iterator().next();
	assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
	assertEquals(DEFAULT_PARALLELISM, sink.getParallelism());

	// check the iteration
	WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism());

	// check the solution set join and the delta
	PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode();
	assertTrue(ssDelta instanceof DualInputPlanNode); // this is only true if the update function preserves the partitioning

	DualInputPlanNode ssJoin = (DualInputPlanNode) ssDelta;
	assertEquals(DEFAULT_PARALLELISM, ssJoin.getParallelism());
	assertEquals(ShipStrategyType.PARTITION_HASH, ssJoin.getInput1().getShipStrategy());
	assertEquals(new FieldList(0), ssJoin.getInput1().getShipStrategyKeys());

	// check the workset join
	DualInputPlanNode edgeJoin = (DualInputPlanNode) ssJoin.getInput1().getSource();
	assertEquals(DEFAULT_PARALLELISM, edgeJoin.getParallelism());
	assertEquals(ShipStrategyType.PARTITION_HASH, edgeJoin.getInput1().getShipStrategy());
	assertEquals(ShipStrategyType.FORWARD, edgeJoin.getInput2().getShipStrategy());
	assertTrue(edgeJoin.getInput1().getTempMode().isCached());

	assertEquals(new FieldList(0), edgeJoin.getInput1().getShipStrategyKeys());

	// check that the initial partitioning is pushed out of the loop
	assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy());
	assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput2().getShipStrategy());
	assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys());
	assertEquals(new FieldList(0), iteration.getInput2().getShipStrategyKeys());
}

Example #15

Source File: SpargelCompilerTest.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Compiles a scatter-gather connected-components program and checks parallelism,
 * ship strategies and the workset's local strategy in the optimized plan.
 */
@SuppressWarnings("serial")
@Test
public void testSpargelCompiler() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	// build the program under test: two vertices connected by a single edge
	DataSet<Vertex<Long, Long>> vertexSet = env.fromElements(
		new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L))
		.map(new Tuple2ToVertexMap<>());

	DataSet<Edge<Long, NullValue>> edgeSet = env.fromElements(new Tuple2<>(1L, 2L))
		.map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {

			@Override
			public Edge<Long, NullValue> map(Tuple2<Long, Long> pair) {
				return new Edge<>(pair.f0, pair.f1, NullValue.getInstance());
			}
		});

	Graph<Long, Long, NullValue> inputGraph = Graph.fromDataSet(vertexSet, edgeSet, env);

	DataSet<Vertex<Long, Long>> components = inputGraph
		.runScatterGatherIteration(
			new ConnectedComponents.CCMessenger<>(BasicTypeInfo.LONG_TYPE_INFO),
			new ConnectedComponents.CCUpdater<>(),
			100)
		.getVertices();

	components.output(new DiscardingOutputFormat<>());

	// translate and optimize the program
	Plan plan = env.createProgramPlan("Spargel Connected Components");
	OptimizedPlan optimized = compileNoStats(plan);

	// the sink: forwarded input, default parallelism
	SinkPlanNode sinkNode = optimized.getDataSinks().iterator().next();
	assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());
	assertEquals(DEFAULT_PARALLELISM, sinkNode.getParallelism());

	// the iteration feeding the sink
	WorksetIterationPlanNode iterationNode = (WorksetIterationPlanNode) sinkNode.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, iterationNode.getParallelism());

	// the solution set delta is expected to be the solution set join itself;
	// this is only true if the update function preserves the partitioning
	PlanNode deltaNode = iterationNode.getSolutionSetDeltaPlanNode();
	assertTrue(deltaNode instanceof DualInputPlanNode);

	DualInputPlanNode solutionSetJoin = (DualInputPlanNode) deltaNode;
	assertEquals(DEFAULT_PARALLELISM, solutionSetJoin.getParallelism());
	assertEquals(ShipStrategyType.PARTITION_HASH, solutionSetJoin.getInput1().getShipStrategy());
	assertEquals(new FieldList(0), solutionSetJoin.getInput1().getShipStrategyKeys());

	// the join in front of it: first input hash-partitioned on field 0 and cached, second forwarded
	DualInputPlanNode worksetJoin = (DualInputPlanNode) solutionSetJoin.getInput1().getSource();
	assertEquals(DEFAULT_PARALLELISM, worksetJoin.getParallelism());
	assertEquals(ShipStrategyType.PARTITION_HASH, worksetJoin.getInput1().getShipStrategy());
	assertEquals(ShipStrategyType.FORWARD, worksetJoin.getInput2().getShipStrategy());
	assertTrue(worksetJoin.getInput1().getTempMode().isCached());

	assertEquals(new FieldList(0), worksetJoin.getInput1().getShipStrategyKeys());

	// the initial partitioning must be pushed out of the loop
	assertEquals(ShipStrategyType.PARTITION_HASH, iterationNode.getInput1().getShipStrategy());
	assertEquals(ShipStrategyType.PARTITION_HASH, iterationNode.getInput2().getShipStrategy());
	assertEquals(new FieldList(0), iterationNode.getInput1().getShipStrategyKeys());
	assertEquals(new FieldList(0), iterationNode.getInput2().getShipStrategyKeys());

	// the initial workset sort must happen outside the loop as well
	assertEquals(LocalStrategy.SORT, iterationNode.getInput2().getLocalStrategy());
	assertEquals(new FieldList(0), iterationNode.getInput2().getLocalStrategyKeys());
}

Example #16

Source File: PregelCompilerTest.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Compiles a vertex-centric connected-components program that uses a message
 * combiner and checks the combiner input as well as parallelism and ship
 * strategies of the optimized plan.
 */
@SuppressWarnings("serial")
@Test
public void testPregelWithCombiner() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	// build the program under test: two vertices connected by a single edge
	DataSet<Vertex<Long, Long>> vertexSet = env.fromElements(
		new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L))
		.map(new Tuple2ToVertexMap<>());

	DataSet<Edge<Long, NullValue>> edgeSet = env.fromElements(new Tuple2<>(1L, 2L))
		.map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {

			@Override
			public Edge<Long, NullValue> map(Tuple2<Long, Long> pair) {
				return new Edge<>(pair.f0, pair.f1, NullValue.getInstance());
			}
		});

	Graph<Long, Long, NullValue> inputGraph = Graph.fromDataSet(vertexSet, edgeSet, env);

	DataSet<Vertex<Long, Long>> components = inputGraph
		.runVertexCentricIteration(new CCCompute(), new CCCombiner(), 100)
		.getVertices();

	components.output(new DiscardingOutputFormat<>());

	// translate and optimize the program
	Plan plan = env.createProgramPlan("Pregel Connected Components");
	OptimizedPlan optimized = compileNoStats(plan);

	// the sink: forwarded input, default parallelism
	SinkPlanNode sinkNode = optimized.getDataSinks().iterator().next();
	assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());
	assertEquals(DEFAULT_PARALLELISM, sinkNode.getParallelism());

	// the iteration feeding the sink
	WorksetIterationPlanNode iterationNode = (WorksetIterationPlanNode) sinkNode.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, iterationNode.getParallelism());

	// the combiner is the source of the iteration's second input and is fed by a forward channel
	SingleInputPlanNode combinerNode = (SingleInputPlanNode) iterationNode.getInput2().getSource();
	assertEquals(ShipStrategyType.FORWARD, combinerNode.getInput().getShipStrategy());

	// the solution set delta and the single-input node in front of it
	PlanNode deltaNode = iterationNode.getSolutionSetDeltaPlanNode();
	assertTrue(deltaNode instanceof SingleInputPlanNode);

	SingleInputPlanNode deltaSingleInput = (SingleInputPlanNode) deltaNode;
	SingleInputPlanNode deltaFlatMap = (SingleInputPlanNode) deltaSingleInput.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, deltaFlatMap.getParallelism());
	assertEquals(ShipStrategyType.FORWARD, deltaFlatMap.getInput().getShipStrategy());

	// the computation coGroup: first input forwarded, second hash-partitioned on field 0 and cached
	DualInputPlanNode coGroupNode = (DualInputPlanNode) deltaFlatMap.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, coGroupNode.getParallelism());
	assertEquals(ShipStrategyType.FORWARD, coGroupNode.getInput1().getShipStrategy());
	assertEquals(ShipStrategyType.PARTITION_HASH, coGroupNode.getInput2().getShipStrategy());
	assertTrue(coGroupNode.getInput2().getTempMode().isCached());

	assertEquals(new FieldList(0), coGroupNode.getInput2().getShipStrategyKeys());

	// the initial partitioning must be pushed out of the loop
	assertEquals(ShipStrategyType.PARTITION_HASH, iterationNode.getInput1().getShipStrategy());
	assertEquals(new FieldList(0), iterationNode.getInput1().getShipStrategyKeys());
}

Example #17

Source File: PregelCompilerTest.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Compiles a vertex-centric connected-components program whose configuration
 * registers a broadcast data set, and checks parallelism and ship strategies
 * of the optimized plan.
 */
@SuppressWarnings("serial")
@Test
public void testPregelCompilerWithBroadcastVariable() {
	final String broadcastSetName = "broadcast";

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	// build the program under test: one broadcast element, two vertices, one edge
	DataSet<Long> broadcastData = env.fromElements(1L);

	DataSet<Vertex<Long, Long>> vertexSet = env.fromElements(
		new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L))
		.map(new Tuple2ToVertexMap<>());

	DataSet<Edge<Long, NullValue>> edgeSet = env.fromElements(new Tuple2<>(1L, 2L))
		.map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {

			@Override
			public Edge<Long, NullValue> map(Tuple2<Long, Long> pair) {
				return new Edge<>(pair.f0, pair.f1, NullValue.getInstance());
			}
		});

	Graph<Long, Long, NullValue> inputGraph = Graph.fromDataSet(vertexSet, edgeSet, env);

	VertexCentricConfiguration config = new VertexCentricConfiguration();
	config.addBroadcastSet(broadcastSetName, broadcastData);

	// no combiner is supplied (second argument is null); the configuration carries the broadcast set
	DataSet<Vertex<Long, Long>> components = inputGraph
		.runVertexCentricIteration(new CCCompute(), null, 100, config)
		.getVertices();

	components.output(new DiscardingOutputFormat<>());

	// translate and optimize the program
	Plan plan = env.createProgramPlan("Pregel Connected Components");
	OptimizedPlan optimized = compileNoStats(plan);

	// the sink: forwarded input, default parallelism
	SinkPlanNode sinkNode = optimized.getDataSinks().iterator().next();
	assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());
	assertEquals(DEFAULT_PARALLELISM, sinkNode.getParallelism());

	// the iteration feeding the sink
	WorksetIterationPlanNode iterationNode = (WorksetIterationPlanNode) sinkNode.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, iterationNode.getParallelism());

	// the solution set delta and the single-input node in front of it
	PlanNode deltaNode = iterationNode.getSolutionSetDeltaPlanNode();
	assertTrue(deltaNode instanceof SingleInputPlanNode);

	SingleInputPlanNode deltaSingleInput = (SingleInputPlanNode) deltaNode;
	SingleInputPlanNode deltaFlatMap = (SingleInputPlanNode) deltaSingleInput.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, deltaFlatMap.getParallelism());
	assertEquals(ShipStrategyType.FORWARD, deltaFlatMap.getInput().getShipStrategy());

	// the computation coGroup: first input forwarded, second hash-partitioned on field 0 and cached
	DualInputPlanNode coGroupNode = (DualInputPlanNode) deltaFlatMap.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, coGroupNode.getParallelism());
	assertEquals(ShipStrategyType.FORWARD, coGroupNode.getInput1().getShipStrategy());
	assertEquals(ShipStrategyType.PARTITION_HASH, coGroupNode.getInput2().getShipStrategy());
	assertTrue(coGroupNode.getInput2().getTempMode().isCached());

	assertEquals(new FieldList(0), coGroupNode.getInput2().getShipStrategyKeys());

	// the initial partitioning must be pushed out of the loop
	assertEquals(ShipStrategyType.PARTITION_HASH, iterationNode.getInput1().getShipStrategy());
	assertEquals(new FieldList(0), iterationNode.getInput1().getShipStrategyKeys());
}

Example #18

Source File: PregelCompilerTest.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Optimizes a combiner-less vertex-centric connected-components job and verifies
 * the parallelism and shipping strategies chosen for its plan nodes.
 */
@SuppressWarnings("serial")
@Test
public void testPregelCompiler() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	// --- program under test ---
	DataSet<Vertex<Long, Long>> seedVertices = env.fromElements(
		new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L))
		.map(new Tuple2ToVertexMap<>());

	DataSet<Edge<Long, NullValue>> seedEdges = env.fromElements(new Tuple2<>(1L, 2L))
		.map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {

			@Override
			public Edge<Long, NullValue> map(Tuple2<Long, Long> endpoints) {
				// edges carry no value, only source and target ids
				return new Edge<>(endpoints.f0, endpoints.f1, NullValue.getInstance());
			}
		});

	Graph<Long, Long, NullValue> testGraph = Graph.fromDataSet(seedVertices, seedEdges, env);

	DataSet<Vertex<Long, Long>> labelled = testGraph
		.runVertexCentricIteration(new CCCompute(), null, 100)
		.getVertices();

	labelled.output(new DiscardingOutputFormat<>());

	// --- optimization ---
	OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan("Pregel Connected Components"));

	// --- sink ---
	SinkPlanNode dataSink = optimizedPlan.getDataSinks().iterator().next();
	assertEquals(ShipStrategyType.FORWARD, dataSink.getInput().getShipStrategy());
	assertEquals(DEFAULT_PARALLELISM, dataSink.getParallelism());

	// --- iteration ---
	WorksetIterationPlanNode worksetIteration = (WorksetIterationPlanNode) dataSink.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, worksetIteration.getParallelism());

	// --- solution set delta ---
	PlanNode solutionSetDelta = worksetIteration.getSolutionSetDeltaPlanNode();
	assertTrue(solutionSetDelta instanceof SingleInputPlanNode);

	SingleInputPlanNode solutionSetDeltaNode = (SingleInputPlanNode) solutionSetDelta;
	SingleInputPlanNode flatMapNode = (SingleInputPlanNode) solutionSetDeltaNode.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, flatMapNode.getParallelism());
	assertEquals(ShipStrategyType.FORWARD, flatMapNode.getInput().getShipStrategy());

	// --- computation coGroup ---
	DualInputPlanNode computeNode = (DualInputPlanNode) flatMapNode.getInput().getSource();
	assertEquals(DEFAULT_PARALLELISM, computeNode.getParallelism());
	assertEquals(ShipStrategyType.FORWARD, computeNode.getInput1().getShipStrategy());
	assertEquals(ShipStrategyType.PARTITION_HASH, computeNode.getInput2().getShipStrategy());
	assertTrue(computeNode.getInput2().getTempMode().isCached());

	assertEquals(new FieldList(0), computeNode.getInput2().getShipStrategyKeys());

	// --- initial partitioning is expected outside the loop ---
	assertEquals(ShipStrategyType.PARTITION_HASH, worksetIteration.getInput1().getShipStrategy());
	assertEquals(new FieldList(0), worksetIteration.getInput1().getShipStrategyKeys());
}

Example #19

Source File: Graph.java From flink with Apache License 2.0

3 votes

/**
 * Builds a graph from tuple-typed inputs: Tuple2 records for the vertices and
 * Tuple3 records for the edges.
 *
 * <p>For a vertex tuple, field 0 is used as the vertex ID and field 1 as the
 * vertex value. For an edge tuple, field 0 is the source ID, field 1 the
 * target ID and field 2 the edge value.
 *
 * @param vertices a DataSet of Tuple2 representing the vertices.
 * @param edges a DataSet of Tuple3 representing the edges.
 * @param context the flink execution environment.
 * @return the newly created graph.
 */
public static <K, VV, EV> Graph<K, VV, EV> fromTupleDataSet(DataSet<Tuple2<K, VV>> vertices,
		DataSet<Tuple3<K, K, EV>> edges, ExecutionEnvironment context) {

	// wrap each vertex tuple into a Vertex
	final DataSet<Vertex<K, VV>> typedVertices =
		vertices.map(new Tuple2ToVertexMap<>()).name("Type conversion");

	// wrap each edge tuple into an Edge
	final DataSet<Edge<K, EV>> typedEdges =
		edges.map(new Tuple3ToEdgeMap<>()).name("Type conversion");

	return fromDataSet(typedVertices, typedEdges, context);
}

Example #20

Source File: Graph.java From flink with Apache License 2.0

3 votes

/**
 * Creates a graph out of plain tuples instead of Vertex and Edge objects.
 *
 * <p>Vertices are given as Tuple2 (vertex ID, vertex value); edges are given
 * as Tuple3 (source ID, target ID, edge value). Both inputs are converted with
 * a map operation and then passed on to {@code fromDataSet}.
 *
 * @param vertices a DataSet of Tuple2 representing the vertices.
 * @param edges a DataSet of Tuple3 representing the edges.
 * @param context the flink execution environment.
 * @return the newly created graph.
 */
public static <K, VV, EV> Graph<K, VV, EV> fromTupleDataSet(DataSet<Tuple2<K, VV>> vertices,
		DataSet<Tuple3<K, K, EV>> edges, ExecutionEnvironment context) {

	// both conversion operators share the same display name
	final String conversionName = "Type conversion";

	DataSet<Vertex<K, VV>> convertedVertices = vertices
		.map(new Tuple2ToVertexMap<>())
		.name(conversionName);

	DataSet<Edge<K, EV>> convertedEdges = edges
		.map(new Tuple3ToEdgeMap<>())
		.name(conversionName);

	return fromDataSet(convertedVertices, convertedEdges, context);
}

Example #21

Source File: Graph.java From Flink-CEPplus with Apache License 2.0

3 votes

/**
 * Constructs a graph whose vertices come from a DataSet of Tuple2 and whose
 * edges come from a DataSet of Tuple3.
 *
 * <p>Vertex tuples are read as (vertex ID, vertex value). Edge tuples are read
 * as (source ID, target ID, edge value).
 *
 * @param vertices a DataSet of Tuple2 representing the vertices.
 * @param edges a DataSet of Tuple3 representing the edges.
 * @param context the flink execution environment.
 * @return the newly created graph.
 */
public static <K, VV, EV> Graph<K, VV, EV> fromTupleDataSet(DataSet<Tuple2<K, VV>> vertices,
		DataSet<Tuple3<K, K, EV>> edges, ExecutionEnvironment context) {

	// Tuple2 -> Vertex
	DataSet<Vertex<K, VV>> vertexSet =
		vertices.map(new Tuple2ToVertexMap<>()).name("Type conversion");

	// Tuple3 -> Edge
	DataSet<Edge<K, EV>> edgeSet =
		edges.map(new Tuple3ToEdgeMap<>()).name("Type conversion");

	return fromDataSet(vertexSet, edgeSet, context);
}