org.apache.flink.optimizer.plan.BulkIterationPlanNode Java Examples

The following examples show how to use org.apache.flink.optimizer.plan.BulkIterationPlanNode, the plan node with which Flink's batch (DataSet) optimizer represents a bulk iteration (an IterativeDataSet) in an optimized plan. Each example is taken from the project and source file named above it.
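
All of the tests below follow the same pattern: build a DataSet program that contains a bulk iteration, compile it with the optimizer, and cast the source of the sink's input channel to BulkIterationPlanNode. A minimal sketch of that pattern, assuming the test extends Flink's CompilerTestBase (which provides compileNoStats) and uses the IdentityMapper and DiscardingOutputFormat test utilities:

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

// any program with a bulk iteration will do
IterativeDataSet<Long> iteration = env.generateSequence(1, 10).iterate(10);
iteration.closeWith(iteration.map(new IdentityMapper<Long>()))
		.output(new DiscardingOutputFormat<Long>());

// translate to the common API plan and run the optimizer on it
Plan plan = env.createProgramPlan();
OptimizedPlan optimizedPlan = compileNoStats(plan); // from CompilerTestBase

// the iteration appears in the optimized plan as a BulkIterationPlanNode
SinkPlanNode sink = optimizedPlan.getDataSinks().iterator().next();
BulkIterationPlanNode bipn = (BulkIterationPlanNode) sink.getInput().getSource();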
Example #1
Source File: IterationsCompilerTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testIterationNotPushingWorkOut() throws Exception {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(8);

		DataSet<Tuple2<Long, Long>> input1 = env.readCsvFile("/some/file/path").types(Long.class).map(new DuplicateValue());

		DataSet<Tuple2<Long, Long>> input2 = env.readCsvFile("/some/file/path").types(Long.class, Long.class);

		// Use input1 as partial solution. Partial solution is used in a single join operation --> it is cheaper
		// to do the hash partitioning between the partial solution node and the join node
		// instead of pushing the partitioning out
		doSimpleBulkIteration(input1, input2).output(new DiscardingOutputFormat<Tuple2<Long,Long>>());

		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);

		assertEquals(1, op.getDataSinks().size());
		assertTrue(op.getDataSinks().iterator().next().getInput().getSource() instanceof BulkIterationPlanNode);

		BulkIterationPlanNode bipn = (BulkIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();

		// check that work has not been pushed out
		for (Channel c : bipn.getPartialSolutionPlanNode().getOutgoingChannels()) {
			assertEquals(ShipStrategyType.PARTITION_HASH, c.getShipStrategy());
		}

		assertEquals(ShipStrategyType.FORWARD, bipn.getInput().getShipStrategy());

		new JobGraphGenerator().compileJobGraph(op);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
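The helpers doSimpleBulkIteration and DuplicateValue are defined elsewhere in IterationsCompilerTest and are not shown on this page. (DuplicateValue simply turns each Tuple1<Long> into a Tuple2<Long, Long>.) Below is a hedged sketch of what doSimpleBulkIteration plausibly looks like; the FlatMapJoin name is a stand-in, and the property the test actually depends on is that the partial solution feeds exactly one join keyed on field 0:

public static DataSet<Tuple2<Long, Long>> doSimpleBulkIteration(
		DataSet<Tuple2<Long, Long>> vertices, DataSet<Tuple2<Long, Long>> edges) {

	// open a bulk iteration; the vertices become the partial solution
	IterativeDataSet<Tuple2<Long, Long>> iteration = vertices.iterate(20);

	// the partial solution is used in a single join, so the optimizer hash-partitions
	// between the partial solution node and the join instead of pushing the work out
	DataSet<Tuple2<Long, Long>> changes = iteration
			.join(edges).where(0).equalTo(0)
			.flatMap(new FlatMapJoin()); // stand-in for the real flat-map over the join result

	// close the bulk iteration
	return iteration.closeWith(changes);
}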
Example #2
Source File: PageRankCompilerTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testPageRank() {
	try {
		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// get input data
		DataSet<Long> pagesInput = env.fromElements(1L);
		@SuppressWarnings("unchecked")
		DataSet<Tuple2<Long, Long>> linksInput = env.fromElements(new Tuple2<Long, Long>(1L, 2L));

		// assign initial rank to pages
		DataSet<Tuple2<Long, Double>> pagesWithRanks = pagesInput.
				map(new RankAssigner((1.0d / 10)));

		// build adjacency list from link input
		DataSet<Tuple2<Long, Long[]>> adjacencyListInput =
				linksInput.groupBy(0).reduceGroup(new BuildOutgoingEdgeList());

		// set iterative data set
		IterativeDataSet<Tuple2<Long, Double>> iteration = pagesWithRanks.iterate(10);

		Configuration cfg = new Configuration();
		cfg.setString(Optimizer.HINT_LOCAL_STRATEGY, Optimizer.HINT_LOCAL_STRATEGY_HASH_BUILD_SECOND);

		DataSet<Tuple2<Long, Double>> newRanks = iteration
				// join pages with outgoing edges and distribute rank
				.join(adjacencyListInput).where(0).equalTo(0).withParameters(cfg)
				.flatMap(new JoinVertexWithEdgesMatch())
				// collect and sum ranks
				.groupBy(0).aggregate(SUM, 1)
				// apply dampening factor
				.map(new Dampener(0.85, 10));

		DataSet<Tuple2<Long, Double>> finalPageRanks = iteration.closeWith(
				newRanks,
				newRanks.join(iteration).where(0).equalTo(0)
				// termination condition
				.filter(new EpsilonFilter()));

		finalPageRanks.output(new DiscardingOutputFormat<Tuple2<Long, Double>>());

		// get the plan and compile it
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);

		SinkPlanNode sinkPlanNode = (SinkPlanNode) op.getDataSinks().iterator().next();
		BulkIterationPlanNode iterPlanNode = (BulkIterationPlanNode) sinkPlanNode.getInput().getSource();

		// check that the partitioning is pushed out of the first loop
		Assert.assertEquals(ShipStrategyType.PARTITION_HASH, iterPlanNode.getInput().getShipStrategy());
		Assert.assertEquals(LocalStrategy.NONE, iterPlanNode.getInput().getLocalStrategy());

		BulkPartialSolutionPlanNode partSolPlanNode = iterPlanNode.getPartialSolutionPlanNode();
		Assert.assertEquals(ShipStrategyType.FORWARD, partSolPlanNode.getOutgoingChannels().get(0).getShipStrategy());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
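The EpsilonFilter used as the termination criterion above is also not shown here. Closing an iteration with a second data set makes that set the termination criterion: the loop stops as soon as it becomes empty. A sketch of a filter consistent with that usage, with a hypothetical threshold value:

public static final class EpsilonFilter
		implements FilterFunction<Tuple2<Tuple2<Long, Double>, Tuple2<Long, Double>>> {

	private static final double EPSILON = 0.0001; // hypothetical threshold

	@Override
	public boolean filter(Tuple2<Tuple2<Long, Double>, Tuple2<Long, Double>> value) {
		// keep a vertex only while its rank still changes by more than epsilon;
		// once nothing passes, the termination criterion is empty and the loop ends
		return Math.abs(value.f0.f1 - value.f1.f1) > EPSILON;
	}
}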
Example #3
Source File: IterationsCompilerTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testTwoIterationsDirectlyChained() throws Exception {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(8);
		
		DataSet<Tuple2<Long, Long>> verticesWithInitialId = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
		
		DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
		
		DataSet<Tuple2<Long, Long>> bulkResult = doBulkIteration(verticesWithInitialId, edges);
		
		DataSet<Tuple2<Long, Long>> depResult = doDeltaIteration(bulkResult, edges);
		
		depResult.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		assertEquals(1, op.getDataSinks().size());
		assertTrue(op.getDataSinks().iterator().next().getInput().getSource() instanceof WorksetIterationPlanNode);
		
		WorksetIterationPlanNode wipn = (WorksetIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();
		BulkIterationPlanNode bipn = (BulkIterationPlanNode)wipn.getInput1().getSource();

		// the hash partitioning has been pushed out of the delta iteration into the bulk iteration
		assertEquals(ShipStrategyType.FORWARD, wipn.getInput1().getShipStrategy());

		// the input of the root step function is the last operator of the step function
		// since the work has been pushed out of the bulk iteration, it has to guarantee the hash partitioning
		for (Channel c : bipn.getRootOfStepFunction().getInputs()) {
			assertEquals(ShipStrategyType.PARTITION_HASH, c.getShipStrategy());
		}

		assertEquals(DataExchangeMode.BATCH, wipn.getInput1().getDataExchangeMode());
		assertEquals(DataExchangeMode.BATCH, wipn.getInput2().getDataExchangeMode());
		
		assertEquals(TempMode.NONE, wipn.getInput1().getTempMode());
		assertEquals(TempMode.NONE, wipn.getInput2().getTempMode());
		
		new JobGraphGenerator().compileJobGraph(op);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
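doBulkIteration and doDeltaIteration are again helpers from IterationsCompilerTest that this page does not include. Hedged sketches of their likely shape follow; the FlatMapJoin name is a stand-in, and what the test actually depends on is that both iterations key on field 0, which is what lets the optimizer push the delta iteration's hash partitioning into the bulk iteration:

public static DataSet<Tuple2<Long, Long>> doBulkIteration(
		DataSet<Tuple2<Long, Long>> vertices, DataSet<Tuple2<Long, Long>> edges) {

	IterativeDataSet<Tuple2<Long, Long>> iteration = vertices.iterate(20);

	// connected-components style: propagate the smaller id along the edges
	DataSet<Tuple2<Long, Long>> changes = iteration
			.join(edges).where(0).equalTo(0)
			.flatMap(new FlatMapJoin())                 // stand-in name
			.groupBy(0).aggregate(Aggregations.MIN, 1);

	return iteration.closeWith(changes);
}

public static DataSet<Tuple2<Long, Long>> doDeltaIteration(
		DataSet<Tuple2<Long, Long>> vertices, DataSet<Tuple2<Long, Long>> edges) {

	// solution set and initial workset are both the vertices, keyed on field 0
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> depIteration =
			vertices.iterateDelta(vertices, 100, 0);

	DataSet<Tuple2<Long, Long>> updates = depIteration.getWorkset()
			.join(edges).where(0).equalTo(0)
			.flatMap(new FlatMapJoin());                // stand-in name

	return depIteration.closeWith(updates, updates);
}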
Example #4
Source File: PipelineBreakerTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * <pre>
 *                                +----------- ITERATION ---------+
 *                                |                               |
 *                               +--+                           +----+
 *  (source 1) ----------------->|PS| ------------ +        +-->|next|---> (sink)
 *                               +--+              | (BC)   |   +----+
 *                                |                V        |     |
 *  (source 2) --> (map) --+------|-----------> (MAPPER) ---+     |
 *                         |      |                ^              |
 *                         |      |                | (BC)         |
 *                         |      +----------------|--------------+
 *                         |                       |
 *                         +--(map) --> (reduce) --+
 * </pre>
 */
@Test
public void testPipelineBreakerBroadcastedPartialSolution() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.getConfig().setExecutionMode(ExecutionMode.PIPELINED);
		env.setParallelism(64);
		
		DataSet<Long> initialSource = env.generateSequence(1, 10);
		IterativeDataSet<Long> iteration = initialSource.iterate(100);
		
		
		DataSet<Long> sourceWithMapper = env.generateSequence(1, 10).map(new IdentityMapper<Long>());
		
		DataSet<Long> bcInput1 = sourceWithMapper
									.map(new IdentityMapper<Long>())
									.reduce(new SelectOneReducer<Long>());
		
		DataSet<Long> result = sourceWithMapper
				.map(new IdentityMapper<Long>())
						.withBroadcastSet(iteration, "bc2")
						.withBroadcastSet(bcInput1, "bc1");
						
		
		iteration.closeWith(result).output(new DiscardingOutputFormat<Long>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		SinkPlanNode sink = op.getDataSinks().iterator().next();
		BulkIterationPlanNode iterationPlanNode = (BulkIterationPlanNode) sink.getInput().getSource();
		SingleInputPlanNode mapper = (SingleInputPlanNode) iterationPlanNode.getRootOfStepFunction();
		
		assertEquals(TempMode.CACHED, mapper.getInput().getTempMode());
		assertEquals(DataExchangeMode.BATCH, mapper.getInput().getDataExchangeMode());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
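What the assertions pin down: sourceWithMapper reaches the MAPPER twice, once directly and once, through the extra map/reduce chain, as broadcast set "bc1", so a fully pipelined execution of this diamond-shaped plan could deadlock. The optimizer therefore has to insert a pipeline breaker on the mapper's input, which is exactly what the checks for TempMode.CACHED and DataExchangeMode.BATCH verify.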