Java Code Examples for org.apache.flink.optimizer.plan.PlanNode#getInputs()

The following examples show how to use org.apache.flink.optimizer.plan.PlanNode#getInputs(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestUtils.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Verify operator parallelism.
 *
 * @param env the Flink execution environment.
 * @param expectedParallelism expected operator parallelism
 */
/**
 * Verify that no operator in the compiled plan exceeds the expected parallelism.
 *
 * <p>The environment's default parallelism is deliberately raised above
 * {@code expectedParallelism}, so any operator that silently inherits the
 * default (instead of an explicitly configured value) fails the assertion.
 *
 * @param env the Flink execution environment.
 * @param expectedParallelism expected operator parallelism
 */
public static void verifyParallelism(ExecutionEnvironment env, int expectedParallelism) {
	// Set the default higher than expected so inherited defaults are detectable.
	env.setParallelism(2 * expectedParallelism);

	Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
	OptimizedPlan optimizedPlan = compiler.compile(env.createProgramPlan());

	// Walk the job plan from sinks back to sources, using the list as a stack.
	List<PlanNode> queue = new ArrayList<>(optimizedPlan.getDataSinks());

	while (!queue.isEmpty()) {
		PlanNode node = queue.remove(queue.size() - 1);

		// Data sources may have parallelism of 1, so simply check that the node
		// parallelism has not been increased by setting the default parallelism
		assertTrue("Wrong parallelism for " + node, node.getParallelism() <= expectedParallelism);

		for (Channel channel : node.getInputs()) {
			queue.add(channel.getSource());
		}
	}
}
 
Example 2
Source File: TestUtils.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Verify operator parallelism.
 *
 * @param env the Flink execution environment.
 * @param expectedParallelism expected operator parallelism
 */
/**
 * Verify that no operator in the compiled plan exceeds the expected parallelism.
 *
 * <p>The environment's default parallelism is deliberately raised above
 * {@code expectedParallelism}, so any operator that silently inherits the
 * default (instead of an explicitly configured value) fails the assertion.
 *
 * @param env the Flink execution environment.
 * @param expectedParallelism expected operator parallelism
 */
public static void verifyParallelism(ExecutionEnvironment env, int expectedParallelism) {
	// Set the default higher than expected so inherited defaults are detectable.
	env.setParallelism(2 * expectedParallelism);

	Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
	OptimizedPlan optimizedPlan = compiler.compile(env.createProgramPlan());

	// Walk the job plan from sinks back to sources, using the list as a stack.
	List<PlanNode> queue = new ArrayList<>(optimizedPlan.getDataSinks());

	while (!queue.isEmpty()) {
		PlanNode node = queue.remove(queue.size() - 1);

		// Data sources may have parallelism of 1, so simply check that the node
		// parallelism has not been increased by setting the default parallelism
		assertTrue("Wrong parallelism for " + node, node.getParallelism() <= expectedParallelism);

		for (Channel channel : node.getInputs()) {
			queue.add(channel.getSource());
		}
	}
}
 
Example 3
Source File: TestUtils.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Verify operator parallelism.
 *
 * @param env the Flink execution environment.
 * @param expectedParallelism expected operator parallelism
 */
/**
 * Verify that no operator in the compiled plan exceeds the expected parallelism.
 *
 * <p>The environment's default parallelism is deliberately raised above
 * {@code expectedParallelism}, so any operator that silently inherits the
 * default (instead of an explicitly configured value) fails the assertion.
 *
 * @param env the Flink execution environment.
 * @param expectedParallelism expected operator parallelism
 */
public static void verifyParallelism(ExecutionEnvironment env, int expectedParallelism) {
	// Set the default higher than expected so inherited defaults are detectable.
	env.setParallelism(2 * expectedParallelism);

	Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
	OptimizedPlan optimizedPlan = compiler.compile(env.createProgramPlan());

	// Walk the job plan from sinks back to sources, using the list as a stack.
	List<PlanNode> queue = new ArrayList<>(optimizedPlan.getDataSinks());

	while (!queue.isEmpty()) {
		PlanNode node = queue.remove(queue.size() - 1);

		// Data sources may have parallelism of 1, so simply check that the node
		// parallelism has not been increased by setting the default parallelism
		assertTrue("Wrong parallelism for " + node, node.getParallelism() <= expectedParallelism);

		for (Channel channel : node.getInputs()) {
			queue.add(channel.getSource());
		}
	}
}
 
Example 4
Source File: RangePartitionRewriter.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
@Override
public void postVisit(PlanNode node) {
	// Descend into an iteration's step function, but only once per iteration node.
	if (node instanceof IterationPlanNode) {
		IterationPlanNode iterationNode = (IterationPlanNode) node;
		if (!visitedIterationNodes.contains(iterationNode)) {
			visitedIterationNodes.add(iterationNode);
			iterationNode.acceptForStepFunction(this);
		}
	}

	for (Channel channel : node.getInputs()) {
		// Only rewrite range-partition channels, and only those that have not
		// been rewritten already (a rewritten channel carries a data distribution).
		if (channel.getShipStrategy() != ShipStrategyType.PARTITION_RANGE) {
			continue;
		}
		if (channel.getDataDistribution() != null) {
			continue;
		}

		if (node.isOnDynamicPath()) {
			throw new InvalidProgramException("Range Partitioning not supported within iterations if users do not supply the data distribution.");
		}

		// Replace the channel with the rewritten sampling/partitioning sub-plan
		// and re-wire the source's outgoing channels accordingly.
		PlanNode source = channel.getSource();
		List<Channel> rewrittenOutputs = rewriteRangePartitionChannel(channel);
		source.getOutgoingChannels().remove(channel);
		source.getOutgoingChannels().addAll(rewrittenOutputs);
	}
}
 
Example 5
Source File: RangePartitionRewriter.java    From flink with Apache License 2.0 5 votes vote down vote up
@Override
public void postVisit(PlanNode node) {
	// Descend into an iteration's step function, but only once per iteration node.
	if (node instanceof IterationPlanNode) {
		IterationPlanNode iterationNode = (IterationPlanNode) node;
		if (!visitedIterationNodes.contains(iterationNode)) {
			visitedIterationNodes.add(iterationNode);
			iterationNode.acceptForStepFunction(this);
		}
	}

	for (Channel channel : node.getInputs()) {
		// Only rewrite range-partition channels, and only those that have not
		// been rewritten already (a rewritten channel carries a data distribution).
		if (channel.getShipStrategy() != ShipStrategyType.PARTITION_RANGE) {
			continue;
		}
		if (channel.getDataDistribution() != null) {
			continue;
		}

		if (node.isOnDynamicPath()) {
			throw new InvalidProgramException("Range Partitioning not supported within iterations if users do not supply the data distribution.");
		}

		// Replace the channel with the rewritten sampling/partitioning sub-plan
		// and re-wire the source's outgoing channels accordingly.
		PlanNode source = channel.getSource();
		List<Channel> rewrittenOutputs = rewriteRangePartitionChannel(channel);
		source.getOutgoingChannels().remove(channel);
		source.getOutgoingChannels().addAll(rewrittenOutputs);
	}
}
 
Example 6
Source File: RangePartitionRewriter.java    From flink with Apache License 2.0 5 votes vote down vote up
@Override
public void postVisit(PlanNode node) {
	// Descend into an iteration's step function, but only once per iteration node.
	if (node instanceof IterationPlanNode) {
		IterationPlanNode iterationNode = (IterationPlanNode) node;
		if (!visitedIterationNodes.contains(iterationNode)) {
			visitedIterationNodes.add(iterationNode);
			iterationNode.acceptForStepFunction(this);
		}
	}

	for (Channel channel : node.getInputs()) {
		// Only rewrite range-partition channels, and only those that have not
		// been rewritten already (a rewritten channel carries a data distribution).
		if (channel.getShipStrategy() != ShipStrategyType.PARTITION_RANGE) {
			continue;
		}
		if (channel.getDataDistribution() != null) {
			continue;
		}

		if (node.isOnDynamicPath()) {
			throw new InvalidProgramException("Range Partitioning not supported within iterations if users do not supply the data distribution.");
		}

		// Replace the channel with the rewritten sampling/partitioning sub-plan
		// and re-wire the source's outgoing channels accordingly.
		PlanNode source = channel.getSource();
		List<Channel> rewrittenOutputs = rewriteRangePartitionChannel(channel);
		source.getOutgoingChannels().remove(channel);
		source.getOutgoingChannels().addAll(rewrittenOutputs);
	}
}
 
Example 7
Source File: TestUtils.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Verify algorithm driver parallelism.
 *
 * <p>Based on {@code org.apache.flink.graph.generator.TestUtils}.
 *
 * @param arguments program arguments
 * @param fullParallelismOperatorNames list of regex strings matching the names of full parallelism operators
 */
/**
 * Verify algorithm driver parallelism.
 *
 * <p>Based on {@code org.apache.flink.graph.generator.TestUtils}.
 *
 * <p>The runner is configured with a reduced parallelism while the
 * environment's default parallelism is set higher; any operator not matching
 * a full-parallelism exclusion pattern must not exceed the reduced value.
 *
 * @param arguments program arguments
 * @param fullParallelismOperatorNames list of regex strings matching the names of full parallelism operators
 */
static void verifyParallelism(String[] arguments, String... fullParallelismOperatorNames) throws Exception {
	// set a reduced parallelism for the algorithm runner
	final int parallelism = 8;
	arguments = ArrayUtils.addAll(arguments, "--__parallelism", Integer.toString(parallelism));

	// configure the runner but do not execute
	Runner runner = new Runner(arguments).run();

	// we cannot use the actual DataSink since DataSet#writeAsCsv also
	// executes the program; instead, we receive the DataSet and configure
	// with a DiscardingOutputFormat
	DataSet result = runner.getResult();
	if (result != null) {
		result.output(new DiscardingOutputFormat());
	}

	// set the default parallelism higher than the expected parallelism
	ExecutionEnvironment env = runner.getExecutionEnvironment();
	env.setParallelism(2 * parallelism);

	// add default regex exclusions for the added DiscardingOutputFormat
	// and also for any preceding GraphKeyTypeTransform
	List<Pattern> patterns = new ArrayList<>();
	patterns.add(Pattern.compile("DataSink \\(org\\.apache\\.flink\\.api\\.java\\.io\\.DiscardingOutputFormat@[0-9a-f]{1,8}\\)"));
	patterns.add(Pattern.compile("FlatMap \\(Translate results IDs\\)"));

	// add user regex patterns
	for (String largeOperatorName : fullParallelismOperatorNames) {
		patterns.add(Pattern.compile(largeOperatorName));
	}

	Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
	OptimizedPlan optimizedPlan = compiler.compile(env.createProgramPlan());

	// walk the job plan from sinks to sources, using the list as a stack
	List<PlanNode> queue = new ArrayList<>(optimizedPlan.getDataSinks());

	while (!queue.isEmpty()) {
		PlanNode node = queue.remove(queue.size() - 1);

		// skip operators matching an exclusion pattern; these are the
		// large-scale operators which run at full parallelism
		boolean matched = false;
		for (Pattern pattern : patterns) {
			if (pattern.matcher(node.getNodeName()).matches()) {
				matched = true;
				break; // short-circuit: one match is enough to exclude the node
			}
		}

		if (!matched) {
			// Data sources may have parallelism of 1, so simply check that the node
			// parallelism has not been increased by setting the default parallelism
			assertTrue("Wrong parallelism for " + node, node.getParallelism() <= parallelism);
		}

		for (Channel channel : node.getInputs()) {
			queue.add(channel.getSource());
		}
	}
}
 
Example 8
Source File: TestUtils.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Verify algorithm driver parallelism.
 *
 * <p>Based on {@code org.apache.flink.graph.generator.TestUtils}.
 *
 * @param arguments program arguments
 * @param fullParallelismOperatorNames list of regex strings matching the names of full parallelism operators
 */
/**
 * Verify algorithm driver parallelism.
 *
 * <p>Based on {@code org.apache.flink.graph.generator.TestUtils}.
 *
 * <p>The runner is configured with a reduced parallelism while the
 * environment's default parallelism is set higher; any operator not matching
 * a full-parallelism exclusion pattern must not exceed the reduced value.
 *
 * @param arguments program arguments
 * @param fullParallelismOperatorNames list of regex strings matching the names of full parallelism operators
 */
static void verifyParallelism(String[] arguments, String... fullParallelismOperatorNames) throws Exception {
	// set a reduced parallelism for the algorithm runner
	final int parallelism = 8;
	arguments = ArrayUtils.addAll(arguments, "--__parallelism", Integer.toString(parallelism));

	// configure the runner but do not execute
	Runner runner = new Runner(arguments).run();

	// we cannot use the actual DataSink since DataSet#writeAsCsv also
	// executes the program; instead, we receive the DataSet and configure
	// with a DiscardingOutputFormat
	DataSet result = runner.getResult();
	if (result != null) {
		result.output(new DiscardingOutputFormat());
	}

	// set the default parallelism higher than the expected parallelism
	ExecutionEnvironment env = runner.getExecutionEnvironment();
	env.setParallelism(2 * parallelism);

	// add default regex exclusions for the added DiscardingOutputFormat
	// and also for any preceding GraphKeyTypeTransform
	List<Pattern> patterns = new ArrayList<>();
	patterns.add(Pattern.compile("DataSink \\(org\\.apache\\.flink\\.api\\.java\\.io\\.DiscardingOutputFormat@[0-9a-f]{1,8}\\)"));
	patterns.add(Pattern.compile("FlatMap \\(Translate results IDs\\)"));

	// add user regex patterns
	for (String largeOperatorName : fullParallelismOperatorNames) {
		patterns.add(Pattern.compile(largeOperatorName));
	}

	Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
	OptimizedPlan optimizedPlan = compiler.compile(env.createProgramPlan());

	// walk the job plan from sinks to sources, using the list as a stack
	List<PlanNode> queue = new ArrayList<>(optimizedPlan.getDataSinks());

	while (!queue.isEmpty()) {
		PlanNode node = queue.remove(queue.size() - 1);

		// skip operators matching an exclusion pattern; these are the
		// large-scale operators which run at full parallelism
		boolean matched = false;
		for (Pattern pattern : patterns) {
			if (pattern.matcher(node.getNodeName()).matches()) {
				matched = true;
				break; // short-circuit: one match is enough to exclude the node
			}
		}

		if (!matched) {
			// Data sources may have parallelism of 1, so simply check that the node
			// parallelism has not been increased by setting the default parallelism
			assertTrue("Wrong parallelism for " + node, node.getParallelism() <= parallelism);
		}

		for (Channel channel : node.getInputs()) {
			queue.add(channel.getSource());
		}
	}
}
 
Example 9
Source File: TestUtils.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Verify algorithm driver parallelism.
 *
 * <p>Based on {@code org.apache.flink.graph.generator.TestUtils}.
 *
 * @param arguments program arguments
 * @param fullParallelismOperatorNames list of regex strings matching the names of full parallelism operators
 */
/**
 * Verify algorithm driver parallelism.
 *
 * <p>Based on {@code org.apache.flink.graph.generator.TestUtils}.
 *
 * <p>The runner is configured with a reduced parallelism while the
 * environment's default parallelism is set higher; any operator not matching
 * a full-parallelism exclusion pattern must not exceed the reduced value.
 *
 * @param arguments program arguments
 * @param fullParallelismOperatorNames list of regex strings matching the names of full parallelism operators
 */
static void verifyParallelism(String[] arguments, String... fullParallelismOperatorNames) throws Exception {
	// set a reduced parallelism for the algorithm runner
	final int parallelism = 8;
	arguments = ArrayUtils.addAll(arguments, "--__parallelism", Integer.toString(parallelism));

	// configure the runner but do not execute
	Runner runner = new Runner(arguments).run();

	// we cannot use the actual DataSink since DataSet#writeAsCsv also
	// executes the program; instead, we receive the DataSet and configure
	// with a DiscardingOutputFormat
	DataSet result = runner.getResult();
	if (result != null) {
		result.output(new DiscardingOutputFormat());
	}

	// set the default parallelism higher than the expected parallelism
	ExecutionEnvironment env = runner.getExecutionEnvironment();
	env.setParallelism(2 * parallelism);

	// add default regex exclusions for the added DiscardingOutputFormat
	// and also for any preceding GraphKeyTypeTransform
	List<Pattern> patterns = new ArrayList<>();
	patterns.add(Pattern.compile("DataSink \\(org\\.apache\\.flink\\.api\\.java\\.io\\.DiscardingOutputFormat@[0-9a-f]{1,8}\\)"));
	patterns.add(Pattern.compile("FlatMap \\(Translate results IDs\\)"));

	// add user regex patterns
	for (String largeOperatorName : fullParallelismOperatorNames) {
		patterns.add(Pattern.compile(largeOperatorName));
	}

	Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
	OptimizedPlan optimizedPlan = compiler.compile(env.createProgramPlan());

	// walk the job plan from sinks to sources, using the list as a stack
	List<PlanNode> queue = new ArrayList<>(optimizedPlan.getDataSinks());

	while (!queue.isEmpty()) {
		PlanNode node = queue.remove(queue.size() - 1);

		// skip operators matching an exclusion pattern; these are the
		// large-scale operators which run at full parallelism
		boolean matched = false;
		for (Pattern pattern : patterns) {
			if (pattern.matcher(node.getNodeName()).matches()) {
				matched = true;
				break; // short-circuit: one match is enough to exclude the node
			}
		}

		if (!matched) {
			// Data sources may have parallelism of 1, so simply check that the node
			// parallelism has not been increased by setting the default parallelism
			assertTrue("Wrong parallelism for " + node, node.getParallelism() <= parallelism);
		}

		for (Channel channel : node.getInputs()) {
			queue.add(channel.getSource());
		}
	}
}