Java Code Examples for org.apache.flink.optimizer.Optimizer#compile()

The following examples show how to use org.apache.flink.optimizer.Optimizer#compile() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: JsonJobGraphGenerationTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
@Override
public JobExecutionResult execute(String jobName) throws Exception {
	Plan plan = createProgramPlan(jobName);

	Optimizer pc = new Optimizer(new Configuration());
	OptimizedPlan op = pc.compile(plan);

	JobGraphGenerator jgg = new JobGraphGenerator();
	JobGraph jobGraph = jgg.compileJobGraph(op);

	String jsonPlan = JsonPlanGenerator.generatePlan(jobGraph);

	// first check that the JSON is valid
	JsonParser parser = new JsonFactory().createJsonParser(jsonPlan);
	while (parser.nextToken() != null) {}

	validator.validateJson(jsonPlan);

	throw new AbortError();
}
 
Example 2
Source File: JsonJobGraphGenerationTest.java    From flink with Apache License 2.0 6 votes vote down vote up
@Override
public JobExecutionResult execute(String jobName) throws Exception {
	Plan plan = createProgramPlan(jobName);

	Optimizer pc = new Optimizer(new Configuration());
	OptimizedPlan op = pc.compile(plan);

	JobGraphGenerator jgg = new JobGraphGenerator();
	JobGraph jobGraph = jgg.compileJobGraph(op);

	String jsonPlan = JsonPlanGenerator.generatePlan(jobGraph);

	// first check that the JSON is valid
	JsonParser parser = new JsonFactory().createJsonParser(jsonPlan);
	while (parser.nextToken() != null) {}

	validator.validateJson(jsonPlan);

	throw new AbortError();
}
 
Example 3
Source File: TestUtils.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Verify operator parallelism.
 *
 * @param env the Flink execution environment.
 * @param expectedParallelism expected operator parallelism
 */
public static void verifyParallelism(ExecutionEnvironment env, int expectedParallelism) {
	env.setParallelism(2 * expectedParallelism);

	Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
	OptimizedPlan optimizedPlan = compiler.compile(env.createProgramPlan());

	List<PlanNode> queue = new ArrayList<>();
	queue.addAll(optimizedPlan.getDataSinks());

	while (queue.size() > 0) {
		PlanNode node = queue.remove(queue.size() - 1);

		// Data sources may have parallelism of 1, so simply check that the node
		// parallelism has not been increased by setting the default parallelism
		assertTrue("Wrong parallelism for " + node.toString(), node.getParallelism() <= expectedParallelism);

		for (Channel channel : node.getInputs()) {
			queue.add(channel.getSource());
		}
	}
}
 
Example 4
Source File: TestUtils.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Verify operator parallelism.
 *
 * @param env the Flink execution environment.
 * @param expectedParallelism expected operator parallelism
 */
public static void verifyParallelism(ExecutionEnvironment env, int expectedParallelism) {
	env.setParallelism(2 * expectedParallelism);

	Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
	OptimizedPlan optimizedPlan = compiler.compile(env.createProgramPlan());

	List<PlanNode> queue = new ArrayList<>();
	queue.addAll(optimizedPlan.getDataSinks());

	while (queue.size() > 0) {
		PlanNode node = queue.remove(queue.size() - 1);

		// Data sources may have parallelism of 1, so simply check that the node
		// parallelism has not been increased by setting the default parallelism
		assertTrue("Wrong parallelism for " + node.toString(), node.getParallelism() <= expectedParallelism);

		for (Channel channel : node.getInputs()) {
			queue.add(channel.getSource());
		}
	}
}
 
Example 5
Source File: JsonJobGraphGenerationTest.java    From flink with Apache License 2.0 6 votes vote down vote up
@Override
public JobExecutionResult execute(String jobName) throws Exception {
	Plan plan = createProgramPlan(jobName);

	Optimizer pc = new Optimizer(new Configuration());
	OptimizedPlan op = pc.compile(plan);

	JobGraphGenerator jgg = new JobGraphGenerator();
	JobGraph jobGraph = jgg.compileJobGraph(op);

	String jsonPlan = JsonPlanGenerator.generatePlan(jobGraph);

	// first check that the JSON is valid
	JsonParser parser = new JsonFactory().createJsonParser(jsonPlan);
	while (parser.nextToken() != null) {}

	validator.validateJson(jsonPlan);

	throw new AbortError();
}
 
Example 6
Source File: ClientTest.java    From flink with Apache License 2.0 6 votes vote down vote up
@Test
public void testGetExecutionPlan() throws ProgramInvocationException {
	PackagedProgram prg = PackagedProgram.newBuilder()
		.setEntryPointClassName(TestOptimizerPlan.class.getName())
		.setArguments("/dev/random", "/tmp")
		.build();

	Optimizer optimizer = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), config);
	Plan plan = (Plan) PackagedProgramUtils.getPipelineFromProgram(prg, new Configuration(), 1, true);
	OptimizedPlan op = optimizer.compile(plan);
	assertNotNull(op);

	PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
	assertNotNull(dumper.getOptimizerPlanAsJSON(op));

	// test HTML escaping
	PlanJSONDumpGenerator dumper2 = new PlanJSONDumpGenerator();
	dumper2.setEncodeForHTML(true);
	String htmlEscaped = dumper2.getOptimizerPlanAsJSON(op);

	assertEquals(-1, htmlEscaped.indexOf('\\'));
}
 
Example 7
Source File: ClusterClient.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
public static OptimizedPlan getOptimizedPlan(Optimizer compiler, Plan p, int parallelism) throws CompilerException {
	Logger log = LoggerFactory.getLogger(ClusterClient.class);

	if (parallelism > 0 && p.getDefaultParallelism() <= 0) {
		log.debug("Changing plan default parallelism from {} to {}", p.getDefaultParallelism(), parallelism);
		p.setDefaultParallelism(parallelism);
	}
	log.debug("Set parallelism {}, plan default parallelism {}", parallelism, p.getDefaultParallelism());

	return compiler.compile(p);
}
 
Example 8
Source File: AccumulatorLiveITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Helpers to generate the JobGraph.
 */
private static JobGraph getJobGraph(Plan plan) {
	Optimizer pc = new Optimizer(new DataStatistics(), new Configuration());
	JobGraphGenerator jgg = new JobGraphGenerator();
	OptimizedPlan op = pc.compile(plan);
	return jgg.compileJobGraph(op);
}
 
Example 9
Source File: ExecutionPlanJSONGenerator.java    From flink with Apache License 2.0 5 votes vote down vote up
@Override
public String getExecutionPlan(Plan plan) {
	Optimizer opt = new Optimizer(
			new DataStatistics(),
			new DefaultCostEstimator(),
			new Configuration());
	OptimizedPlan optPlan = opt.compile(plan);
	return new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(optPlan);
}
 
Example 10
Source File: AccumulatorLiveITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Helpers to generate the JobGraph.
 */
private static JobGraph getJobGraph(Plan plan) {
	Optimizer pc = new Optimizer(new DataStatistics(), new Configuration());
	JobGraphGenerator jgg = new JobGraphGenerator();
	OptimizedPlan op = pc.compile(plan);
	return jgg.compileJobGraph(op);
}
 
Example 11
Source File: ClusterClient.java    From flink with Apache License 2.0 5 votes vote down vote up
public static OptimizedPlan getOptimizedPlan(Optimizer compiler, Plan p, int parallelism) throws CompilerException {
	Logger log = LoggerFactory.getLogger(ClusterClient.class);

	if (parallelism > 0 && p.getDefaultParallelism() <= 0) {
		log.debug("Changing plan default parallelism from {} to {}", p.getDefaultParallelism(), parallelism);
		p.setDefaultParallelism(parallelism);
	}
	log.debug("Set parallelism {}, plan default parallelism {}", parallelism, p.getDefaultParallelism());

	return compiler.compile(p);
}
 
Example 12
Source File: LocalExecutor.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a JSON representation of the given dataflow's execution plan.
 *
 * @param plan The dataflow plan.
 * @return The dataflow's execution plan, as a JSON string.
 * @throws Exception Thrown, if the optimization process that creates the execution plan failed.
 */
@Override
public String getOptimizerPlanAsJSON(Plan plan) throws Exception {
	final int parallelism = plan.getDefaultParallelism() == ExecutionConfig.PARALLELISM_DEFAULT ? 1 : plan.getDefaultParallelism();

	Optimizer pc = new Optimizer(new DataStatistics(), this.baseConfiguration);
	pc.setDefaultParallelism(parallelism);
	OptimizedPlan op = pc.compile(plan);

	return new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(op);
}
 
Example 13
Source File: AccumulatorLiveITCase.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Helpers to generate the JobGraph.
 */
private static JobGraph getJobGraph(Plan plan) {
	Optimizer pc = new Optimizer(new DataStatistics(), new Configuration());
	JobGraphGenerator jgg = new JobGraphGenerator();
	OptimizedPlan op = pc.compile(plan);
	return jgg.compileJobGraph(op);
}
 
Example 14
Source File: JobGraphGeneratorTest.java    From flink with Apache License 2.0 5 votes vote down vote up
private static JobGraph compileJob(ExecutionEnvironment env) {
	Plan plan = env.createProgramPlan();
	Optimizer pc = new Optimizer(new Configuration());
	OptimizedPlan op = pc.compile(plan);

	JobGraphGenerator jgg = new JobGraphGenerator();
	return jgg.compileJobGraph(op);
}
 
Example 15
Source File: RemoteExecutor.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
@Override
public String getOptimizerPlanAsJSON(Plan plan) throws Exception {
	Optimizer opt = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), new Configuration());
	OptimizedPlan optPlan = opt.compile(plan);
	return new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(optPlan);
}
 
Example 16
Source File: PackagedProgramUtils.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@link JobGraph} with a specified {@link JobID}
 * from the given {@link PackagedProgram}.
 *
 * @param packagedProgram to extract the JobGraph from
 * @param configuration to use for the optimizer and job graph generator
 * @param defaultParallelism for the JobGraph
 * @param jobID the pre-generated job id
 * @return JobGraph extracted from the PackagedProgram
 * @throws ProgramInvocationException if the JobGraph generation failed
 */
public static JobGraph createJobGraph(
		PackagedProgram packagedProgram,
		Configuration configuration,
		int defaultParallelism,
		@Nullable JobID jobID) throws ProgramInvocationException {
	Thread.currentThread().setContextClassLoader(packagedProgram.getUserCodeClassLoader());
	final Optimizer optimizer = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), configuration);
	final FlinkPlan flinkPlan;

	if (packagedProgram.isUsingProgramEntryPoint()) {

		final JobWithJars jobWithJars = packagedProgram.getPlanWithJars();

		final Plan plan = jobWithJars.getPlan();

		if (plan.getDefaultParallelism() <= 0) {
			plan.setDefaultParallelism(defaultParallelism);
		}

		flinkPlan = optimizer.compile(jobWithJars.getPlan());
	} else if (packagedProgram.isUsingInteractiveMode()) {
		final OptimizerPlanEnvironment optimizerPlanEnvironment = new OptimizerPlanEnvironment(optimizer);

		optimizerPlanEnvironment.setParallelism(defaultParallelism);

		flinkPlan = optimizerPlanEnvironment.getOptimizedPlan(packagedProgram);
	} else {
		throw new ProgramInvocationException("PackagedProgram does not have a valid invocation mode.");
	}

	final JobGraph jobGraph;

	if (flinkPlan instanceof StreamingPlan) {
		jobGraph = ((StreamingPlan) flinkPlan).getJobGraph(jobID);
		jobGraph.setSavepointRestoreSettings(packagedProgram.getSavepointSettings());
	} else {
		final JobGraphGenerator jobGraphGenerator = new JobGraphGenerator(configuration);
		jobGraph = jobGraphGenerator.compileJobGraph((OptimizedPlan) flinkPlan, jobID);
	}

	for (URL url : packagedProgram.getAllLibraries()) {
		try {
			jobGraph.addJar(new Path(url.toURI()));
		} catch (URISyntaxException e) {
			throw new ProgramInvocationException("Invalid URL for jar file: " + url + '.', jobGraph.getJobID(), e);
		}
	}

	jobGraph.setClasspaths(packagedProgram.getClasspaths());

	return jobGraph;
}
 
Example 17
Source File: TestEnvironment.java    From flink with Apache License 2.0 4 votes vote down vote up
private OptimizedPlan compileProgram(String jobName) {
	Plan p = createProgramPlan(jobName);

	Optimizer pc = new Optimizer(new DataStatistics(), new Configuration());
	return pc.compile(p);
}
 
Example 18
Source File: TestEnvironment.java    From flink with Apache License 2.0 4 votes vote down vote up
private OptimizedPlan compileProgram(String jobName) {
	Plan p = createProgramPlan(jobName);

	Optimizer pc = new Optimizer(new DataStatistics(), new Configuration());
	return pc.compile(p);
}
 
Example 19
Source File: TestUtils.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Verify algorithm driver parallelism.
 *
 * <p>Based on {@code org.apache.flink.graph.generator.TestUtils}.
 *
 * @param arguments program arguments
 * @param fullParallelismOperatorNames list of regex strings matching the names of full parallelism operators
 */
static void verifyParallelism(String[] arguments, String... fullParallelismOperatorNames) throws Exception {
	// set a reduced parallelism for the algorithm runner
	final int parallelism = 8;
	arguments = ArrayUtils.addAll(arguments, "--__parallelism", Integer.toString(parallelism));

	// configure the runner but do not execute
	Runner runner = new Runner(arguments).run();

	// we cannot use the actual DataSink since DataSet#writeAsCsv also
	// executes the program; instead, we receive the DataSet and configure
	// with a DiscardingOutputFormat
	DataSet result = runner.getResult();
	if (result != null) {
		result.output(new DiscardingOutputFormat());
	}

	// set the default parallelism higher than the expected parallelism
	ExecutionEnvironment env = runner.getExecutionEnvironment();
	env.setParallelism(2 * parallelism);

	// add default regex exclusions for the added DiscardingOutputFormat
	// and also for any preceding GraphKeyTypeTransform
	List<Pattern> patterns = new ArrayList<>();
	patterns.add(Pattern.compile("DataSink \\(org\\.apache\\.flink\\.api\\.java\\.io\\.DiscardingOutputFormat@[0-9a-f]{1,8}\\)"));
	patterns.add(Pattern.compile("FlatMap \\(Translate results IDs\\)"));

	// add user regex patterns
	for (String largeOperatorName : fullParallelismOperatorNames) {
		patterns.add(Pattern.compile(largeOperatorName));
	}

	Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
	OptimizedPlan optimizedPlan = compiler.compile(env.createProgramPlan());

	// walk the job plan from sinks to sources
	List<PlanNode> queue = new ArrayList<>();
	queue.addAll(optimizedPlan.getDataSinks());

	while (queue.size() > 0) {
		PlanNode node = queue.remove(queue.size() - 1);

		// skip operators matching an exclusion pattern; these are the
		// large-scale operators which run at full parallelism
		boolean matched = false;
		for (Pattern pattern : patterns) {
			matched |= pattern.matcher(node.getNodeName()).matches();
		}

		if (!matched) {
			// Data sources may have parallelism of 1, so simply check that the node
			// parallelism has not been increased by setting the default parallelism
			assertTrue("Wrong parallelism for " + node.toString(), node.getParallelism() <= parallelism);
		}

		for (Channel channel : node.getInputs()) {
			queue.add(channel.getSource());
		}
	}
}
 
Example 20
Source File: RemoteExecutor.java    From flink with Apache License 2.0 4 votes vote down vote up
@Override
public String getOptimizerPlanAsJSON(Plan plan) throws Exception {
	Optimizer opt = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), new Configuration());
	OptimizedPlan optPlan = opt.compile(plan);
	return new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(optPlan);
}