org.apache.flink.optimizer.Optimizer Java Examples

The following examples show how to use org.apache.flink.optimizer.Optimizer. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestUtils.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Verify operator parallelism.
 *
 * @param env the Flink execution environment.
 * @param expectedParallelism expected operator parallelism
 */
public static void verifyParallelism(ExecutionEnvironment env, int expectedParallelism) {
	env.setParallelism(2 * expectedParallelism);

	Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
	OptimizedPlan optimizedPlan = compiler.compile(env.createProgramPlan());

	List<PlanNode> queue = new ArrayList<>();
	queue.addAll(optimizedPlan.getDataSinks());

	while (queue.size() > 0) {
		PlanNode node = queue.remove(queue.size() - 1);

		// Data sources may have parallelism of 1, so simply check that the node
		// parallelism has not been increased by setting the default parallelism
		assertTrue("Wrong parallelism for " + node.toString(), node.getParallelism() <= expectedParallelism);

		for (Channel channel : node.getInputs()) {
			queue.add(channel.getSource());
		}
	}
}
 
Example #2
Source File: GroupReduceITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
@Test
public void testCorrectnessOfAllGroupReduceForTuplesWithCombine() throws Exception {
	/*
	 * check correctness of all-groupreduce for tuples with combine
	 */
	org.junit.Assume.assumeTrue(mode != TestExecutionMode.COLLECTION);

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env)
			.map(new IdentityMapper<Tuple3<Integer, Long, String>>()).setParallelism(4);

	Configuration cfg = new Configuration();
	cfg.setString(Optimizer.HINT_SHIP_STRATEGY, Optimizer.HINT_SHIP_STRATEGY_REPARTITION);
	DataSet<Tuple2<Integer, String>> reduceDs = ds.reduceGroup(new Tuple3AllGroupReduceWithCombine())
			.withParameters(cfg);

	List<Tuple2<Integer, String>> result = reduceDs.collect();

	String expected = "322," +
			"testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest\n";

	compareResultAsTuples(result, expected);
}
 
Example #3
Source File: FlinkRunnerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
@Test
public void testEnsureStdoutStdErrIsRestored() throws Exception {
  PackagedProgram packagedProgram = new PackagedProgram(getClass());
  OptimizerPlanEnvironment env = new OptimizerPlanEnvironment(new Optimizer(new Configuration()));
  try {
    // Flink will throw an error because no job graph will be generated by the main method
    env.getOptimizedPlan(packagedProgram);
    Assert.fail("This should have failed to create the Flink Plan.");
  } catch (ProgramInvocationException e) {
    // Test that Flink wasn't able to intercept the stdout/stderr and we printed to the regular
    // output instead
    MatcherAssert.assertThat(
        e.getMessage(),
        allOf(
            StringContains.containsString("System.out: (none)"),
            StringContains.containsString("System.err: (none)")));
  }
}
 
Example #4
Source File: TestUtils.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Verify operator parallelism.
 *
 * @param env the Flink execution environment.
 * @param expectedParallelism expected operator parallelism
 */
public static void verifyParallelism(ExecutionEnvironment env, int expectedParallelism) {
	env.setParallelism(2 * expectedParallelism);

	Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
	OptimizedPlan optimizedPlan = compiler.compile(env.createProgramPlan());

	List<PlanNode> queue = new ArrayList<>();
	queue.addAll(optimizedPlan.getDataSinks());

	while (queue.size() > 0) {
		PlanNode node = queue.remove(queue.size() - 1);

		// Data sources may have parallelism of 1, so simply check that the node
		// parallelism has not been increased by setting the default parallelism
		assertTrue("Wrong parallelism for " + node.toString(), node.getParallelism() <= expectedParallelism);

		for (Channel channel : node.getInputs()) {
			queue.add(channel.getSource());
		}
	}
}
 
Example #5
Source File: ClusterClient.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
public static FlinkPlan getOptimizedPlan(Optimizer compiler, PackagedProgram prog, int parallelism)
		throws CompilerException, ProgramInvocationException {
	Thread.currentThread().setContextClassLoader(prog.getUserCodeClassLoader());
	if (prog.isUsingProgramEntryPoint()) {
		return getOptimizedPlan(compiler, prog.getPlanWithJars(), parallelism);
	} else if (prog.isUsingInteractiveMode()) {
		// temporary hack to support the optimizer plan preview
		OptimizerPlanEnvironment env = new OptimizerPlanEnvironment(compiler);
		if (parallelism > 0) {
			env.setParallelism(parallelism);
		}

		return env.getOptimizedPlan(prog);
	} else {
		throw new RuntimeException("Couldn't determine program mode.");
	}
}
 
Example #6
Source File: ClientTest.java    From flink with Apache License 2.0 6 votes vote down vote up
@Test
public void testGetExecutionPlan() throws ProgramInvocationException {
	PackagedProgram prg = PackagedProgram.newBuilder()
		.setEntryPointClassName(TestOptimizerPlan.class.getName())
		.setArguments("/dev/random", "/tmp")
		.build();

	Optimizer optimizer = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), config);
	Plan plan = (Plan) PackagedProgramUtils.getPipelineFromProgram(prg, new Configuration(), 1, true);
	OptimizedPlan op = optimizer.compile(plan);
	assertNotNull(op);

	PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
	assertNotNull(dumper.getOptimizerPlanAsJSON(op));

	// test HTML escaping
	PlanJSONDumpGenerator dumper2 = new PlanJSONDumpGenerator();
	dumper2.setEncodeForHTML(true);
	String htmlEscaped = dumper2.getOptimizerPlanAsJSON(op);

	assertEquals(-1, htmlEscaped.indexOf('\\'));
}
 
Example #7
Source File: ClusterClient.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a instance that submits the programs to the JobManager defined in the
 * configuration. This method will try to resolve the JobManager hostname and throw an exception
 * if that is not possible.
 *
 * @param flinkConfig The config used to obtain the job-manager's address, and used to configure the optimizer.
 * @param highAvailabilityServices HighAvailabilityServices to use for leader retrieval
 * @param sharedHaServices true if the HighAvailabilityServices are shared and must not be shut down
 */
public ClusterClient(
		Configuration flinkConfig,
		HighAvailabilityServices highAvailabilityServices,
		boolean sharedHaServices) {
	this.flinkConfig = Preconditions.checkNotNull(flinkConfig);
	this.compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), flinkConfig);

	this.timeout = AkkaUtils.getClientTimeout(flinkConfig);
	this.lookupTimeout = AkkaUtils.getLookupTimeout(flinkConfig);

	this.actorSystemLoader = new LazyActorSystemLoader(
		highAvailabilityServices,
		Time.milliseconds(lookupTimeout.toMillis()),
		flinkConfig,
		log);

	this.highAvailabilityServices = Preconditions.checkNotNull(highAvailabilityServices);
	this.sharedHaServices = sharedHaServices;
}
 
Example #8
Source File: GroupReduceITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
@Test
public void testCorrectnessOfAllGroupReduceForTuplesWithCombine() throws Exception {
	/*
	 * check correctness of all-groupreduce for tuples with combine
	 */
	org.junit.Assume.assumeTrue(mode != TestExecutionMode.COLLECTION);

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env)
			.map(new IdentityMapper<Tuple3<Integer, Long, String>>()).setParallelism(4);

	Configuration cfg = new Configuration();
	cfg.setString(Optimizer.HINT_SHIP_STRATEGY, Optimizer.HINT_SHIP_STRATEGY_REPARTITION);
	DataSet<Tuple2<Integer, String>> reduceDs = ds.reduceGroup(new Tuple3AllGroupReduceWithCombine())
			.withParameters(cfg);

	List<Tuple2<Integer, String>> result = reduceDs.collect();

	String expected = "322," +
			"testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest\n";

	compareResultAsTuples(result, expected);
}
 
Example #9
Source File: PreviewPlanDumpTest.java    From flink with Apache License 2.0 6 votes vote down vote up
private static void verifyPlanDump(Class<?> entrypoint, String... args) throws Exception {
	final PackagedProgram program = PackagedProgram
		.newBuilder()
		.setEntryPointClassName(entrypoint.getName())
		.setArguments(args)
		.build();

	final Pipeline pipeline = PackagedProgramUtils.getPipelineFromProgram(program, new Configuration(), 1, true);

	assertTrue(pipeline instanceof Plan);

	final Plan plan = (Plan) pipeline;

	final List<DataSinkNode> sinks = Optimizer.createPreOptimizedPlan(plan);
	final PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
	final String json = dumper.getPactPlanAsJSON(sinks);

	try (JsonParser parser = new JsonFactory().createParser(json)) {
		while (parser.nextToken() != null) {
		}
	}
}
 
Example #10
Source File: ExecutionContext.java    From flink with Apache License 2.0 6 votes vote down vote up
private FlinkPlan createPlan(String name, Configuration flinkConfig) {
	if (streamExecEnv != null) {
		// special case for Blink planner to apply batch optimizations
		// note: it also modifies the ExecutionConfig!
		if (executor instanceof ExecutorBase) {
			return ((ExecutorBase) executor).generateStreamGraph(name);
		}
		return streamExecEnv.getStreamGraph(name);
	} else {
		final int parallelism = execEnv.getParallelism();
		final Plan unoptimizedPlan = execEnv.createProgramPlan();
		unoptimizedPlan.setJobName(name);
		final Optimizer compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), flinkConfig);
		return ClusterClient.getOptimizedPlan(compiler, unoptimizedPlan, parallelism);
	}
}
 
Example #11
Source File: JsonJobGraphGenerationTest.java    From flink with Apache License 2.0 6 votes vote down vote up
@Override
public JobExecutionResult execute(String jobName) throws Exception {
	Plan plan = createProgramPlan(jobName);

	Optimizer pc = new Optimizer(new Configuration());
	OptimizedPlan op = pc.compile(plan);

	JobGraphGenerator jgg = new JobGraphGenerator();
	JobGraph jobGraph = jgg.compileJobGraph(op);

	String jsonPlan = JsonPlanGenerator.generatePlan(jobGraph);

	// first check that the JSON is valid
	JsonParser parser = new JsonFactory().createJsonParser(jsonPlan);
	while (parser.nextToken() != null) {}

	validator.validateJson(jsonPlan);

	throw new AbortError();
}
 
Example #12
Source File: TestUtils.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Verify operator parallelism.
 *
 * @param env the Flink execution environment.
 * @param expectedParallelism expected operator parallelism
 */
public static void verifyParallelism(ExecutionEnvironment env, int expectedParallelism) {
	env.setParallelism(2 * expectedParallelism);

	Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
	OptimizedPlan optimizedPlan = compiler.compile(env.createProgramPlan());

	List<PlanNode> queue = new ArrayList<>();
	queue.addAll(optimizedPlan.getDataSinks());

	while (queue.size() > 0) {
		PlanNode node = queue.remove(queue.size() - 1);

		// Data sources may have parallelism of 1, so simply check that the node
		// parallelism has not been increased by setting the default parallelism
		assertTrue("Wrong parallelism for " + node.toString(), node.getParallelism() <= expectedParallelism);

		for (Channel channel : node.getInputs()) {
			queue.add(channel.getSource());
		}
	}
}
 
Example #13
Source File: ClientTest.java    From flink with Apache License 2.0 6 votes vote down vote up
@Test
public void testGetExecutionPlan() throws ProgramInvocationException {
	PackagedProgram prg = new PackagedProgram(TestOptimizerPlan.class, "/dev/random", "/tmp");
	assertNotNull(prg.getPreviewPlan());

	Optimizer optimizer = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), config);
	OptimizedPlan op = (OptimizedPlan) ClusterClient.getOptimizedPlan(optimizer, prg, 1);
	assertNotNull(op);

	PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
	assertNotNull(dumper.getOptimizerPlanAsJSON(op));

	// test HTML escaping
	PlanJSONDumpGenerator dumper2 = new PlanJSONDumpGenerator();
	dumper2.setEncodeForHTML(true);
	String htmlEscaped = dumper2.getOptimizerPlanAsJSON(op);

	assertEquals(-1, htmlEscaped.indexOf('\\'));
}
 
Example #14
Source File: ClusterClient.java    From flink with Apache License 2.0 6 votes vote down vote up
public static FlinkPlan getOptimizedPlan(Optimizer compiler, PackagedProgram prog, int parallelism)
		throws CompilerException, ProgramInvocationException {
	final ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
	try {
		Thread.currentThread().setContextClassLoader(prog.getUserCodeClassLoader());
		if (prog.isUsingProgramEntryPoint()) {
			return getOptimizedPlan(compiler, prog.getPlanWithJars(), parallelism);
		} else if (prog.isUsingInteractiveMode()) {
			// temporary hack to support the optimizer plan preview
			OptimizerPlanEnvironment env = new OptimizerPlanEnvironment(compiler);
			if (parallelism > 0) {
				env.setParallelism(parallelism);
			}

			return env.getOptimizedPlan(prog);
		} else {
			throw new RuntimeException("Couldn't determine program mode.");
		}
	} finally {
		Thread.currentThread().setContextClassLoader(contextClassLoader);
	}
}
 
Example #15
Source File: JsonJobGraphGenerationTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
@Override
public JobExecutionResult execute(String jobName) throws Exception {
	Plan plan = createProgramPlan(jobName);

	Optimizer pc = new Optimizer(new Configuration());
	OptimizedPlan op = pc.compile(plan);

	JobGraphGenerator jgg = new JobGraphGenerator();
	JobGraph jobGraph = jgg.compileJobGraph(op);

	String jsonPlan = JsonPlanGenerator.generatePlan(jobGraph);

	// first check that the JSON is valid
	JsonParser parser = new JsonFactory().createJsonParser(jsonPlan);
	while (parser.nextToken() != null) {}

	validator.validateJson(jsonPlan);

	throw new AbortError();
}
 
Example #16
Source File: GroupReduceITCase.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
@Test
public void testCorrectnessOfAllGroupReduceForTuplesWithCombine() throws Exception {
	/*
	 * check correctness of all-groupreduce for tuples with combine
	 */
	org.junit.Assume.assumeTrue(mode != TestExecutionMode.COLLECTION);

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env)
			.map(new IdentityMapper<Tuple3<Integer, Long, String>>()).setParallelism(4);

	Configuration cfg = new Configuration();
	cfg.setString(Optimizer.HINT_SHIP_STRATEGY, Optimizer.HINT_SHIP_STRATEGY_REPARTITION);
	DataSet<Tuple2<Integer, String>> reduceDs = ds.reduceGroup(new Tuple3AllGroupReduceWithCombine())
			.withParameters(cfg);

	List<Tuple2<Integer, String>> result = reduceDs.collect();

	String expected = "322," +
			"testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest\n";

	compareResultAsTuples(result, expected);
}
 
Example #17
Source File: TempInIterationsTest.java    From flink with Apache License 2.0 5 votes vote down vote up
@Test
public void testTempInIterationTest() throws Exception {

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple2<Long, Long>> input = env.readCsvFile("file:///does/not/exist").types(Long.class, Long.class);

	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			input.iterateDelta(input, 1, 0);

	DataSet<Tuple2<Long, Long>> update = iteration.getWorkset()
			.join(iteration.getSolutionSet()).where(0).equalTo(0)
				.with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

	iteration.closeWith(update, update)
			.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());


	Plan plan = env.createProgramPlan();
	OptimizedPlan oPlan = (new Optimizer(new Configuration())).compile(plan);

	JobGraphGenerator jgg = new JobGraphGenerator();
	JobGraph jg = jgg.compileJobGraph(oPlan);

	boolean solutionSetUpdateChecked = false;
	for(JobVertex v : jg.getVertices()) {
		if(v.getName().equals("SolutionSet Delta")) {

			// check if input of solution set delta is temped
			TaskConfig tc = new TaskConfig(v.getConfiguration());
			assertTrue(tc.isInputAsynchronouslyMaterialized(0));
			solutionSetUpdateChecked = true;
		}
	}
	assertTrue(solutionSetUpdateChecked);

}
 
Example #18
Source File: ExecutionPlanCreationTest.java    From flink with Apache License 2.0 5 votes vote down vote up
@Test
public void testGetExecutionPlan() {
	try {
		PackagedProgram prg = new PackagedProgram(TestOptimizerPlan.class, "/dev/random", "/tmp");
		assertNotNull(prg.getPreviewPlan());

		InetAddress mockAddress = InetAddress.getLocalHost();
		InetSocketAddress mockJmAddress = new InetSocketAddress(mockAddress, 12345);

		Configuration config = new Configuration();

		config.setString(JobManagerOptions.ADDRESS, mockJmAddress.getHostName());
		config.setInteger(JobManagerOptions.PORT, mockJmAddress.getPort());

		Optimizer optimizer = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), config);
		OptimizedPlan op = (OptimizedPlan) ClusterClient.getOptimizedPlan(optimizer, prg, -1);
		assertNotNull(op);

		PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
		assertNotNull(dumper.getOptimizerPlanAsJSON(op));

		// test HTML escaping
		PlanJSONDumpGenerator dumper2 = new PlanJSONDumpGenerator();
		dumper2.setEncodeForHTML(true);
		String htmlEscaped = dumper2.getOptimizerPlanAsJSON(op);

		assertEquals(-1, htmlEscaped.indexOf('\\'));
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #19
Source File: ExecutionContext.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
private FlinkPlan createPlan(String name, Configuration flinkConfig) {
	if (streamExecEnv != null) {
		final StreamGraph graph = streamExecEnv.getStreamGraph();
		graph.setJobName(name);
		return graph;
	} else {
		final int parallelism = execEnv.getParallelism();
		final Plan unoptimizedPlan = execEnv.createProgramPlan();
		unoptimizedPlan.setJobName(name);
		final Optimizer compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), flinkConfig);
		return ClusterClient.getOptimizedPlan(compiler, unoptimizedPlan, parallelism);
	}
}
 
Example #20
Source File: JobGraphGeneratorTest.java    From flink with Apache License 2.0 5 votes vote down vote up
private static JobGraph compileJob(ExecutionEnvironment env) {
	Plan plan = env.createProgramPlan();
	Optimizer pc = new Optimizer(new Configuration());
	OptimizedPlan op = pc.compile(plan);

	JobGraphGenerator jgg = new JobGraphGenerator();
	return jgg.compileJobGraph(op);
}
 
Example #21
Source File: ClusterClient.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a instance that submits the programs to the JobManager defined in the
 * configuration. This method will try to resolve the JobManager hostname and throw an exception
 * if that is not possible.
 *
 * @param flinkConfig The config used to obtain the job-manager's address, and used to configure the optimizer.
 * @param highAvailabilityServices HighAvailabilityServices to use for leader retrieval
 * @param sharedHaServices true if the HighAvailabilityServices are shared and must not be shut down
 */
public ClusterClient(
		Configuration flinkConfig,
		HighAvailabilityServices highAvailabilityServices,
		boolean sharedHaServices) {
	this.flinkConfig = Preconditions.checkNotNull(flinkConfig);
	this.compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), flinkConfig);

	this.timeout = AkkaUtils.getClientTimeout(flinkConfig);

	this.highAvailabilityServices = Preconditions.checkNotNull(highAvailabilityServices);
	this.sharedHaServices = sharedHaServices;
}
 
Example #22
Source File: PlanTranslator.java    From flink with Apache License 2.0 5 votes vote down vote up
@Override
public String translateToJSONExecutionPlan(Pipeline pipeline) {
	checkArgument(pipeline instanceof Plan, "Given pipeline is not a DataSet Plan.");

	Plan plan = (Plan) pipeline;

	Optimizer opt = new Optimizer(
			new DataStatistics(),
			new DefaultCostEstimator(),
			new Configuration());
	OptimizedPlan optPlan = opt.compile(plan);

	return new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(optPlan);
}
 
Example #23
Source File: ClusterClient.java    From flink with Apache License 2.0 5 votes vote down vote up
public static OptimizedPlan getOptimizedPlan(Optimizer compiler, Plan p, int parallelism) throws CompilerException {
	Logger log = LoggerFactory.getLogger(ClusterClient.class);

	if (parallelism > 0 && p.getDefaultParallelism() <= 0) {
		log.debug("Changing plan default parallelism from {} to {}", p.getDefaultParallelism(), parallelism);
		p.setDefaultParallelism(parallelism);
	}
	log.debug("Set parallelism {}, plan default parallelism {}", parallelism, p.getDefaultParallelism());

	return compiler.compile(p);
}
 
Example #24
Source File: PreviewPlanEnvironment.java    From flink with Apache License 2.0 5 votes vote down vote up
@Override
public JobExecutionResult execute(String jobName) throws Exception {
	this.plan = createProgramPlan(jobName);
	this.previewPlan = Optimizer.createPreOptimizedPlan(plan);

	// do not go on with anything now!
	throw new OptimizerPlanEnvironment.ProgramAbortException();
}
 
Example #25
Source File: LocalExecutor.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a JSON representation of the given dataflow's execution plan.
 *
 * @param plan The dataflow plan.
 * @return The dataflow's execution plan, as a JSON string.
 * @throws Exception Thrown, if the optimization process that creates the execution plan failed.
 */
public static String optimizerPlanAsJSON(Plan plan) throws Exception {
	final int parallelism = plan.getDefaultParallelism() == ExecutionConfig.PARALLELISM_DEFAULT ? 1 : plan.getDefaultParallelism();

	Optimizer pc = new Optimizer(new DataStatistics(), new Configuration());
	pc.setDefaultParallelism(parallelism);
	OptimizedPlan op = pc.compile(plan);

	return new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(op);
}
 
Example #26
Source File: AccumulatorLiveITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Helpers to generate the JobGraph.
 */
private static JobGraph getJobGraph(Plan plan) {
	Optimizer pc = new Optimizer(new DataStatistics(), new Configuration());
	JobGraphGenerator jgg = new JobGraphGenerator();
	OptimizedPlan op = pc.compile(plan);
	return jgg.compileJobGraph(op);
}
 
Example #27
Source File: GroupReduceNode.java    From flink with Apache License 2.0 5 votes vote down vote up
private List<OperatorDescriptorSingle> initPossibleProperties(Partitioner<?> customPartitioner) {
	// see if an internal hint dictates the strategy to use
	final Configuration conf = getOperator().getParameters();
	final String localStrategy = conf.getString(Optimizer.HINT_LOCAL_STRATEGY, null);

	final boolean useCombiner;
	if (localStrategy != null) {
		if (Optimizer.HINT_LOCAL_STRATEGY_SORT.equals(localStrategy)) {
			useCombiner = false;
		}
		else if (Optimizer.HINT_LOCAL_STRATEGY_COMBINING_SORT.equals(localStrategy)) {
			if (!isCombineable()) {
				Optimizer.LOG.warn("Strategy hint for GroupReduce '" + getOperator().getName() +
					"' requires combinable reduce, but user function is not marked combinable.");
			}
			useCombiner = true;
		} else {
			throw new CompilerException("Invalid local strategy hint for match contract: " + localStrategy);
		}
	} else {
		useCombiner = isCombineable();
	}
	
	// check if we can work with a grouping (simple reducer), or if we need ordering because of a group order
	Ordering groupOrder = null;
	if (getOperator() != null) {
		groupOrder = getOperator().getGroupOrder();
		if (groupOrder != null && groupOrder.getNumberOfFields() == 0) {
			groupOrder = null;
		}
	}
	
	OperatorDescriptorSingle props = useCombiner ?
		(this.keys == null ? new AllGroupWithPartialPreGroupProperties() : new GroupReduceWithCombineProperties(this.keys, groupOrder, customPartitioner)) :
		(this.keys == null ? new AllGroupReduceProperties() : new GroupReduceProperties(this.keys, groupOrder, customPartitioner));

	return Collections.singletonList(props);
}
 
Example #28
Source File: CompilerTestBase.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
@Before
public void setup() {
	Configuration flinkConf = new Configuration();
	this.dataStats = new DataStatistics();
	this.withStatsCompiler = new Optimizer(this.dataStats, new DefaultCostEstimator(), flinkConf);
	this.withStatsCompiler.setDefaultParallelism(DEFAULT_PARALLELISM);

	this.noStatsCompiler = new Optimizer(null, new DefaultCostEstimator(), flinkConf);
	this.noStatsCompiler.setDefaultParallelism(DEFAULT_PARALLELISM);
}
 
Example #29
Source File: TempInIterationsTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
@Test
public void testTempInIterationTest() throws Exception {

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple2<Long, Long>> input = env.readCsvFile("file:///does/not/exist").types(Long.class, Long.class);

	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			input.iterateDelta(input, 1, 0);

	DataSet<Tuple2<Long, Long>> update = iteration.getWorkset()
			.join(iteration.getSolutionSet()).where(0).equalTo(0)
				.with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

	iteration.closeWith(update, update)
			.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());


	Plan plan = env.createProgramPlan();
	OptimizedPlan oPlan = (new Optimizer(new Configuration())).compile(plan);

	JobGraphGenerator jgg = new JobGraphGenerator();
	JobGraph jg = jgg.compileJobGraph(oPlan);

	boolean solutionSetUpdateChecked = false;
	for(JobVertex v : jg.getVertices()) {
		if(v.getName().equals("SolutionSet Delta")) {

			// check if input of solution set delta is temped
			TaskConfig tc = new TaskConfig(v.getConfiguration());
			assertTrue(tc.isInputAsynchronouslyMaterialized(0));
			solutionSetUpdateChecked = true;
		}
	}
	assertTrue(solutionSetUpdateChecked);

}
 
Example #30
Source File: PreviewPlanEnvironment.java    From flink with Apache License 2.0 5 votes vote down vote up
@Override
public String getExecutionPlan() throws Exception {
	Plan plan = createProgramPlan("unused");
	this.previewPlan = Optimizer.createPreOptimizedPlan(plan);

	// do not go on with anything now!
	throw new OptimizerPlanEnvironment.ProgramAbortException();
}