org.apache.flink.optimizer.plantranslate.JobGraphGenerator Java Examples

The following examples show how to use org.apache.flink.optimizer.plantranslate.JobGraphGenerator. They are drawn from open-source projects; the source file and originating project are noted above each example.
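Every batch example below follows the same compilation pipeline: an ExecutionEnvironment builds a Plan, the Optimizer compiles it into an OptimizedPlan, and JobGraphGenerator translates that into the JobGraph handed to the runtime. The following condenses that pipeline into a single method; it is a minimal sketch assembled from the APIs used in the examples (Flink's legacy DataSet and optimizer APIs), and the class name CompilePipeline is illustrative.

import org.apache.flink.api.common.Plan;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.optimizer.DataStatistics;
import org.apache.flink.optimizer.Optimizer;
import org.apache.flink.optimizer.plan.OptimizedPlan;
import org.apache.flink.optimizer.plantranslate.JobGraphGenerator;
import org.apache.flink.runtime.jobgraph.JobGraph;

public class CompilePipeline {

	public static JobGraph compile() {
		// Build a trivial DataSet program.
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.generateSequence(1, 10).output(new DiscardingOutputFormat<Long>());

		// Turn the fluent program into an abstract Plan.
		Plan plan = env.createProgramPlan();

		// Let the optimizer pick execution strategies, yielding an OptimizedPlan.
		Optimizer optimizer = new Optimizer(new DataStatistics(), new Configuration());
		OptimizedPlan optimizedPlan = optimizer.compile(plan);

		// Translate the OptimizedPlan into the JobGraph the cluster executes.
		return new JobGraphGenerator().compileJobGraph(optimizedPlan);
	}
}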
Example #1
Source File: ReduceAllTest.java    From flink with Apache License 2.0
@Test
public void testReduce() {
	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);
	DataSet<Long> set1 = env.generateSequence(0,1);

	set1.reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce1")
			.output(new DiscardingOutputFormat<Long>()).name("Sink");

	Plan plan = env.createProgramPlan();

	try {
		OptimizedPlan oPlan = compileNoStats(plan);
		JobGraphGenerator jobGen = new JobGraphGenerator();
		jobGen.compileJobGraph(oPlan);
	} catch(CompilerException ce) {
		ce.printStackTrace();
		fail("The pact compiler is unable to compile this plan correctly");
	}
}
 
Example #2
Source File: ClusterClient.java    From Flink-CEPplus with Apache License 2.0
public static JobGraph getJobGraph(Configuration flinkConfig, FlinkPlan optPlan, List<URL> jarFiles, List<URL> classpaths, SavepointRestoreSettings savepointSettings) {
	JobGraph job;
	if (optPlan instanceof StreamingPlan) {
		job = ((StreamingPlan) optPlan).getJobGraph();
		job.setSavepointRestoreSettings(savepointSettings);
	} else {
		JobGraphGenerator gen = new JobGraphGenerator(flinkConfig);
		job = gen.compileJobGraph((OptimizedPlan) optPlan);
	}

	for (URL jar : jarFiles) {
		try {
			job.addJar(new Path(jar.toURI()));
		} catch (URISyntaxException e) {
			throw new RuntimeException("URL is invalid. This should not happen.", e);
		}
	}

	job.setClasspaths(classpaths);

	return job;
}
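Note the branch in getJobGraph: a streaming program arrives as a StreamingPlan that already carries its own JobGraph translation, so JobGraphGenerator is only invoked for the batch OptimizedPlan; jar files and classpaths are attached to the resulting JobGraph in either case.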
 
Example #3
Source File: HardPlansCompilationTest.java    From flink with Apache License 2.0
/**
 * Source -> Map -> Reduce -> Cross -> Reduce -> Cross -> Reduce ->
 * |--------------------------/                  /
 * |--------------------------------------------/
 * 
 * First cross has SameKeyFirst output contract
 */
@Test
public void testTicket158() {
	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);
	DataSet<Long> set1 = env.generateSequence(0,1);

	set1.map(new IdentityMapper<Long>()).name("Map1")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce1")
			.cross(set1).with(new IdentityCrosser<Long>()).withForwardedFieldsFirst("*").name("Cross1")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce2")
			.cross(set1).with(new IdentityCrosser<Long>()).name("Cross2")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce3")
			.output(new DiscardingOutputFormat<Long>()).name("Sink");

	Plan plan = env.createProgramPlan();
	OptimizedPlan oPlan = compileNoStats(plan);

	JobGraphGenerator jobGen = new JobGraphGenerator();
	jobGen.compileJobGraph(oPlan);
}
 
Example #4
Source File: UnionReplacementTest.java    From flink with Apache License 2.0
@Test
public void testUnionReplacement() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		DataSet<String> input1 = env.fromElements("test1");
		DataSet<String> input2 = env.fromElements("test2");

		DataSet<String> union = input1.union(input2);

		union.output(new DiscardingOutputFormat<String>());
		union.output(new DiscardingOutputFormat<String>());

		Plan plan = env.createProgramPlan();
		OptimizedPlan oPlan = compileNoStats(plan);
		JobGraphGenerator jobGen = new JobGraphGenerator();
		jobGen.compileJobGraph(oPlan);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #5
Source File: DisjointDataFlowsTest.java    From flink with Apache License 2.0
@Test
public void testDisjointFlows() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		// generate two different flows
		env.generateSequence(1, 10)
				.output(new DiscardingOutputFormat<Long>());
		env.generateSequence(1, 10)
				.output(new DiscardingOutputFormat<Long>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		new JobGraphGenerator().compileJobGraph(op);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #6
Source File: JsonJobGraphGenerationTest.java    From Flink-CEPplus with Apache License 2.0
@Override
public JobExecutionResult execute(String jobName) throws Exception {
	Plan plan = createProgramPlan(jobName);

	Optimizer pc = new Optimizer(new Configuration());
	OptimizedPlan op = pc.compile(plan);

	JobGraphGenerator jgg = new JobGraphGenerator();
	JobGraph jobGraph = jgg.compileJobGraph(op);

	String jsonPlan = JsonPlanGenerator.generatePlan(jobGraph);

	// first check that the JSON is valid
	JsonParser parser = new JsonFactory().createJsonParser(jsonPlan);
	while (parser.nextToken() != null) {}

	validator.validateJson(jsonPlan);

	throw new AbortError();
}
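Draining the Jackson parser with nextToken() until it returns null is a cheap syntactic check that the generated plan is well-formed JSON before the validator inspects its contents.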
 
Example #7
Source File: CachedMatchStrategyCompilerTest.java    From flink with Apache License 2.0
/**
 * This test makes sure that only a HYBRIDHASH on the static path is transformed to the cached variant
 */
@Test
public void testRightSideCountercheck() {
	try {
		
		Plan plan = getTestPlanRightStatic(Optimizer.HINT_LOCAL_STRATEGY_HASH_BUILD_FIRST);
		
		OptimizedPlan oPlan = compileNoStats(plan);

		OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan);
		DualInputPlanNode innerJoin = resolver.getNode("DummyJoiner");
		
		// verify correct join strategy
		assertEquals(DriverStrategy.HYBRIDHASH_BUILD_FIRST, innerJoin.getDriverStrategy()); 
		assertEquals(TempMode.NONE, innerJoin.getInput1().getTempMode());
		assertEquals(TempMode.CACHED, innerJoin.getInput2().getTempMode());
	
		new JobGraphGenerator().compileJobGraph(oPlan);
	}
	catch (Exception e) {
		System.err.println(e.getMessage());
		e.printStackTrace();
		fail("Test errored: " + e.getMessage());
	}
}
 
Example #8
Source File: TestEnvironment.java    From Flink-CEPplus with Apache License 2.0
@Override
public JobExecutionResult execute(String jobName) throws Exception {
	OptimizedPlan op = compileProgram(jobName);

	JobGraphGenerator jgg = new JobGraphGenerator();
	JobGraph jobGraph = jgg.compileJobGraph(op);

	for (Path jarFile: jarFiles) {
		jobGraph.addJar(jarFile);
	}

	jobGraph.setClasspaths(new ArrayList<>(classPaths));

	this.lastJobExecutionResult = jobExecutor.executeJobBlocking(jobGraph);
	return this.lastJobExecutionResult;
}
 
Example #9
Source File: IterationCompilerTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testIdentityIteration() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(43);
		
		IterativeDataSet<Long> iteration = env.generateSequence(-4, 1000).iterate(100);
		iteration.closeWith(iteration).output(new DiscardingOutputFormat<Long>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		new JobGraphGenerator().compileJobGraph(op);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #10
Source File: HardPlansCompilationTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Source -> Map -> Reduce -> Cross -> Reduce -> Cross -> Reduce ->
 * |--------------------------/                  /
 * |--------------------------------------------/
 * 
 * First cross has SameKeyFirst output contract
 */
@Test
public void testTicket158() {
	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);
	DataSet<Long> set1 = env.generateSequence(0,1);

	set1.map(new IdentityMapper<Long>()).name("Map1")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce1")
			.cross(set1).with(new IdentityCrosser<Long>()).withForwardedFieldsFirst("*").name("Cross1")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce2")
			.cross(set1).with(new IdentityCrosser<Long>()).name("Cross2")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce3")
			.output(new DiscardingOutputFormat<Long>()).name("Sink");

	Plan plan = env.createProgramPlan();
	OptimizedPlan oPlan = compileNoStats(plan);

	JobGraphGenerator jobGen = new JobGraphGenerator();
	jobGen.compileJobGraph(oPlan);
}
 
Example #11
Source File: UnionReplacementTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testUnionReplacement() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		DataSet<String> input1 = env.fromElements("test1");
		DataSet<String> input2 = env.fromElements("test2");

		DataSet<String> union = input1.union(input2);

		union.output(new DiscardingOutputFormat<String>());
		union.output(new DiscardingOutputFormat<String>());

		Plan plan = env.createProgramPlan();
		OptimizedPlan oPlan = compileNoStats(plan);
		JobGraphGenerator jobGen = new JobGraphGenerator();
		jobGen.compileJobGraph(oPlan);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #12
Source File: DisjointDataFlowsTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testDisjointFlows() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		// generate two different flows
		env.generateSequence(1, 10)
				.output(new DiscardingOutputFormat<Long>());
		env.generateSequence(1, 10)
				.output(new DiscardingOutputFormat<Long>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		new JobGraphGenerator().compileJobGraph(op);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #13
Source File: CachedMatchStrategyCompilerTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * This tests whether a HYBRIDHASH_BUILD_SECOND is correctly transformed to a HYBRIDHASH_BUILD_SECOND_CACHED
 * when it is inside an iteration and on the static path.
 */
@Test
public void testRightSide() {
	try {
		
		Plan plan = getTestPlanRightStatic(Optimizer.HINT_LOCAL_STRATEGY_HASH_BUILD_SECOND);
		
		OptimizedPlan oPlan = compileNoStats(plan);

		OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan);
		DualInputPlanNode innerJoin = resolver.getNode("DummyJoiner");
		
		// verify correct join strategy
		assertEquals(DriverStrategy.HYBRIDHASH_BUILD_SECOND_CACHED, innerJoin.getDriverStrategy()); 
		assertEquals(TempMode.NONE, innerJoin.getInput1().getTempMode());
		assertEquals(TempMode.NONE, innerJoin.getInput2().getTempMode());
	
		new JobGraphGenerator().compileJobGraph(oPlan);
	}
	catch (Exception e) {
		System.err.println(e.getMessage());
		e.printStackTrace();
		fail("Test errored: " + e.getMessage());
	}
}
 
Example #14
Source File: CachedMatchStrategyCompilerTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * This test makes sure that only a HYBRIDHASH on the static path is transformed to the cached variant
 */
@Test
public void testRightSideCountercheck() {
	try {
		
		Plan plan = getTestPlanRightStatic(Optimizer.HINT_LOCAL_STRATEGY_HASH_BUILD_FIRST);
		
		OptimizedPlan oPlan = compileNoStats(plan);

		OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan);
		DualInputPlanNode innerJoin = resolver.getNode("DummyJoiner");
		
		// verify correct join strategy
		assertEquals(DriverStrategy.HYBRIDHASH_BUILD_FIRST, innerJoin.getDriverStrategy()); 
		assertEquals(TempMode.NONE, innerJoin.getInput1().getTempMode());
		assertEquals(TempMode.CACHED, innerJoin.getInput2().getTempMode());
	
		new JobGraphGenerator().compileJobGraph(oPlan);
	}
	catch (Exception e) {
		System.err.println(e.getMessage());
		e.printStackTrace();
		fail("Test errored: " + e.getMessage());
	}
}
 
Example #15
Source File: CachedMatchStrategyCompilerTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * This tests whether a HYBRIDHASH_BUILD_FIRST is correctly transformed to a HYBRIDHASH_BUILD_FIRST_CACHED
 * when it is inside an iteration and on the static path.
 */
@Test
public void testLeftSide() {
	try {
		
		Plan plan = getTestPlanLeftStatic(Optimizer.HINT_LOCAL_STRATEGY_HASH_BUILD_FIRST);
		
		OptimizedPlan oPlan = compileNoStats(plan);

		OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan);
		DualInputPlanNode innerJoin = resolver.getNode("DummyJoiner");
		
		// verify correct join strategy
		assertEquals(DriverStrategy.HYBRIDHASH_BUILD_FIRST_CACHED, innerJoin.getDriverStrategy());
		assertEquals(TempMode.NONE, innerJoin.getInput1().getTempMode());
		assertEquals(TempMode.NONE, innerJoin.getInput2().getTempMode());
	
		new JobGraphGenerator().compileJobGraph(oPlan);
	}
	catch (Exception e) {
		System.err.println(e.getMessage());
		e.printStackTrace();
		fail("Test errored: " + e.getMessage());
	}
}
 
Example #16
Source File: CachedMatchStrategyCompilerTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * This test makes sure that only a HYBRIDHASH on the static path is transformed to the cached variant
 */
@Test
public void testLeftSideCountercheck() {
	try {
		
		Plan plan = getTestPlanLeftStatic(Optimizer.HINT_LOCAL_STRATEGY_HASH_BUILD_SECOND);
		
		OptimizedPlan oPlan = compileNoStats(plan);

		OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan);
		DualInputPlanNode innerJoin = resolver.getNode("DummyJoiner");
		
		// verify correct join strategy
		assertEquals(DriverStrategy.HYBRIDHASH_BUILD_SECOND, innerJoin.getDriverStrategy());
		assertEquals(TempMode.CACHED, innerJoin.getInput1().getTempMode());
		assertEquals(TempMode.NONE, innerJoin.getInput2().getTempMode());
	
		new JobGraphGenerator().compileJobGraph(oPlan);
	}
	catch (Exception e) {
		System.err.println(e.getMessage());
		e.printStackTrace();
		fail("Test errored: " + e.getMessage());
	}
}
 
Example #17
Source File: ReduceAllTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testReduce() {
	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);
	DataSet<Long> set1 = env.generateSequence(0,1);

	set1.reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce1")
			.output(new DiscardingOutputFormat<Long>()).name("Sink");

	Plan plan = env.createProgramPlan();

	try {
		OptimizedPlan oPlan = compileNoStats(plan);
		JobGraphGenerator jobGen = new JobGraphGenerator();
		jobGen.compileJobGraph(oPlan);
	} catch(CompilerException ce) {
		ce.printStackTrace();
		fail("The pact compiler is unable to compile this plan correctly");
	}
}
 
Example #18
Source File: TestEnvironment.java    From flink with Apache License 2.0
@Override
public JobExecutionResult execute(String jobName) throws Exception {
	OptimizedPlan op = compileProgram(jobName);

	JobGraphGenerator jgg = new JobGraphGenerator();
	JobGraph jobGraph = jgg.compileJobGraph(op);

	for (Path jarFile: jarFiles) {
		jobGraph.addJar(jarFile);
	}

	jobGraph.setClasspaths(new ArrayList<>(classPaths));

	this.lastJobExecutionResult = jobExecutor.executeJobBlocking(jobGraph);
	return this.lastJobExecutionResult;
}
 
Example #19
Source File: ClusterClient.java    From flink with Apache License 2.0
public static JobGraph getJobGraph(Configuration flinkConfig, FlinkPlan optPlan, List<URL> jarFiles, List<URL> classpaths, SavepointRestoreSettings savepointSettings) {
	JobGraph job;
	if (optPlan instanceof StreamingPlan) {
		job = ((StreamingPlan) optPlan).getJobGraph();
		job.setSavepointRestoreSettings(savepointSettings);
	} else {
		JobGraphGenerator gen = new JobGraphGenerator(flinkConfig);
		job = gen.compileJobGraph((OptimizedPlan) optPlan);
	}

	for (URL jar : jarFiles) {
		try {
			job.addJar(new Path(jar.toURI()));
		} catch (URISyntaxException e) {
			throw new RuntimeException("URL is invalid. This should not happen.", e);
		}
	}

	job.setClasspaths(classpaths);

	return job;
}
 
Example #20
Source File: IterationsCompilerTest.java    From flink with Apache License 2.0
@Test
public void testIterationNotPushingWorkOut() throws Exception {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(8);

		DataSet<Tuple2<Long, Long>> input1 = env.readCsvFile("/some/file/path").types(Long.class).map(new DuplicateValue());

		DataSet<Tuple2<Long, Long>> input2 = env.readCsvFile("/some/file/path").types(Long.class, Long.class);

		// Use input1 as partial solution. Partial solution is used in a single join operation --> it is cheaper
		// to do the hash partitioning between the partial solution node and the join node
		// instead of pushing the partitioning out
		doSimpleBulkIteration(input1, input2).output(new DiscardingOutputFormat<Tuple2<Long,Long>>());

		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);

		assertEquals(1, op.getDataSinks().size());
		assertTrue(op.getDataSinks().iterator().next().getInput().getSource() instanceof BulkIterationPlanNode);

		BulkIterationPlanNode bipn = (BulkIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();

		// check that work has not been pushed out
		for (Channel c : bipn.getPartialSolutionPlanNode().getOutgoingChannels()) {
			assertEquals(ShipStrategyType.PARTITION_HASH, c.getShipStrategy());
		}

		assertEquals(ShipStrategyType.FORWARD, bipn.getInput().getShipStrategy());

		new JobGraphGenerator().compileJobGraph(op);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #21
Source File: WorksetIterationCornerCasesTest.java    From flink with Apache License 2.0
@Test
public void testWorksetIterationNotDependingOnSolutionSet() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 100).map(new Duplicator<Long>());
		
		DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = input.iterateDelta(input, 100, 1);
		
		DataSet<Tuple2<Long, Long>> iterEnd = iteration.getWorkset().map(new TestMapper<Tuple2<Long,Long>>());
		iteration.closeWith(iterEnd, iterEnd)
			.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		WorksetIterationPlanNode wipn = (WorksetIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();
		assertTrue(wipn.getSolutionSetPlanNode().getOutgoingChannels().isEmpty());
		
		JobGraphGenerator jgg = new JobGraphGenerator();
		jgg.compileJobGraph(op);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #22
Source File: IterationCompilerTest.java    From flink with Apache License 2.0
@Test
public void testEmptyWorksetIteration() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(43);
		
		DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 20)
				.map(new MapFunction<Long, Tuple2<Long, Long>>() {
					@Override
					public Tuple2<Long, Long> map(Long value){ return null; }
				});
				
				
		DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = input.iterateDelta(input, 100, 0);
		iter.closeWith(iter.getWorkset(), iter.getWorkset())
			.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		new JobGraphGenerator().compileJobGraph(op);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #23
Source File: BranchingPlansCompilerTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testBranchingUnion() {
	try {
		// construct the plan
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(DEFAULT_PARALLELISM);
		DataSet<Long> source1 = env.generateSequence(0,1);
		DataSet<Long> source2 = env.generateSequence(0,1);

		DataSet<Long> join1 = source1.join(source2).where("*").equalTo("*")
				.with(new IdentityJoiner<Long>()).name("Join 1");

		DataSet<Long> map1 = join1.map(new IdentityMapper<Long>()).name("Map 1");

		DataSet<Long> reduce1 = map1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce 1");

		DataSet<Long> reduce2 = join1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce 2");

		DataSet<Long> map2 = join1.map(new IdentityMapper<Long>()).name("Map 2");

		DataSet<Long> map3 = map2.map(new IdentityMapper<Long>()).name("Map 3");

		DataSet<Long> join2 = reduce1.union(reduce2).union(map2).union(map3)
				.join(map2, JoinHint.REPARTITION_SORT_MERGE).where("*").equalTo("*")
				.with(new IdentityJoiner<Long>()).name("Join 2");

		join2.output(new DiscardingOutputFormat<Long>());

		Plan plan = env.createProgramPlan();
		OptimizedPlan oPlan = compileNoStats(plan);

		JobGraphGenerator jobGen = new JobGraphGenerator();
		
		//Compile plan to verify that no error is thrown
		jobGen.compileJobGraph(oPlan);
	} catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example #24
Source File: NestedIterationsTest.java    From flink with Apache License 2.0
@Test
public void testBulkIterationInClosure() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		DataSet<Long> data1 = env.generateSequence(1, 100);
		DataSet<Long> data2 = env.generateSequence(1, 100);
		
		IterativeDataSet<Long> firstIteration = data1.iterate(100);
		
		DataSet<Long> firstResult = firstIteration.closeWith(firstIteration.map(new IdentityMapper<Long>()));
		
		
		IterativeDataSet<Long> mainIteration = data2.map(new IdentityMapper<Long>()).iterate(100);
		
		DataSet<Long> joined = mainIteration.join(firstResult)
				.where(new IdentityKeyExtractor<Long>()).equalTo(new IdentityKeyExtractor<Long>())
				.with(new DummyFlatJoinFunction<Long>());
		
		DataSet<Long> mainResult = mainIteration.closeWith(joined);
		
		mainResult.output(new DiscardingOutputFormat<Long>());
		
		Plan p = env.createProgramPlan();
		
		// optimizer should be able to translate this
		OptimizedPlan op = compileNoStats(p);
		
		// job graph generator should be able to translate this
		new JobGraphGenerator().compileJobGraph(op);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #25
Source File: CachedMatchStrategyCompilerTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * This test simulates a join of a big left side with a small right side inside of an iteration, where the small side is on a static path.
 * Currently the best execution plan is a HYBRIDHASH_BUILD_SECOND_CACHED, where the small side is hashed and cached.
 * This test also makes sure that all relevant plans are correctly enumerated by the optimizer.
 */
@Test
public void testCorrectChoosing() {
	try {
		
		Plan plan = getTestPlanRightStatic("");
		
		SourceCollectorVisitor sourceCollector = new SourceCollectorVisitor();
		plan.accept(sourceCollector);
		
		for(GenericDataSourceBase<?, ?> s : sourceCollector.getSources()) {
			if(s.getName().equals("bigFile")) {
				this.setSourceStatistics(s, 10000000, 1000);
			}
			else if(s.getName().equals("smallFile")) {
				this.setSourceStatistics(s, 100, 100);
			}
		}
		
		
		OptimizedPlan oPlan = compileNoStats(plan);

		OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan);
		DualInputPlanNode innerJoin = resolver.getNode("DummyJoiner");
		
		// verify correct join strategy
		assertEquals(DriverStrategy.HYBRIDHASH_BUILD_SECOND_CACHED, innerJoin.getDriverStrategy());
		assertEquals(TempMode.NONE, innerJoin.getInput1().getTempMode());
		assertEquals(TempMode.NONE, innerJoin.getInput2().getTempMode());
	
		new JobGraphGenerator().compileJobGraph(oPlan);
	}
	catch (Exception e) {
		System.err.println(e.getMessage());
		e.printStackTrace();
		fail("Test errored: " + e.getMessage());
	}
}
 
Example #26
Source File: BranchingPlansCompilerTest.java    From flink with Apache License 2.0
@Test
public void testCostComputationWithMultipleDataSinks() {
	final int SINKS = 5;

	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(DEFAULT_PARALLELISM);

		DataSet<Long> source = env.generateSequence(1, 10000);

		DataSet<Long> mappedA = source.map(new IdentityMapper<Long>());
		DataSet<Long> mappedC = source.map(new IdentityMapper<Long>());

		for (int sink = 0; sink < SINKS; sink++) {
			mappedA.output(new DiscardingOutputFormat<Long>());
			mappedC.output(new DiscardingOutputFormat<Long>());
		}

		Plan plan = env.createProgramPlan("Plans With Multiple Data Sinks");
		OptimizedPlan oPlan = compileNoStats(plan);

		new JobGraphGenerator().compileJobGraph(oPlan);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #27
Source File: AccumulatorLiveITCase.java    From flink with Apache License 2.0
/**
 * Helper to generate the JobGraph.
 */
private static JobGraph getJobGraph(Plan plan) {
	Optimizer pc = new Optimizer(new DataStatistics(), new Configuration());
	JobGraphGenerator jgg = new JobGraphGenerator();
	OptimizedPlan op = pc.compile(plan);
	return jgg.compileJobGraph(op);
}
 
Example #28
Source File: StreamingJobGraphGenerator.java    From flink with Apache License 2.0
private JobGraph createJobGraph() {

		// make sure that all vertices start immediately
		jobGraph.setScheduleMode(streamGraph.getScheduleMode());

		// Generate deterministic hashes for the nodes in order to identify them across
		// submissions iff they didn't change.
		Map<Integer, byte[]> hashes = defaultStreamGraphHasher.traverseStreamGraphAndGenerateHashes(streamGraph);

		// Generate legacy version hashes for backwards compatibility
		List<Map<Integer, byte[]>> legacyHashes = new ArrayList<>(legacyStreamGraphHashers.size());
		for (StreamGraphHasher hasher : legacyStreamGraphHashers) {
			legacyHashes.add(hasher.traverseStreamGraphAndGenerateHashes(streamGraph));
		}

		Map<Integer, List<Tuple2<byte[], byte[]>>> chainedOperatorHashes = new HashMap<>();

		setChaining(hashes, legacyHashes, chainedOperatorHashes);

		setPhysicalEdges();

		setSlotSharingAndCoLocation();

		configureCheckpointing();

		JobGraphGenerator.addUserArtifactEntries(streamGraph.getUserArtifacts(), jobGraph);

		// set the ExecutionConfig last when it has been finalized
		try {
			jobGraph.setExecutionConfig(streamGraph.getExecutionConfig());
		}
		catch (IOException e) {
			throw new IllegalConfigurationException("Could not serialize the ExecutionConfig. " +
					"This indicates that non-serializable types (like custom serializers) were registered");
		}

		return jobGraph;
	}
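Note that the streaming translator does not delegate to JobGraphGenerator for the translation itself; it only reuses the static helper JobGraphGenerator.addUserArtifactEntries(...) so that user artifacts are registered on the JobGraph the same way for batch and streaming jobs.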
 
Example #29
Source File: MultipleJoinsWithSolutionSetCompilerTest.java    From flink with Apache License 2.0
@Test
public void testMultiSolutionSetJoinPlan() {
	try {

		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		@SuppressWarnings("unchecked")
		DataSet<Tuple2<Long, Double>> inputData = env.fromElements(new Tuple2<Long, Double>(1L, 1.0));
		DataSet<Tuple2<Long, Double>> result = constructPlan(inputData, 10);

		// add two sinks, to test the case of branching after an iteration
		result.output(new DiscardingOutputFormat<Tuple2<Long, Double>>());
		result.output(new DiscardingOutputFormat<Tuple2<Long, Double>>());

		Plan p = env.createProgramPlan();

		OptimizedPlan optPlan = compileNoStats(p);

		OptimizerPlanNodeResolver or = getOptimizerPlanNodeResolver(optPlan);

		DualInputPlanNode join1 = or.getNode(JOIN_1);
		DualInputPlanNode join2 = or.getNode(JOIN_2);

		assertEquals(DriverStrategy.HYBRIDHASH_BUILD_FIRST, join1.getDriverStrategy());
		assertEquals(DriverStrategy.HYBRIDHASH_BUILD_SECOND, join2.getDriverStrategy());

		assertEquals(ShipStrategyType.PARTITION_HASH, join1.getInput2().getShipStrategy());
		assertEquals(ShipStrategyType.PARTITION_HASH, join2.getInput1().getShipStrategy());

		assertEquals(SolutionSetPlanNode.class, join1.getInput1().getSource().getClass());
		assertEquals(SolutionSetPlanNode.class, join2.getInput2().getSource().getClass());

		new JobGraphGenerator().compileJobGraph(optPlan);
	}
	catch (Exception e) {
		System.err.println(e.getMessage());
		e.printStackTrace();
		fail("Test erroneous: " + e.getMessage());
	}
}
 
Example #30
Source File: BranchingPlansCompilerTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testCostComputationWithMultipleDataSinks() {
	final int SINKS = 5;

	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(DEFAULT_PARALLELISM);

		DataSet<Long> source = env.generateSequence(1, 10000);

		DataSet<Long> mappedA = source.map(new IdentityMapper<Long>());
		DataSet<Long> mappedC = source.map(new IdentityMapper<Long>());

		for (int sink = 0; sink < SINKS; sink++) {
			mappedA.output(new DiscardingOutputFormat<Long>());
			mappedC.output(new DiscardingOutputFormat<Long>());
		}

		Plan plan = env.createProgramPlan("Plans With Multiple Data Sinks");
		OptimizedPlan oPlan = compileNoStats(plan);

		new JobGraphGenerator().compileJobGraph(oPlan);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}