org.apache.flink.optimizer.plan.OptimizedPlan Java Examples

The following examples show how to use org.apache.flink.optimizer.plan.OptimizedPlan. They are taken from open-source projects; the source file and license are noted above each example.
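Most of the examples follow the same three-step pattern: capture the user program as a Plan, compile it into an OptimizedPlan, and translate the result into a JobGraph. The following is a minimal sketch of that flow (imports omitted, as in the snippets below); the generateSequence program is only a placeholder, and the compileNoStats call used throughout the test snippets is a test-base helper that performs essentially the same compile step without data statistics.

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.generateSequence(1, 10).output(new DiscardingOutputFormat<Long>());

// 1) capture the user program as an unoptimized Plan
Plan plan = env.createProgramPlan();

// 2) compile the Plan into an OptimizedPlan (constructor arguments as used in the examples below)
Optimizer compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), new Configuration());
OptimizedPlan optimizedPlan = compiler.compile(plan);

// 3) translate the OptimizedPlan into an executable JobGraph
JobGraph jobGraph = new JobGraphGenerator().compileJobGraph(optimizedPlan);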
Example #1
Source File: ReplicatingDataSourceTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Tests that the compiler fails for a join program with a replicated data source behind a reduce.
 */
@Test(expected = CompilerException.class)
public void checkJoinWithReplicatedSourceInputBehindReduce() {
	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.reduce(new LastReduce())
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
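	// this call is expected to throw a CompilerException (declared by the @Test annotation above)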
	OptimizedPlan oPlan = compileNoStats(plan);
}
 
Example #2
Source File: IterationCompilerTest.java    From flink with Apache License 2.0
@Test
public void testIdentityIteration() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(43);
		
		IterativeDataSet<Long> iteration = env.generateSequence(-4, 1000).iterate(100);
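		// close the iteration with itself: a pure identity iteration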
		iteration.closeWith(iteration).output(new DiscardingOutputFormat<Long>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		new JobGraphGenerator().compileJobGraph(op);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #3
Source File: TestEnvironment.java    From flink with Apache License 2.0
@Override
public JobExecutionResult execute(String jobName) throws Exception {
	OptimizedPlan op = compileProgram(jobName);

	JobGraphGenerator jgg = new JobGraphGenerator();
	JobGraph jobGraph = jgg.compileJobGraph(op);

	for (Path jarFile: jarFiles) {
		jobGraph.addJar(jarFile);
	}

	jobGraph.setClasspaths(new ArrayList<>(classPaths));

	this.lastJobExecutionResult = jobExecutor.executeJobBlocking(jobGraph);
	return this.lastJobExecutionResult;
}
 
Example #4
Source File: ReduceAllTest.java    From flink with Apache License 2.0
@Test
public void testReduce() {
	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);
	DataSet<Long> set1 = env.generateSequence(0,1);

	set1.reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce1")
			.output(new DiscardingOutputFormat<Long>()).name("Sink");

	Plan plan = env.createProgramPlan();

	try {
		OptimizedPlan oPlan = compileNoStats(plan);
		JobGraphGenerator jobGen = new JobGraphGenerator();
		jobGen.compileJobGraph(oPlan);
	} catch(CompilerException ce) {
		ce.printStackTrace();
		fail("The pact compiler is unable to compile this plan correctly");
	}
}
 
Example #5
Source File: TestUtils.java    From flink with Apache License 2.0
/**
 * Verify operator parallelism.
 *
 * @param env the Flink execution environment.
 * @param expectedParallelism expected operator parallelism
 */
public static void verifyParallelism(ExecutionEnvironment env, int expectedParallelism) {
	env.setParallelism(2 * expectedParallelism);

	Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
	OptimizedPlan optimizedPlan = compiler.compile(env.createProgramPlan());

	List<PlanNode> queue = new ArrayList<>();
	queue.addAll(optimizedPlan.getDataSinks());

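	// walk the plan backwards from the sinks to the sources via the input channels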
	while (queue.size() > 0) {
		PlanNode node = queue.remove(queue.size() - 1);

		// Data sources may have parallelism of 1, so simply check that the node
		// parallelism has not been increased by setting the default parallelism
		assertTrue("Wrong parallelism for " + node.toString(), node.getParallelism() <= expectedParallelism);

		for (Channel channel : node.getInputs()) {
			queue.add(channel.getSource());
		}
	}
}
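
A hypothetical call site, assuming a DataSet program has already been defined on env:

verifyParallelism(env, 4);  // raises env's default parallelism to 8; fails if any plan node exceeds 4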
 
Example #6
Source File: ClientTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testGetExecutionPlan() throws ProgramInvocationException {
	PackagedProgram prg = new PackagedProgram(TestOptimizerPlan.class, "/dev/random", "/tmp");
	assertNotNull(prg.getPreviewPlan());

	Optimizer optimizer = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), config);
	OptimizedPlan op = (OptimizedPlan) ClusterClient.getOptimizedPlan(optimizer, prg, 1);
	assertNotNull(op);

	PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
	assertNotNull(dumper.getOptimizerPlanAsJSON(op));

	// test HTML escaping
	PlanJSONDumpGenerator dumper2 = new PlanJSONDumpGenerator();
	dumper2.setEncodeForHTML(true);
	String htmlEscaped = dumper2.getOptimizerPlanAsJSON(op);

	assertEquals(-1, htmlEscaped.indexOf('\\'));
}
 
Example #7
Source File: UnionReplacementTest.java    From flink with Apache License 2.0
@Test
public void testUnionReplacement() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		DataSet<String> input1 = env.fromElements("test1");
		DataSet<String> input2 = env.fromElements("test2");

		DataSet<String> union = input1.union(input2);

		union.output(new DiscardingOutputFormat<String>());
		union.output(new DiscardingOutputFormat<String>());

		Plan plan = env.createProgramPlan();
		OptimizedPlan oPlan = compileNoStats(plan);
		JobGraphGenerator jobGen = new JobGraphGenerator();
		jobGen.compileJobGraph(oPlan);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #8
Source File: CachedMatchStrategyCompilerTest.java    From flink with Apache License 2.0
/**
 * Tests whether a HYBRIDHASH_BUILD_SECOND is correctly transformed to a HYBRIDHASH_BUILD_SECOND_CACHED
 * when it is inside an iteration and on the static path.
 */
@Test
public void testRightSide() {
	try {
		
		Plan plan = getTestPlanRightStatic(Optimizer.HINT_LOCAL_STRATEGY_HASH_BUILD_SECOND);
		
		OptimizedPlan oPlan = compileNoStats(plan);

		OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan);
		DualInputPlanNode innerJoin = resolver.getNode("DummyJoiner");
		
		// verify correct join strategy
		assertEquals(DriverStrategy.HYBRIDHASH_BUILD_SECOND_CACHED, innerJoin.getDriverStrategy()); 
		assertEquals(TempMode.NONE, innerJoin.getInput1().getTempMode());
		assertEquals(TempMode.NONE, innerJoin.getInput2().getTempMode());
	
		new JobGraphGenerator().compileJobGraph(oPlan);
	}
	catch (Exception e) {
		System.err.println(e.getMessage());
		e.printStackTrace();
		fail("Test errored: " + e.getMessage());
	}
}
 
Example #9
Source File: CachedMatchStrategyCompilerTest.java    From flink with Apache License 2.0
/**
 * This test makes sure that only a HYBRIDHASH on the static path is transformed to the cached variant.
 */
@Test
public void testRightSideCountercheck() {
	try {
		
		Plan plan = getTestPlanRightStatic(Optimizer.HINT_LOCAL_STRATEGY_HASH_BUILD_FIRST);
		
		OptimizedPlan oPlan = compileNoStats(plan);

		OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan);
		DualInputPlanNode innerJoin = resolver.getNode("DummyJoiner");
		
		// verify correct join strategy
		assertEquals(DriverStrategy.HYBRIDHASH_BUILD_FIRST, innerJoin.getDriverStrategy()); 
		assertEquals(TempMode.NONE, innerJoin.getInput1().getTempMode());
		assertEquals(TempMode.CACHED, innerJoin.getInput2().getTempMode());
	
		new JobGraphGenerator().compileJobGraph(oPlan);
	}
	catch (Exception e) {
		System.err.println(e.getMessage());
		e.printStackTrace();
		fail("Test errored: " + e.getMessage());
	}
}
 
Example #10
Source File: CachedMatchStrategyCompilerTest.java    From flink with Apache License 2.0
/**
 * Tests whether a HYBRIDHASH_BUILD_FIRST is correctly transformed to a HYBRIDHASH_BUILD_FIRST_CACHED
 * when it is inside an iteration and on the static path.
 */
@Test
public void testLeftSide() {
	try {
		
		Plan plan = getTestPlanLeftStatic(Optimizer.HINT_LOCAL_STRATEGY_HASH_BUILD_FIRST);
		
		OptimizedPlan oPlan = compileNoStats(plan);

		OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan);
		DualInputPlanNode innerJoin = resolver.getNode("DummyJoiner");
		
		// verify correct join strategy
		assertEquals(DriverStrategy.HYBRIDHASH_BUILD_FIRST_CACHED, innerJoin.getDriverStrategy());
		assertEquals(TempMode.NONE, innerJoin.getInput1().getTempMode());
		assertEquals(TempMode.NONE, innerJoin.getInput2().getTempMode());
	
		new JobGraphGenerator().compileJobGraph(oPlan);
	}
	catch (Exception e) {
		System.err.println(e.getMessage());
		e.printStackTrace();
		fail("Test errored: " + e.getMessage());
	}
}
 
Example #11
Source File: DisjointDataFlowsTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testDisjointFlows() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		// generate two different flows
		env.generateSequence(1, 10)
				.output(new DiscardingOutputFormat<Long>());
		env.generateSequence(1, 10)
				.output(new DiscardingOutputFormat<Long>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		new JobGraphGenerator().compileJobGraph(op);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #12
Source File: PlanJSONDumpGenerator.java    From flink with Apache License 2.0
public void dumpOptimizerPlanAsJSON(OptimizedPlan plan, PrintWriter writer) {
	Collection<SinkPlanNode> sinks = plan.getDataSinks();
	if (sinks instanceof List) {
		dumpOptimizerPlanAsJSON((List<SinkPlanNode>) sinks, writer);
	} else {
		List<SinkPlanNode> n = new ArrayList<SinkPlanNode>();
		n.addAll(sinks);
		dumpOptimizerPlanAsJSON(n, writer);
	}
}
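
A hedged usage sketch of the dumper, assuming optimizedPlan was compiled as in the other examples (StringWriter and PrintWriter are plain java.io classes):

PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();

// stream the JSON description of the optimized plan into an in-memory writer
StringWriter out = new StringWriter();
dumper.dumpOptimizerPlanAsJSON(optimizedPlan, new PrintWriter(out));
String json = out.toString();

// alternatively, fetch the JSON directly, as the ClientTest example above does
String alsoJson = dumper.getOptimizerPlanAsJSON(optimizedPlan);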
 
Example #13
Source File: KMeansSingleStepTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testCompileKMeansSingleStepWithOutStats() {

	Plan p = getKMeansPlan();
	p.setExecutionConfig(new ExecutionConfig());
	OptimizedPlan plan = compileNoStats(p);
	checkPlan(plan);
}
 
Example #14
Source File: DistinctAndGroupingOptimizerTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testDistinctPreservesPartitioningOfDistinctFields() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(4);
		
		@SuppressWarnings("unchecked")
		DataSet<Tuple2<Long, Long>> data = env.fromElements(new Tuple2<Long, Long>(0L, 0L), new Tuple2<Long, Long>(1L, 1L))
				.map(new IdentityMapper<Tuple2<Long,Long>>()).setParallelism(4);
		
		data.distinct(0)
			.groupBy(0)
			.sum(1)
			.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		SinkPlanNode sink = op.getDataSinks().iterator().next();
		SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
		SingleInputPlanNode distinctReducer = (SingleInputPlanNode) reducer.getInput().getSource();
		
		assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
		
		// reducer can be forward, reuses partitioning from distinct
		assertEquals(ShipStrategyType.FORWARD, reducer.getInput().getShipStrategy());
		
		// distinct reducer is partitioned
		assertEquals(ShipStrategyType.PARTITION_HASH, distinctReducer.getInput().getShipStrategy());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #15
Source File: BranchingPlansCompilerTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testBranchingUnion() {
	try {
		// construct the plan
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(DEFAULT_PARALLELISM);
		DataSet<Long> source1 = env.generateSequence(0,1);
		DataSet<Long> source2 = env.generateSequence(0,1);

		DataSet<Long> join1 = source1.join(source2).where("*").equalTo("*")
				.with(new IdentityJoiner<Long>()).name("Join 1");

		DataSet<Long> map1 = join1.map(new IdentityMapper<Long>()).name("Map 1");

		DataSet<Long> reduce1 = map1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce 1");

		DataSet<Long> reduce2 = join1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce 2");

		DataSet<Long> map2 = join1.map(new IdentityMapper<Long>()).name("Map 2");

		DataSet<Long> map3 = map2.map(new IdentityMapper<Long>()).name("Map 3");

		DataSet<Long> join2 = reduce1.union(reduce2).union(map2).union(map3)
				.join(map2, JoinHint.REPARTITION_SORT_MERGE).where("*").equalTo("*")
				.with(new IdentityJoiner<Long>()).name("Join 2");

		join2.output(new DiscardingOutputFormat<Long>());

		Plan plan = env.createProgramPlan();
		OptimizedPlan oPlan = compileNoStats(plan);

		JobGraphGenerator jobGen = new JobGraphGenerator();
		
		//Compile plan to verify that no error is thrown
		jobGen.compileJobGraph(oPlan);
	} catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example #16
Source File: PropertyDataSourceTest.java    From flink with Apache License 2.0
@Test
public void checkSinglePartitionedSource2() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data.getSplitDataProperties()
			.splitsPartitionedBy(1, 0);

	data.output(new DiscardingOutputFormat<Tuple2<Long,String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0, 1)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(lprops.getGroupedFields() == null);
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example #17
Source File: AccumulatorLiveITCase.java    From Flink-CEPplus with Apache License 2.0
/**
 * Helper to generate the JobGraph.
 */
private static JobGraph getJobGraph(Plan plan) {
	Optimizer pc = new Optimizer(new DataStatistics(), new Configuration());
	JobGraphGenerator jgg = new JobGraphGenerator();
	OptimizedPlan op = pc.compile(plan);
	return jgg.compileJobGraph(op);
}
 
Example #18
Source File: CoGroupCustomPartitioningTest.java    From flink with Apache License 2.0
@Test
public void testCoGroupWithKeySelectors() {
	try {
		final Partitioner<Integer> partitioner = new TestPartitionerInt();
		
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		DataSet<Pojo2> input1 = env.fromElements(new Pojo2());
		DataSet<Pojo3> input2 = env.fromElements(new Pojo3());
		
		input1
			.coGroup(input2)
			.where(new Pojo2KeySelector()).equalTo(new Pojo3KeySelector())
			.withPartitioner(partitioner)
			.with(new DummyCoGroupFunction<Pojo2, Pojo3>())
			.output(new DiscardingOutputFormat<Tuple2<Pojo2, Pojo3>>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		SinkPlanNode sink = op.getDataSinks().iterator().next();
		DualInputPlanNode join = (DualInputPlanNode) sink.getInput().getSource();
		
		assertEquals(ShipStrategyType.PARTITION_CUSTOM, join.getInput1().getShipStrategy());
		assertEquals(ShipStrategyType.PARTITION_CUSTOM, join.getInput2().getShipStrategy());
		assertEquals(partitioner, join.getInput1().getPartitioner());
		assertEquals(partitioner, join.getInput2().getPartitioner());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #19
Source File: GroupingTupleTranslationTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testCustomPartitioningTupleAgg() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		DataSet<Tuple2<Integer, Integer>> data = env.fromElements(new Tuple2<Integer, Integer>(0, 0))
				.rebalance().setParallelism(4);
		
		data.groupBy(0).withPartitioner(new TestPartitionerInt())
			.sum(1)
			.output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		SinkPlanNode sink = op.getDataSinks().iterator().next();
		SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
		SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();
		
		assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
		assertEquals(ShipStrategyType.PARTITION_CUSTOM, reducer.getInput().getShipStrategy());
		assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #20
Source File: PartitionOperatorTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testPartitionCustomOperatorPreservesFields() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		DataSet<Tuple2<Long, Long>> data = env.fromCollection(Collections.singleton(new Tuple2<>(0L, 0L)));
		
		data.partitionCustom(new Partitioner<Long>() {
				public int partition(Long key, int numPartitions) { return key.intValue(); }
			}, 1)
			.groupBy(1)
			.reduceGroup(new IdentityGroupReducerCombinable<Tuple2<Long, Long>>())
			.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		SinkPlanNode sink = op.getDataSinks().iterator().next();
		SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
		SingleInputPlanNode partitioner = (SingleInputPlanNode) reducer.getInput().getSource();

		assertEquals(ShipStrategyType.FORWARD, reducer.getInput().getShipStrategy());
		assertEquals(ShipStrategyType.PARTITION_CUSTOM, partitioner.getInput().getShipStrategy());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #21
Source File: GroupingKeySelectorTranslationTest.java    From flink with Apache License 2.0
@Test
public void testCustomPartitioningKeySelectorGroupReduce() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		DataSet<Tuple2<Integer, Integer>> data = env.fromElements(new Tuple2<Integer, Integer>(0, 0))
				.rebalance().setParallelism(4);
		
		data.groupBy(new TestKeySelector<Tuple2<Integer,Integer>>())
			.withPartitioner(new TestPartitionerInt())
				.reduceGroup(new IdentityGroupReducerCombinable<Tuple2<Integer,Integer>>())
			.output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		SinkPlanNode sink = op.getDataSinks().iterator().next();
		SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
		SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();
		
		assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
		assertEquals(ShipStrategyType.PARTITION_CUSTOM, reducer.getInput().getShipStrategy());
		assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #22
Source File: AccumulatorLiveITCase.java    From flink with Apache License 2.0
/**
 * Helper to generate the JobGraph.
 */
private static JobGraph getJobGraph(Plan plan) {
	Optimizer pc = new Optimizer(new DataStatistics(), new Configuration());
	JobGraphGenerator jgg = new JobGraphGenerator();
	OptimizedPlan op = pc.compile(plan);
	return jgg.compileJobGraph(op);
}
 
Example #23
Source File: GroupingTupleTranslationTest.java    From flink with Apache License 2.0
@Test
public void testCustomPartitioningTupleAgg() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		DataSet<Tuple2<Integer, Integer>> data = env.fromElements(new Tuple2<Integer, Integer>(0, 0))
				.rebalance().setParallelism(4);
		
		data.groupBy(0).withPartitioner(new TestPartitionerInt())
			.sum(1)
			.output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		SinkPlanNode sink = op.getDataSinks().iterator().next();
		SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
		SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();
		
		assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
		assertEquals(ShipStrategyType.PARTITION_CUSTOM, reducer.getInput().getShipStrategy());
		assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #24
Source File: ReplicatingDataSourceTest.java    From flink with Apache License 2.0
/**
 * Tests a join program with a replicated data source behind a mapPartition.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMapPartition() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.mapPartition(new IdPMap())
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	// the join should have a forward ship strategy on both inputs
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

	ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
	ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
 
Example #25
Source File: DistinctAndGroupingOptimizerTest.java    From flink with Apache License 2.0
@Test
public void testDistinctDestroysPartitioningOfNonDistinctFields() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(4);
		
		@SuppressWarnings("unchecked")
		DataSet<Tuple2<Long, Long>> data = env.fromElements(new Tuple2<Long, Long>(0L, 0L), new Tuple2<Long, Long>(1L, 1L))
				.map(new IdentityMapper<Tuple2<Long,Long>>()).setParallelism(4);
		
		data.distinct(1)
			.groupBy(0)
			.sum(1)
			.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		SinkPlanNode sink = op.getDataSinks().iterator().next();
		SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
		SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();
		SingleInputPlanNode distinctReducer = (SingleInputPlanNode) combiner.getInput().getSource();
		
		assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
		
		// reducer must repartition, because it works on a different field
		assertEquals(ShipStrategyType.PARTITION_HASH, reducer.getInput().getShipStrategy());

		assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
		
		// distinct reducer is partitioned
		assertEquals(ShipStrategyType.PARTITION_HASH, distinctReducer.getInput().getShipStrategy());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #26
Source File: WorksetIterationCornerCasesTest.java    From flink with Apache License 2.0
@Test
public void testWorksetIterationNotDependingOnSolutionSet() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 100).map(new Duplicator<Long>());
		
		DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = input.iterateDelta(input, 100, 1);
		
		DataSet<Tuple2<Long, Long>> iterEnd = iteration.getWorkset().map(new TestMapper<Tuple2<Long,Long>>());
		iteration.closeWith(iterEnd, iterEnd)
			.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		WorksetIterationPlanNode wipn = (WorksetIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();
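		// nothing reads the solution set, so its plan node must have no outgoing channels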
		assertTrue(wipn.getSolutionSetPlanNode().getOutgoingChannels().isEmpty());
		
		JobGraphGenerator jgg = new JobGraphGenerator();
		jgg.compileJobGraph(op);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #27
Source File: ParallelismChangeTest.java    From flink with Apache License 2.0
/**
 * Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
 * 
 * Increases parallelism between 1st reduce and 2nd map, such that more tasks are on one instance.
 * Expected to re-establish partitioning between map and reduce via a local hash.
 */
@Test
public void checkPropertyHandlingWithIncreasingLocalParallelism() {
	final int p = DEFAULT_PARALLELISM * 2;

	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(p);
	DataSet<Long> set1 = env.generateSequence(0,1).setParallelism(p);

	set1.map(new IdentityMapper<Long>())
			.withForwardedFields("*").setParallelism(p).name("Map1")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
			.withForwardedFields("*").setParallelism(p).name("Reduce1")
			.map(new IdentityMapper<Long>())
			.withForwardedFields("*").setParallelism(p * 2).name("Map2")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
			.withForwardedFields("*").setParallelism(p * 2).name("Reduce2")
			.output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");

	Plan plan = env.createProgramPlan();
	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);
	
	// check the optimized Plan
	// when reducer 1 distributes its data across the instances of map2, it needs to employ a local hash method,
	// because map2 has twice as many instances and key/value pairs with the same key need to be processed by the same
	// mapper and reducer, respectively
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
	SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();
	
	ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
	ShipStrategyType reduceIn = red2Node.getInput().getShipStrategy();
	
	Assert.assertTrue("Invalid ship strategy for an operator.", 
			(ShipStrategyType.PARTITION_RANDOM ==  mapIn && ShipStrategyType.PARTITION_HASH == reduceIn) || 
			(ShipStrategyType.PARTITION_HASH == mapIn && ShipStrategyType.FORWARD == reduceIn));
}
 
Example #28
Source File: BroadcastVariablePipelinebreakerTest.java    From flink with Apache License 2.0
@Test
public void testNoBreakerForIndependentVariable() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		DataSet<String> source1 = env.fromElements("test");
		DataSet<String> source2 = env.fromElements("test");
		
		source1.map(new IdentityMapper<String>()).withBroadcastSet(source2, "some name")
				.output(new DiscardingOutputFormat<String>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		SinkPlanNode sink = op.getDataSinks().iterator().next();
		SingleInputPlanNode mapper = (SingleInputPlanNode) sink.getInput().getSource();
		
		assertEquals(TempMode.NONE, mapper.getInput().getTempMode());
		assertEquals(TempMode.NONE, mapper.getBroadcastInputs().get(0).getTempMode());
		
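		// the broadcast input is independent of the mapper's data input, so no pipeline breaker is needed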
		assertEquals(DataExchangeMode.PIPELINED, mapper.getInput().getDataExchangeMode());
		assertEquals(DataExchangeMode.PIPELINED, mapper.getBroadcastInputs().get(0).getDataExchangeMode());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}