Java Code Examples for org.apache.flink.api.java.ExecutionEnvironment#createProgramPlan()

The following examples show how to use org.apache.flink.api.java.ExecutionEnvironment#createProgramPlan().
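All of the examples below follow the same basic pattern: assemble a DataSet program on an ExecutionEnvironment, attach at least one sink, and then call createProgramPlan() to translate the fluent API calls into a Plan that the optimizer consumes. The following minimal sketch shows that pattern in isolation; it is not taken from any project, and the class name and printed output are illustrative only.

import org.apache.flink.api.common.Plan;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;

public class CreateProgramPlanSketch {

	public static void main(String[] args) {
		ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();

		// a plan can only be created once the program has at least one sink
		DataSet<Long> data = env.generateSequence(1, 1000);
		data.output(new DiscardingOutputFormat<Long>());

		// translate the sinks and their upstream operators into the operator DAG;
		// note that this consumes the registered sinks, so a second call needs
		// freshly defined sinks
		Plan plan = env.createProgramPlan();

		System.out.println("data sinks in plan: " + plan.getDataSinks().size());
	}
}

Most of the tests below hand the resulting Plan to the optimizer (compileNoStats/compileWithStats) or to a JobGraphGenerator and then assert properties of the translated operators.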
Example 1
Source File: ClientTest.java    From flink with Apache License 2.0
@Before
public void setUp() throws Exception {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.generateSequence(1, 1000).output(new DiscardingOutputFormat<Long>());

	Plan plan = env.createProgramPlan();
	JobWithJars jobWithJars = new JobWithJars(plan, Collections.<URL>emptyList(), Collections.<URL>emptyList());

	program = mock(PackagedProgram.class);
	when(program.getPlanWithJars()).thenReturn(jobWithJars);

	final int freePort = NetUtils.getAvailablePort();
	config = new Configuration();
	config.setString(JobManagerOptions.ADDRESS, "localhost");
	config.setInteger(JobManagerOptions.PORT, freePort);
	config.setString(AkkaOptions.ASK_TIMEOUT, AkkaOptions.ASK_TIMEOUT.defaultValue());
}
 
Example 2
Source File: ExecutionPlanAfterExecutionTest.java    From flink with Apache License 2.0
@Test
public void testCreatePlanAfterGetExecutionPlan() {
	ExecutionEnvironment env = new LocalEnvironment();

	DataSet<Integer> baseSet = env.fromElements(1, 2);

	DataSet<Integer> result = baseSet.map(new MapFunction<Integer, Integer>() {
		@Override public Integer map(Integer value) throws Exception {
			return value * 2;
		}});
	result.output(new DiscardingOutputFormat<Integer>());

	try {
		env.getExecutionPlan();
		env.createProgramPlan();
	} catch (Exception e) {
		e.printStackTrace();
		fail("Cannot run both #getExecutionPlan and #execute. Message: " + e.getMessage());
	}
}
 
Example 3
Source File: SemanticPropertiesTranslationTest.java    From flink with Apache License 2.0
@Test
public void testUnaryFunctionForwardedInLine3() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	@SuppressWarnings("unchecked")
	DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(3L, 2L, 1L));
	input.map(new ReadSetMapper<Tuple3<Long, Long, Long>>()).withForwardedFields("0->1; 2")
			.output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());
	Plan plan = env.createProgramPlan();

	GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next();
	MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput();

	SingleInputSemanticProperties semantics = mapper.getSemanticProperties();

	FieldSet fw1 = semantics.getForwardingTargetFields(0, 0);
	FieldSet fw2 = semantics.getForwardingTargetFields(0, 2);
	assertNotNull(fw1);
	assertNotNull(fw2);
	assertTrue(fw1.contains(1));
	assertTrue(fw2.contains(2));
}
 
Example 4
Source File: ReduceTranslationTests.java    From flink with Apache License 2.0
@Test
public void translateNonGroupedReduce() {
	try {
		final int parallelism = 8;
		ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

		DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);

		initialData.reduce(new RichReduceFunction<Tuple3<Double, StringValue, LongValue>>() {
			@Override
			public Tuple3<Double, StringValue, LongValue> reduce(Tuple3<Double, StringValue, LongValue> value1, Tuple3<Double, StringValue, LongValue> value2) {
				return value1;
			}
		}).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());

		Plan p = env.createProgramPlan();

		GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

		ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();

		// check types
		assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
		assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());

		// check keys
		assertTrue(reducer.getKeyColumns(0) == null || reducer.getKeyColumns(0).length == 0);

		// parallelism was not configured on the operator
		assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == ExecutionConfig.PARALLELISM_DEFAULT);

		assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
	}
	catch (Exception e) {
		System.err.println(e.getMessage());
		e.printStackTrace();
		fail("Test caused an error: " + e.getMessage());
	}
}
 
Example 5
Source File: WorksetIterationCornerCasesTest.java    From flink with Apache License 2.0
@Test
public void testWorksetIterationNotDependingOnSolutionSet() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 100).map(new Duplicator<Long>());
		
		DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = input.iterateDelta(input, 100, 1);
		
		DataSet<Tuple2<Long, Long>> iterEnd = iteration.getWorkset().map(new TestMapper<Tuple2<Long,Long>>());
		iteration.closeWith(iterEnd, iterEnd)
			.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		WorksetIterationPlanNode wipn = (WorksetIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();
		assertTrue(wipn.getSolutionSetPlanNode().getOutgoingChannels().isEmpty());
		
		JobGraphGenerator jgg = new JobGraphGenerator();
		jgg.compileJobGraph(op);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 6
Source File: BranchingPlansCompilerTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * <pre>
 *       (SRC A)         (SRC B)          (SRC C)
 *      /       \       /                /       \
 *  (SINK 1) (DELTA ITERATION)          |     (SINK 2)
 *             /    |   \               /
 *         (SINK 3) |   (CROSS => NEXT WORKSET)
 *                  |             |
 *                (JOIN => SOLUTION SET DELTA)
 * </pre>
 */
@Test
public void testClosureDeltaIteration() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);
	DataSet<Tuple2<Long, Long>> sourceA = env.generateSequence(0,1).map(new Duplicator<Long>());
	DataSet<Tuple2<Long, Long>> sourceB = env.generateSequence(0,1).map(new Duplicator<Long>());
	DataSet<Tuple2<Long, Long>> sourceC = env.generateSequence(0,1).map(new Duplicator<Long>());

	sourceA.output(new DiscardingOutputFormat<Tuple2<Long,Long>>());
	sourceC.output(new DiscardingOutputFormat<Tuple2<Long,Long>>());

	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> loop = sourceA.iterateDelta(sourceB, 10, 0);

	DataSet<Tuple2<Long, Long>> workset = loop.getWorkset().cross(sourceB).with(new IdentityCrosser<Tuple2<Long, Long>>()).name("Next work set");
	DataSet<Tuple2<Long, Long>> delta = workset.join(loop.getSolutionSet()).where(0).equalTo(0).with(new IdentityJoiner<Tuple2<Long, Long>>()).name("Solution set delta");

	DataSet<Tuple2<Long, Long>> result = loop.closeWith(delta, workset);
	result.output(new DiscardingOutputFormat<Tuple2<Long,Long>>());

	Plan plan = env.createProgramPlan();

	try {
		compileNoStats(plan);
	} catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example 7
Source File: DistinctTranslationTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void translateDistinctPlain2() {
	try {
		final int parallelism = 8;
		ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

		DataSet<CustomType> initialData = getSourcePojoDataSet(env);

		initialData.distinct().output(new DiscardingOutputFormat<CustomType>());

		Plan p = env.createProgramPlan();

		GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

		// currently distinct is translated to a Reduce
		ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();

		// check types
		assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
		assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());

		// check keys
		assertArrayEquals(new int[] {0}, reducer.getKeyColumns(0));

		// parallelism was not configured on the operator
		assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1);

		assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
	}
	catch (Exception e) {
		System.err.println(e.getMessage());
		e.printStackTrace();
		fail("Test caused an error: " + e.getMessage());
	}
}
 
Example 8
Source File: AggregateTranslationTest.java    From flink with Apache License 2.0
@Test
public void translateAggregate() {
	try {
		final int parallelism = 8;
		ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

		@SuppressWarnings("unchecked")
		DataSet<Tuple3<Double, StringValue, Long>> initialData =
				env.fromElements(new Tuple3<Double, StringValue, Long>(3.141592, new StringValue("foobar"), Long.valueOf(77)));

		initialData.groupBy(0).aggregate(Aggregations.MIN, 1).and(Aggregations.SUM, 2).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, Long>>());

		Plan p = env.createProgramPlan();

		GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

		GroupReduceOperatorBase<?, ?, ?> reducer = (GroupReduceOperatorBase<?, ?, ?>) sink.getInput();

		// check keys
		assertEquals(1, reducer.getKeyColumns(0).length);
		assertEquals(0, reducer.getKeyColumns(0)[0]);

		assertEquals(-1, reducer.getParallelism());
		assertTrue(reducer.isCombinable());

		assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
	}
	catch (Exception e) {
		System.err.println(e.getMessage());
		e.printStackTrace();
		fail("Test caused an error: " + e.getMessage());
	}
}
 
Example 9
Source File: AdditionalOperatorsTest.java    From flink with Apache License 2.0
@Test
public void testCrossWithLarge() {
	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);
	DataSet<Long> set1 = env.generateSequence(0,1);
	DataSet<Long> set2 = env.generateSequence(0,1);

	set1.crossWithHuge(set2).name("Cross")
			.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

	try {
		Plan plan = env.createProgramPlan();
		OptimizedPlan oPlan = compileNoStats(plan);
		OptimizerPlanNodeResolver resolver = new OptimizerPlanNodeResolver(oPlan);
		
		DualInputPlanNode crossPlanNode = resolver.getNode("Cross");
		Channel in1 = crossPlanNode.getInput1();
		Channel in2 = crossPlanNode.getInput2();
		
		assertEquals(ShipStrategyType.BROADCAST, in1.getShipStrategy());
		assertEquals(ShipStrategyType.FORWARD, in2.getShipStrategy());
	} catch (CompilerException ce) {
		ce.printStackTrace();
		fail("The pact compiler is unable to compile this plan correctly.");
	}
}
 
Example 10
Source File: AdditionalOperatorsTest.java    From flink with Apache License 2.0
@Test
public void testCrossWithSmall() {
	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);
	DataSet<Long> set1 = env.generateSequence(0,1);
	DataSet<Long> set2 = env.generateSequence(0,1);

	set1.crossWithTiny(set2).name("Cross")
			.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

	try {
		Plan plan = env.createProgramPlan();
		OptimizedPlan oPlan = compileWithStats(plan);
		OptimizerPlanNodeResolver resolver = new OptimizerPlanNodeResolver(oPlan);
		
		DualInputPlanNode crossPlanNode = resolver.getNode("Cross");
		Channel in1 = crossPlanNode.getInput1();
		Channel in2 = crossPlanNode.getInput2();
		
		assertEquals(ShipStrategyType.FORWARD, in1.getShipStrategy());
		assertEquals(ShipStrategyType.BROADCAST, in2.getShipStrategy());
	} catch (CompilerException ce) {
		ce.printStackTrace();
		fail("The Flink optimizer is unable to compile this plan correctly.");
	}
}
 
Example 11
Source File: GroupingTupleTranslationTest.java    From flink with Apache License 2.0
@Test
public void testCustomPartitioningTupleAgg() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		DataSet<Tuple2<Integer, Integer>> data = env.fromElements(new Tuple2<Integer, Integer>(0, 0))
				.rebalance().setParallelism(4);
		
		data.groupBy(0).withPartitioner(new TestPartitionerInt())
			.sum(1)
			.output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		SinkPlanNode sink = op.getDataSinks().iterator().next();
		SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
		SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();
		
		assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
		assertEquals(ShipStrategyType.PARTITION_CUSTOM, reducer.getInput().getShipStrategy());
		assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 12
Source File: IterationsCompilerTest.java    From flink with Apache License 2.0
@Test
public void testIterationNotPushingWorkOut() throws Exception {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(8);

		DataSet<Tuple2<Long, Long>> input1 = env.readCsvFile("/some/file/path").types(Long.class).map(new DuplicateValue());

		DataSet<Tuple2<Long, Long>> input2 = env.readCsvFile("/some/file/path").types(Long.class, Long.class);

		// Use input1 as partial solution. Partial solution is used in a single join operation --> it is cheaper
		// to do the hash partitioning between the partial solution node and the join node
		// instead of pushing the partitioning out
		doSimpleBulkIteration(input1, input2).output(new DiscardingOutputFormat<Tuple2<Long,Long>>());

		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);

		assertEquals(1, op.getDataSinks().size());
		assertTrue(op.getDataSinks().iterator().next().getInput().getSource() instanceof BulkIterationPlanNode);

		BulkIterationPlanNode bipn = (BulkIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();

		// check that work has not been pushed out
		for (Channel c : bipn.getPartialSolutionPlanNode().getOutgoingChannels()) {
			assertEquals(ShipStrategyType.PARTITION_HASH, c.getShipStrategy());
		}

		assertEquals(ShipStrategyType.FORWARD, bipn.getInput().getShipStrategy());

		new JobGraphGenerator().compileJobGraph(op);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 13
Source File: ReplicatingDataSourceTest.java    From flink with Apache License 2.0
/**
 * Tests join program with replicated data source behind filter.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindFilter() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.filter(new NoFilter())
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	// the join should have a forward strategy on both sides
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

	ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
	ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
 
Example 14
Source File: CachedMatchStrategyCompilerTest.java    From Flink-CEPplus with Apache License 2.0
private Plan getTestPlanLeftStatic(String strategy) {
	
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);
	
	@SuppressWarnings("unchecked")
	DataSet<Tuple3<Long, Long, Long>> bigInput = env.fromElements(new Tuple3<Long, Long, Long>(1L, 2L, 3L),
			new Tuple3<Long, Long, Long>(1L, 2L, 3L),new Tuple3<Long, Long, Long>(1L, 2L, 3L)).name("Big");
	
	@SuppressWarnings("unchecked")
	DataSet<Tuple3<Long, Long, Long>> smallInput = env.fromElements(new Tuple3<Long, Long, Long>(1L, 2L, 3L)).name("Small");
	
	IterativeDataSet<Tuple3<Long, Long, Long>> iteration = bigInput.iterate(10);
	
	Configuration joinStrategy = new Configuration();
	joinStrategy.setString(Optimizer.HINT_LOCAL_STRATEGY, strategy);
	
	DataSet<Tuple3<Long, Long, Long>> inner = smallInput.join(iteration).where(0).equalTo(0).with(new DummyJoiner()).name("DummyJoiner").withParameters(joinStrategy);

	DataSet<Tuple3<Long, Long, Long>> output = iteration.closeWith(inner);
	
	output.output(new DiscardingOutputFormat<Tuple3<Long,Long,Long>>());
	
	return env.createProgramPlan();
}
 
Example 15
Source File: ReduceCompilationTest.java    From flink with Apache License 2.0
@Test
public void testAllReduceNoCombiner() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(8);
		
		DataSet<Double> data = env.fromElements(0.2, 0.3, 0.4, 0.5).name("source");
		
		data.reduce(new RichReduceFunction<Double>() {
			
			@Override
			public Double reduce(Double value1, Double value2) {
				return value1 + value2;
			}
		}).name("reducer")
		.output(new DiscardingOutputFormat<Double>()).name("sink");
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
		
		
		// the all-reduce has no combiner when the parallelism of the input is one
		
		SourcePlanNode sourceNode = resolver.getNode("source");
		SingleInputPlanNode reduceNode = resolver.getNode("reducer");
		SinkPlanNode sinkNode = resolver.getNode("sink");
		
		// check wiring
		assertEquals(sourceNode, reduceNode.getInput().getSource());
		assertEquals(reduceNode, sinkNode.getInput().getSource());
		
		// check parallelism
		assertEquals(1, sourceNode.getParallelism());
		assertEquals(1, reduceNode.getParallelism());
		assertEquals(1, sinkNode.getParallelism());
	}
	catch (Exception e) {
		System.err.println(e.getMessage());
		e.printStackTrace();
		fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
	}
}
 
Example 16
Source File: SortPartialReuseTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testPartialPartitioningReuse() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		@SuppressWarnings("unchecked")
		DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(0L, 0L, 0L));
		
		input
			.partitionByHash(0)
			.map(new IdentityMapper<Tuple3<Long,Long,Long>>()).withForwardedFields("0", "1", "2")
			
			.groupBy(0, 1)
			.reduceGroup(new IdentityGroupReducerCombinable<Tuple3<Long,Long,Long>>()).withForwardedFields("0", "1", "2")
			
			.groupBy(0)
			.reduceGroup(new IdentityGroupReducerCombinable<Tuple3<Long,Long,Long>>())

			.output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		SinkPlanNode sink = op.getDataSinks().iterator().next();
		SingleInputPlanNode reducer2 = (SingleInputPlanNode) sink.getInput().getSource();
		SingleInputPlanNode reducer1 = (SingleInputPlanNode) reducer2.getInput().getSource();
		
		assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());

		// should be locally forwarding, reusing sort and partitioning
		assertEquals(ShipStrategyType.FORWARD, reducer2.getInput().getShipStrategy());
		assertEquals(LocalStrategy.NONE, reducer2.getInput().getLocalStrategy());
		
		assertEquals(ShipStrategyType.FORWARD, reducer1.getInput().getShipStrategy());
		assertEquals(LocalStrategy.COMBININGSORT, reducer1.getInput().getLocalStrategy());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example 17
Source File: UnionTranslationTest.java    From flink with Apache License 2.0
@Test
public void translateUnion2Group() {
	try {
		final int parallelism = 4;
		ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

		DataSet<Tuple3<Double, StringValue, LongValue>> dataset1 = getSourceDataSet(env, 3);

		DataSet<Tuple3<Double, StringValue, LongValue>> dataset2 = getSourceDataSet(env, 2);

		dataset1.union(dataset2)
				.groupBy((KeySelector<Tuple3<Double, StringValue, LongValue>, String>) value -> "")
				.reduceGroup((GroupReduceFunction<Tuple3<Double, StringValue, LongValue>, String>) (values, out) -> {})
				.returns(String.class)
				.output(new DiscardingOutputFormat<>());

		Plan p = env.createProgramPlan();

		// The plan should look like the following one.
		//
		// DataSet1(3) - MapOperator(3)-+
		//	                            |- Union(-1) - SingleInputOperator - Sink
		// DataSet2(2) - MapOperator(2)-+

		GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
		Union unionOperator = (Union) ((SingleInputOperator) sink.getInput()).getInput();

		// The key mappers should be added to both of the two input streams for union.
		assertTrue(unionOperator.getFirstInput() instanceof MapOperatorBase<?, ?, ?>);
		assertTrue(unionOperator.getSecondInput() instanceof MapOperatorBase<?, ?, ?>);

		// The parallelisms of the key mappers should be equal to those of their inputs.
		assertEquals(unionOperator.getFirstInput().getParallelism(), 3);
		assertEquals(unionOperator.getSecondInput().getParallelism(), 2);

		// The union should always have the default parallelism.
		assertEquals(unionOperator.getParallelism(), ExecutionConfig.PARALLELISM_DEFAULT);
	}
	catch (Exception e) {
		System.err.println(e.getMessage());
		e.printStackTrace();
		fail("Test caused an error: " + e.getMessage());
	}
}
 
Example 18
Source File: ReduceCompilationTest.java    From flink with Apache License 2.0
@Test
public void testGroupedReduceWithHint() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(8);

		DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class)
			.name("source").setParallelism(6);

		data
			.groupBy(new KeySelector<Tuple2<String,Double>, String>() {
				public String getKey(Tuple2<String, Double> value) { return value.f0; }
			})
			.reduce(new RichReduceFunction<Tuple2<String,Double>>() {
				@Override
				public Tuple2<String, Double> reduce(Tuple2<String, Double> value1, Tuple2<String, Double> value2) {
					return null;
				}
			}).setCombineHint(CombineHint.HASH).name("reducer")
			.output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");

		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);

		OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);

		// get the original nodes
		SourcePlanNode sourceNode = resolver.getNode("source");
		SingleInputPlanNode reduceNode = resolver.getNode("reducer");
		SinkPlanNode sinkNode = resolver.getNode("sink");

		// get the combiner
		SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();

		// get the key extractors and projectors
		SingleInputPlanNode keyExtractor = (SingleInputPlanNode) combineNode.getInput().getSource();
		SingleInputPlanNode keyProjector = (SingleInputPlanNode) sinkNode.getInput().getSource();

		// check wiring
		assertEquals(sourceNode, keyExtractor.getInput().getSource());
		assertEquals(keyProjector, sinkNode.getInput().getSource());

		// check the strategies
		assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
		assertEquals(DriverStrategy.HASHED_PARTIAL_REDUCE, combineNode.getDriverStrategy());

		// check the keys
		assertEquals(new FieldList(0), reduceNode.getKeys(0));
		assertEquals(new FieldList(0), combineNode.getKeys(0));
		assertEquals(new FieldList(0), reduceNode.getInput().getLocalStrategyKeys());

		// check parallelism
		assertEquals(6, sourceNode.getParallelism());
		assertEquals(6, keyExtractor.getParallelism());
		assertEquals(6, combineNode.getParallelism());

		assertEquals(8, reduceNode.getParallelism());
		assertEquals(8, keyProjector.getParallelism());
		assertEquals(8, sinkNode.getParallelism());
	}
	catch (Exception e) {
		System.err.println(e.getMessage());
		e.printStackTrace();
		fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
	}
}
 
Example 19
Source File: BranchingPlansCompilerTest.java    From Flink-CEPplus with Apache License 2.0
@SuppressWarnings("unchecked")
@Test
public void testBranchEachContractType() {
	try {
		// construct the plan
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(DEFAULT_PARALLELISM);
		DataSet<Long> sourceA = env.generateSequence(0,1);
		DataSet<Long> sourceB = env.generateSequence(0,1);
		DataSet<Long> sourceC = env.generateSequence(0,1);

		DataSet<Long> map1 = sourceA.map(new IdentityMapper<Long>()).name("Map 1");

		DataSet<Long> reduce1 = map1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce 1");

		DataSet<Long> join1 = sourceB.union(sourceB).union(sourceC)
				.join(sourceC).where("*").equalTo("*")
				.with(new IdentityJoiner<Long>()).name("Join 1");

		DataSet<Long> coGroup1 = sourceA.coGroup(sourceB).where("*").equalTo("*")
				.with(new IdentityCoGrouper<Long>()).name("CoGroup 1");

		DataSet<Long> cross1 = reduce1.cross(coGroup1)
				.with(new IdentityCrosser<Long>()).name("Cross 1");

		DataSet<Long> coGroup2 = cross1.coGroup(cross1).where("*").equalTo("*")
				.with(new IdentityCoGrouper<Long>()).name("CoGroup 2");

		DataSet<Long> coGroup3 = map1.coGroup(join1).where("*").equalTo("*")
				.with(new IdentityCoGrouper<Long>()).name("CoGroup 3");

		DataSet<Long> map2 = coGroup3.map(new IdentityMapper<Long>()).name("Map 2");

		DataSet<Long> coGroup4 = map2.coGroup(join1).where("*").equalTo("*")
				.with(new IdentityCoGrouper<Long>()).name("CoGroup 4");

		DataSet<Long> coGroup5 = coGroup2.coGroup(coGroup1).where("*").equalTo("*")
				.with(new IdentityCoGrouper<Long>()).name("CoGroup 5");

		DataSet<Long> coGroup6 = reduce1.coGroup(coGroup4).where("*").equalTo("*")
				.with(new IdentityCoGrouper<Long>()).name("CoGroup 6");

		DataSet<Long> coGroup7 = coGroup5.coGroup(coGroup6).where("*").equalTo("*")
				.with(new IdentityCoGrouper<Long>()).name("CoGroup 7");

		coGroup7.union(sourceA)
				.union(coGroup3)
				.union(coGroup4)
				.union(coGroup1)
				.output(new DiscardingOutputFormat<Long>());

		Plan plan = env.createProgramPlan();
		OptimizedPlan oPlan = compileNoStats(plan);
		
		JobGraphGenerator jobGen = new JobGraphGenerator();
		
		//Compile plan to verify that no error is thrown
		jobGen.compileJobGraph(oPlan);
	} catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example 20
Source File: BranchingPlansCompilerTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * 
 * <pre>
 *                (SRC A)  
 *                   |
 *                (MAP A)
 *             /         \   
 *          (MAP B)      (MAP C)
 *           /           /     \
 *        (SINK A)    (SINK B)  (SINK C)
 * </pre>
 */
@SuppressWarnings("unchecked")
@Test
public void testBranchingWithMultipleDataSinks2() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(DEFAULT_PARALLELISM);

		DataSet<Long> source = env.generateSequence(1, 10000);

		DataSet<Long> mappedA = source.map(new IdentityMapper<Long>());
		DataSet<Long> mappedB = mappedA.map(new IdentityMapper<Long>());
		DataSet<Long> mappedC = mappedA.map(new IdentityMapper<Long>());

		mappedB.output(new DiscardingOutputFormat<Long>());
		mappedC.output(new DiscardingOutputFormat<Long>());
		mappedC.output(new DiscardingOutputFormat<Long>());

		Plan plan = env.createProgramPlan();
		Set<Operator<?>> sinks = new HashSet<Operator<?>>(plan.getDataSinks());

		OptimizedPlan oPlan = compileNoStats(plan);

		// ---------- check the optimizer plan ----------

		// number of sinks
		assertEquals("Wrong number of data sinks.", 3, oPlan.getDataSinks().size());

		// remove matching sinks to check relation
		for (SinkPlanNode sink : oPlan.getDataSinks()) {
			assertTrue(sinks.remove(sink.getProgramOperator()));
		}
		assertTrue(sinks.isEmpty());

		new JobGraphGenerator().compileJobGraph(oPlan);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}