org.apache.flink.optimizer.plan.SinkPlanNode Java Examples

The following examples show how to use org.apache.flink.optimizer.plan.SinkPlanNode. The examples are extracted from open-source projects; the source file and license are listed above each example.
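Nearly all of the examples below follow the same pattern: build a DataSet program, compile it into an OptimizedPlan, fetch the SinkPlanNode from getDataSinks(), and walk backwards through the plan to verify the optimizer's choices. The condensed sketch below illustrates that pattern; it assumes a test harness such as Flink's CompilerTestBase, which supplies the compileNoStats(Plan) helper and the IdentityMapper test function used throughout the examples.

// condensed sketch of the recurring pattern (assumes a CompilerTestBase-style harness)
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

env.generateSequence(1, 100)
		.map(new IdentityMapper<Long>())
		.output(new DiscardingOutputFormat<Long>());

Plan plan = env.createProgramPlan();
OptimizedPlan oPlan = compileNoStats(plan);

// every optimized plan ends in one or more sink plan nodes
SinkPlanNode sink = oPlan.getDataSinks().iterator().next();

// walk backwards from the sink to inspect the optimizer's decisions
SingleInputPlanNode mapper = (SingleInputPlanNode) sink.getPredecessor();
ShipStrategyType shipStrategy = sink.getInput().getShipStrategy();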
Example #1
Source File: PropertyDataSourceTest.java    From flink with Apache License 2.0
@Test
public void checkSinglePartitionedGroupedSource7() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

	data.getSplitDataProperties()
			.splitsPartitionedBy("f1.intField")
			.splitsGroupedBy("f1");

	data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(2)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(1,2,3)));
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example #2
Source File: PlanJSONDumpGenerator.java    From flink with Apache License 2.0
public void dumpOptimizerPlanAsJSON(OptimizedPlan plan, PrintWriter writer) {
	Collection<SinkPlanNode> sinks = plan.getDataSinks();
	if (sinks instanceof List) {
		dumpOptimizerPlanAsJSON((List<SinkPlanNode>) sinks, writer);
	} else {
		List<SinkPlanNode> n = new ArrayList<SinkPlanNode>();
		n.addAll(sinks);
		dumpOptimizerPlanAsJSON(n, writer);
	}
}
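A common way to use this method is to render the plan into a string, e.g. for logging. A hedged usage sketch (optimizedPlan stands in for a previously compiled plan, and the class's no-arg constructor is assumed):

// hypothetical usage: capture the optimizer plan JSON in a string
StringWriter stringWriter = new StringWriter();
PrintWriter printWriter = new PrintWriter(stringWriter);
new PlanJSONDumpGenerator().dumpOptimizerPlanAsJSON(optimizedPlan, printWriter);
printWriter.flush();
String planAsJson = stringWriter.toString();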
 
Example #3
Source File: CoGroupCustomPartitioningTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testCoGroupWithTuples() {
	try {
		final Partitioner<Long> partitioner = new TestPartitionerLong();
		
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		DataSet<Tuple2<Long, Long>> input1 = env.fromElements(new Tuple2<Long, Long>(0L, 0L));
		DataSet<Tuple3<Long, Long, Long>> input2 = env.fromElements(new Tuple3<Long, Long, Long>(0L, 0L, 0L));
		
		input1
			.coGroup(input2)
			.where(1).equalTo(0)
			.withPartitioner(partitioner)
			.with(new DummyCoGroupFunction<Tuple2<Long, Long>, Tuple3<Long, Long, Long>>())
			.output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple3<Long, Long, Long>>>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		SinkPlanNode sink = op.getDataSinks().iterator().next();
		DualInputPlanNode join = (DualInputPlanNode) sink.getInput().getSource();
		
		assertEquals(ShipStrategyType.PARTITION_CUSTOM, join.getInput1().getShipStrategy());
		assertEquals(ShipStrategyType.PARTITION_CUSTOM, join.getInput2().getShipStrategy());
		assertEquals(partitioner, join.getInput1().getPartitioner());
		assertEquals(partitioner, join.getInput2().getPartitioner());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #4
Source File: CoGroupCustomPartitioningTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testCoGroupWithPojos() {
	try {
		final Partitioner<Integer> partitioner = new TestPartitionerInt();
		
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		DataSet<Pojo2> input1 = env.fromElements(new Pojo2());
		DataSet<Pojo3> input2 = env.fromElements(new Pojo3());
		
		input1
			.coGroup(input2)
			.where("b").equalTo("a")
			.withPartitioner(partitioner)
			.with(new DummyCoGroupFunction<Pojo2, Pojo3>())
			.output(new DiscardingOutputFormat<Tuple2<Pojo2, Pojo3>>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		SinkPlanNode sink = op.getDataSinks().iterator().next();
		DualInputPlanNode join = (DualInputPlanNode) sink.getInput().getSource();
		
		assertEquals(ShipStrategyType.PARTITION_CUSTOM, join.getInput1().getShipStrategy());
		assertEquals(ShipStrategyType.PARTITION_CUSTOM, join.getInput2().getShipStrategy());
		assertEquals(partitioner, join.getInput1().getPartitioner());
		assertEquals(partitioner, join.getInput2().getPartitioner());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #5
Source File: ReplicatingDataSourceTest.java    From flink with Apache License 2.0
/**
 * Tests a join program with a replicated data source behind a MapPartition operator.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMapPartition() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.mapPartition(new IdPMap())
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	// the join should have forward strategy on both sides
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

	ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
	ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
 
Example #6
Source File: PlanFinalizer.java    From flink with Apache License 2.0
/**
 * Creates a new plan finalizer.
 */
public PlanFinalizer() {
	this.allNodes = new HashSet<PlanNode>();
	this.sources = new ArrayList<SourcePlanNode>();
	this.sinks = new ArrayList<SinkPlanNode>();
	this.stackOfIterationNodes = new ArrayDeque<IterationPlanNode>();
}
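For context, the finalizer is invoked by the optimizer after the best plan candidates have been selected. Assuming the createFinalPlan(List<SinkPlanNode>, String, Plan) method on this class, the call looks roughly as follows (bestPlanSinks and originalPlan are placeholders):

// hedged sketch: hand the chosen sinks to the finalizer to produce the OptimizedPlan
OptimizedPlan finalPlan = new PlanFinalizer().createFinalPlan(bestPlanSinks, originalPlan.getJobName(), originalPlan);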
 
Example #7
Source File: ParallelismChangeTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
 * 
 * Increases parallelism between the 2nd map and the 2nd reduce, so the hash partitioning from the 1st reduce is not reusable.
 * Expected to re-establish the partitioning between map and reduce (hash).
 */
@Test
public void checkPropertyHandlingWithIncreasingGlobalParallelism2() {
	final int p = DEFAULT_PARALLELISM;

	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(p);
	DataSet<Long> set1 = env.generateSequence(0,1).setParallelism(p);

	set1.map(new IdentityMapper<Long>())
			.withForwardedFields("*").setParallelism(p).name("Map1")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
			.withForwardedFields("*").setParallelism(p).name("Reduce1")
			.map(new IdentityMapper<Long>())
			.withForwardedFields("*").setParallelism(p).name("Map2")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
			.withForwardedFields("*").setParallelism(p * 2).name("Reduce2")
			.output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");

	Plan plan = env.createProgramPlan();
	
	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);
	
	// check the optimized Plan
	// map2 runs at the same parallelism as reduce1, so its input can stay forward;
	// reduce2 runs at twice the parallelism, so the data must be hash-partitioned again,
	// because key/value pairs with the same key must be processed by the same reducer instance
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
	SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();
	
	ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
	ShipStrategyType reduceIn = red2Node.getInput().getShipStrategy();
	
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, mapIn);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.PARTITION_HASH, reduceIn);
}
 
Example #8
Source File: ParallelismChangeTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
 * 
 * Increases parallelism between the 1st reduce and the 2nd map, so the hash partitioning from the 1st reduce is not reusable.
 * Expected to re-establish the partitioning between reduce and map via hash, since a random redistribution would be a
 * full network transfer as well.
 */
@Test
public void checkPropertyHandlingWithIncreasingGlobalParallelism1() {
	final int p = DEFAULT_PARALLELISM;

	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(p);
	DataSet<Long> set1 = env.generateSequence(0,1).setParallelism(p);

	set1.map(new IdentityMapper<Long>())
				.withForwardedFields("*").setParallelism(p).name("Map1")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
				.withForwardedFields("*").setParallelism(p).name("Reduce1")
			.map(new IdentityMapper<Long>())
				.withForwardedFields("*").setParallelism(p * 2).name("Map2")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
				.withForwardedFields("*").setParallelism(p * 2).name("Reduce2")
			.output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");

	Plan plan = env.createProgramPlan();
	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);
	
	// check the optimized Plan
	// when reduce1 distributes its data across the instances of map2, it must hash-partition the data,
	// because map2 has twice as many instances and key/value pairs with the same key must be
	// processed by the same map and reduce instance
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
	SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();
	
	ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
	ShipStrategyType redIn = red2Node.getInput().getShipStrategy();
	
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.PARTITION_HASH, mapIn);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, redIn);
}
 
Example #9
Source File: DistinctAndGroupingOptimizerTest.java    From flink with Apache License 2.0
@Test
public void testDistinctPreservesPartitioningOfDistinctFields() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(4);
		
		@SuppressWarnings("unchecked")
		DataSet<Tuple2<Long, Long>> data = env.fromElements(new Tuple2<Long, Long>(0L, 0L), new Tuple2<Long, Long>(1L, 1L))
				.map(new IdentityMapper<Tuple2<Long,Long>>()).setParallelism(4);
		
		data.distinct(0)
			.groupBy(0)
			.sum(1)
			.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		SinkPlanNode sink = op.getDataSinks().iterator().next();
		SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
		SingleInputPlanNode distinctReducer = (SingleInputPlanNode) reducer.getInput().getSource();
		
		assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
		
		// reducer can be forward, reuses partitioning from distinct
		assertEquals(ShipStrategyType.FORWARD, reducer.getInput().getShipStrategy());
		
		// distinct reducer is partitioned
		assertEquals(ShipStrategyType.PARTITION_HASH, distinctReducer.getInput().getShipStrategy());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #10
Source File: PropertyDataSourceTest.java    From flink with Apache License 2.0
@Test
public void checkSinglePartitionedOrderedSource6() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

	data.getSplitDataProperties()
			.splitsPartitionedBy("f1.intField")
			.splitsOrderedBy("f1", new Order[]{Order.DESCENDING});

	data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(2)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(1,2,3)));
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example #11
Source File: CoGroupCustomPartitioningTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testCoGroupWithKeySelectors() {
	try {
		final Partitioner<Integer> partitioner = new TestPartitionerInt();
		
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		DataSet<Pojo2> input1 = env.fromElements(new Pojo2());
		DataSet<Pojo3> input2 = env.fromElements(new Pojo3());
		
		input1
			.coGroup(input2)
			.where(new Pojo2KeySelector()).equalTo(new Pojo3KeySelector())
			.withPartitioner(partitioner)
			.with(new DummyCoGroupFunction<Pojo2, Pojo3>())
			.output(new DiscardingOutputFormat<Tuple2<Pojo2, Pojo3>>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		SinkPlanNode sink = op.getDataSinks().iterator().next();
		DualInputPlanNode join = (DualInputPlanNode) sink.getInput().getSource();
		
		assertEquals(ShipStrategyType.PARTITION_CUSTOM, join.getInput1().getShipStrategy());
		assertEquals(ShipStrategyType.PARTITION_CUSTOM, join.getInput2().getShipStrategy());
		assertEquals(partitioner, join.getInput1().getPartitioner());
		assertEquals(partitioner, join.getInput2().getPartitioner());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #12
Source File: RelationalQueryCompilerTest.java    From flink with Apache License 2.0
private void checkStandardStrategies(SingleInputPlanNode map, DualInputPlanNode join, SingleInputPlanNode combiner,
		SingleInputPlanNode reducer, SinkPlanNode sink) {
	// check the ship strategies that are always fixed
	Assert.assertEquals(ShipStrategyType.FORWARD, map.getInput().getShipStrategy());
	Assert.assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());

	// check the driver strategies that are always fixed
	Assert.assertEquals(DriverStrategy.FLAT_MAP, map.getDriverStrategy());
	Assert.assertEquals(DriverStrategy.SORTED_GROUP_REDUCE, reducer.getDriverStrategy());
	Assert.assertEquals(DriverStrategy.NONE, sink.getDriverStrategy());
	if (combiner != null) {
		Assert.assertEquals(DriverStrategy.SORTED_GROUP_COMBINE, combiner.getDriverStrategy());
		Assert.assertEquals(LocalStrategy.NONE, combiner.getInput().getLocalStrategy());
	}
}
 
Example #13
Source File: CoGroupCustomPartitioningTest.java    From flink with Apache License 2.0
@Test
public void testCoGroupWithPojos() {
	try {
		final Partitioner<Integer> partitioner = new TestPartitionerInt();
		
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		DataSet<Pojo2> input1 = env.fromElements(new Pojo2());
		DataSet<Pojo3> input2 = env.fromElements(new Pojo3());
		
		input1
			.coGroup(input2)
			.where("b").equalTo("a")
			.withPartitioner(partitioner)
			.with(new DummyCoGroupFunction<Pojo2, Pojo3>())
			.output(new DiscardingOutputFormat<Tuple2<Pojo2, Pojo3>>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		SinkPlanNode sink = op.getDataSinks().iterator().next();
		DualInputPlanNode join = (DualInputPlanNode) sink.getInput().getSource();
		
		assertEquals(ShipStrategyType.PARTITION_CUSTOM, join.getInput1().getShipStrategy());
		assertEquals(ShipStrategyType.PARTITION_CUSTOM, join.getInput2().getShipStrategy());
		assertEquals(partitioner, join.getInput1().getPartitioner());
		assertEquals(partitioner, join.getInput2().getPartitioner());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #14
Source File: GroupingPojoTranslationTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testCustomPartitioningTupleGroupReduceSorted2() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		DataSet<Pojo4> data = env.fromElements(new Pojo4())
				.rebalance().setParallelism(4);

		data.groupBy("a").withPartitioner(new TestPartitionerInt())
				.sortGroup("b", Order.ASCENDING)
				.sortGroup("c", Order.DESCENDING)
				.reduceGroup(new IdentityGroupReducerCombinable<Pojo4>())
				.output(new DiscardingOutputFormat<Pojo4>());

		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);

		SinkPlanNode sink = op.getDataSinks().iterator().next();
		SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
		SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();

		assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
		assertEquals(ShipStrategyType.PARTITION_CUSTOM, reducer.getInput().getShipStrategy());
		assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #15
Source File: DataExchangeModeClosedBranchingTest.java    From Flink-CEPplus with Apache License 2.0
private SinkPlanNode findSink(Collection<SinkPlanNode> collection, String name) {
	for (SinkPlanNode node : collection) {
		String nodeName = node.getOptimizerNode().getOperator().getName();
		if (nodeName != null && nodeName.equals(name)) {
			return node;
		}
	}

	throw new IllegalArgumentException("No node with that name was found.");
}
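This helper is useful when a plan has several sinks that were given explicit names. A hedged usage sketch (the sink names and the compileNoStats helper are assumptions, in line with the other examples on this page):

// hypothetical usage: name the sinks when building the program, then look them up
DataSet<Long> data = env.generateSequence(1, 10);
data.output(new DiscardingOutputFormat<Long>()).name("sink A");
data.output(new DiscardingOutputFormat<Long>()).name("sink B");

OptimizedPlan op = compileNoStats(env.createProgramPlan());
SinkPlanNode sinkA = findSink(op.getDataSinks(), "sink A");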
 
Example #16
Source File: PropertyDataSourceTest.java    From flink with Apache License 2.0
@Test
public void checkSinglePartitionedGroupedSource3() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data.getSplitDataProperties()
			.splitsPartitionedBy(1)
			.splitsGroupedBy(0);

	data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(1)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(lprops.getGroupedFields() == null);
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example #17
Source File: GroupingPojoTranslationTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testCustomPartitioningTupleGroupReduce() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		DataSet<Pojo2> data = env.fromElements(new Pojo2())
				.rebalance().setParallelism(4);

		data.groupBy("a").withPartitioner(new TestPartitionerInt())
				.reduceGroup(new IdentityGroupReducerCombinable<Pojo2>())
				.output(new DiscardingOutputFormat<Pojo2>());

		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);

		SinkPlanNode sink = op.getDataSinks().iterator().next();
		SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
		SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();

		assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
		assertEquals(ShipStrategyType.PARTITION_CUSTOM, reducer.getInput().getShipStrategy());
		assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #18
Source File: DataExchangeModeOpenBranchingTest.java    From Flink-CEPplus with Apache License 2.0
private SinkPlanNode findSink(Collection<SinkPlanNode> collection, String name) {
	for (SinkPlanNode node : collection) {
		String nodeName = node.getOptimizerNode().getOperator().getName();
		if (nodeName != null && nodeName.equals(name)) {
			return node;
		}
	}

	throw new IllegalArgumentException("No node with that name was found.");
}
 
Example #19
Source File: PipelineBreakerTest.java    From flink with Apache License 2.0
@Test
public void testPipelineBreakerWithBroadcastVariable() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.getConfig().setExecutionMode(ExecutionMode.PIPELINED);
		env.setParallelism(64);
		
		DataSet<Long> source = env.generateSequence(1, 10).map(new IdentityMapper<Long>());
		
		DataSet<Long> result = source.map(new IdentityMapper<Long>())
									.map(new IdentityMapper<Long>())
										.withBroadcastSet(source, "bc");
		
		result.output(new DiscardingOutputFormat<Long>());
		
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		SinkPlanNode sink = op.getDataSinks().iterator().next();
		SingleInputPlanNode mapper = (SingleInputPlanNode) sink.getInput().getSource();
		SingleInputPlanNode mapperInput = (SingleInputPlanNode) mapper.getInput().getSource();
		
		assertEquals(TempMode.NONE, mapper.getInput().getTempMode());
		assertEquals(TempMode.NONE, mapper.getBroadcastInputs().get(0).getTempMode());
		
		assertEquals(DataExchangeMode.BATCH, mapperInput.getInput().getDataExchangeMode());
		assertEquals(DataExchangeMode.BATCH, mapper.getBroadcastInputs().get(0).getDataExchangeMode());
		
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #20
Source File: GroupingPojoTranslationTest.java    From flink with Apache License 2.0
@Test
public void testCustomPartitioningTupleGroupReduce() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		DataSet<Pojo2> data = env.fromElements(new Pojo2())
				.rebalance().setParallelism(4);

		data.groupBy("a").withPartitioner(new TestPartitionerInt())
				.reduceGroup(new IdentityGroupReducerCombinable<Pojo2>())
				.output(new DiscardingOutputFormat<Pojo2>());

		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);

		SinkPlanNode sink = op.getDataSinks().iterator().next();
		SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
		SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();

		assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
		assertEquals(ShipStrategyType.PARTITION_CUSTOM, reducer.getInput().getShipStrategy());
		assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #21
Source File: PartitionPushdownTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testPartitioningReused() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		@SuppressWarnings("unchecked")
		DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(0L, 0L, 0L));
		
		input
			.groupBy(0).sum(1)
			.groupBy(0, 1).sum(2)
			.output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		SinkPlanNode sink = op.getDataSinks().iterator().next();
		
		SingleInputPlanNode agg2Reducer = (SingleInputPlanNode) sink.getInput().getSource();
		SingleInputPlanNode agg1Reducer = (SingleInputPlanNode) agg2Reducer.getInput().getSource();
		
		assertEquals(ShipStrategyType.FORWARD, agg2Reducer.getInput().getShipStrategy());
		
		assertEquals(ShipStrategyType.PARTITION_HASH, agg1Reducer.getInput().getShipStrategy());
		assertEquals(new FieldList(0), agg1Reducer.getInput().getShipStrategyKeys());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #22
Source File: JavaApiPostPass.java    From flink with Apache License 2.0
@Override
public void postPass(OptimizedPlan plan) {

	executionConfig = plan.getOriginalPlan().getExecutionConfig();

	for (SinkPlanNode sink : plan.getDataSinks()) {
		traverse(sink);
	}
}
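Post passes typically follow this sink-first pattern: start at each SinkPlanNode and recurse into its inputs. The traverse method below is an illustrative sketch of such a walk, not the actual JavaApiPostPass implementation:

// illustrative sketch of a sink-first plan walk (not the real traverse())
private void traverse(PlanNode node) {
	// descend into the inputs first, then process the node itself
	for (Channel channel : node.getInputs()) {
		traverse(channel.getSource());
	}
	// ... e.g. register type serializers and comparators for the node ...
}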
 
Example #23
Source File: PropertyDataSourceTest.java    From flink with Apache License 2.0
@Test
public void checkSinglePartitionedOrderedSource2() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data.getSplitDataProperties()
			.splitsPartitionedBy(1)
			.splitsOrderedBy(new int[]{1, 0}, new Order[]{Order.ASCENDING, Order.DESCENDING});

	data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(1)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue((new FieldSet(lprops.getGroupedFields().toArray())).equals(new FieldSet(1, 0)));
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example #24
Source File: CoGroupCustomPartitioningTest.java    From flink with Apache License 2.0
@Test
public void testCoGroupWithTuples() {
	try {
		final Partitioner<Long> partitioner = new TestPartitionerLong();
		
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		DataSet<Tuple2<Long, Long>> input1 = env.fromElements(new Tuple2<Long, Long>(0L, 0L));
		DataSet<Tuple3<Long, Long, Long>> input2 = env.fromElements(new Tuple3<Long, Long, Long>(0L, 0L, 0L));
		
		input1
			.coGroup(input2)
			.where(1).equalTo(0)
			.withPartitioner(partitioner)
			.with(new DummyCoGroupFunction<Tuple2<Long, Long>, Tuple3<Long, Long, Long>>())
			.output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple3<Long, Long, Long>>>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		SinkPlanNode sink = op.getDataSinks().iterator().next();
		DualInputPlanNode join = (DualInputPlanNode) sink.getInput().getSource();
		
		assertEquals(ShipStrategyType.PARTITION_CUSTOM, join.getInput1().getShipStrategy());
		assertEquals(ShipStrategyType.PARTITION_CUSTOM, join.getInput2().getShipStrategy());
		assertEquals(partitioner, join.getInput1().getPartitioner());
		assertEquals(partitioner, join.getInput2().getPartitioner());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #25
Source File: ReplicatingDataSourceTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Tests a join program with a replicated data source behind a filter.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindFilter() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.filter(new NoFilter())
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	// the join should have forward strategy on both sides
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

	ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
	ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
 
Example #26
Source File: ReplicatingDataSourceTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Tests a join program with a replicated data source behind a flatMap.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindFlatMap() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.flatMap(new IdFlatMap())
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	// the join should have forward strategy on both sides
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

	ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
	ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
 
Example #27
Source File: ReplicatingDataSourceTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Tests a join program with a replicated data source behind a MapPartition operator.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMapPartition() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.mapPartition(new IdPMap())
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	// the join should have forward strategy on both sides
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

	ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
	ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
 
Example #28
Source File: PropertyDataSourceTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void checkSinglePartitionedGroupedSource8() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

	data.getSplitDataProperties()
			.splitsPartitionedBy("f1")
			.splitsGroupedBy("f1.stringField");

	data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(1,2,3)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(lprops.getGroupedFields() == null);
	Assert.assertTrue(lprops.getOrdering() == null);

}