Java Code Examples for org.apache.flink.optimizer.plan.SinkPlanNode#getPredecessor()

The following examples show how to use org.apache.flink.optimizer.plan.SinkPlanNode#getPredecessor() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: PropertyDataSourceTest.java    From flink with Apache License 2.0 5 votes vote down vote up
@Test
public void checkSinglePartitionedOrderedSource3() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data.getSplitDataProperties()
			.splitsPartitionedBy(0)
			.splitsOrderedBy(new int[]{1}, new Order[]{Order.ASCENDING});

	data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(lprops.getGroupedFields() == null);
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example 2
Source File: PropertyDataSourceTest.java    From flink with Apache License 2.0 5 votes vote down vote up
@Test
public void checkSinglePartitionedGroupedSource8() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

	data.getSplitDataProperties()
			.splitsPartitionedBy("f1")
			.splitsGroupedBy("f1.stringField");

	data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(1,2,3)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(lprops.getGroupedFields() == null);
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example 3
Source File: ReplicatingDataSourceTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Tests join program with replicated data source behind map partition.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMapPartition() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.mapPartition(new IdPMap())
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	// when join should have forward strategy on both sides
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

	ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
	ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
 
Example 4
Source File: PropertyDataSourceTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
@Test
public void checkSinglePartitionedOrderedSource1() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data.getSplitDataProperties()
			.splitsPartitionedBy(1)
			.splitsOrderedBy(new int[]{1}, new Order[]{Order.ASCENDING});

	data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(1)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue((new FieldSet(lprops.getGroupedFields().toArray())).equals(new FieldSet(1)));
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example 5
Source File: ParallelismChangeTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
 * 
 * Increases parallelism between 1st reduce and 2nd map, such that more tasks are on one instance.
 * Expected to re-establish partitioning between map and reduce via a local hash.
 */
@Test
public void checkPropertyHandlingWithIncreasingLocalParallelism() {
	final int p = DEFAULT_PARALLELISM * 2;

	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(p);
	DataSet<Long> set1 = env.generateSequence(0,1).setParallelism(p);

	set1.map(new IdentityMapper<Long>())
			.withForwardedFields("*").setParallelism(p).name("Map1")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
			.withForwardedFields("*").setParallelism(p).name("Reduce1")
			.map(new IdentityMapper<Long>())
			.withForwardedFields("*").setParallelism(p * 2).name("Map2")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
			.withForwardedFields("*").setParallelism(p * 2).name("Reduce2")
			.output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");

	Plan plan = env.createProgramPlan();
	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);
	
	// check the optimized Plan
	// when reducer 1 distributes its data across the instances of map2, it needs to employ a local hash method,
	// because map2 has twice as many instances and key/value pairs with the same key need to be processed by the same
	// mapper respectively reducer
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
	SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();
	
	ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
	ShipStrategyType reduceIn = red2Node.getInput().getShipStrategy();
	
	Assert.assertTrue("Invalid ship strategy for an operator.", 
			(ShipStrategyType.PARTITION_RANDOM ==  mapIn && ShipStrategyType.PARTITION_HASH == reduceIn) || 
			(ShipStrategyType.PARTITION_HASH == mapIn && ShipStrategyType.FORWARD == reduceIn));
}
 
Example 6
Source File: PropertyDataSourceTest.java    From flink with Apache License 2.0 5 votes vote down vote up
@Test
public void checkSinglePartitionedGroupedSource3() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data.getSplitDataProperties()
			.splitsPartitionedBy(1)
			.splitsGroupedBy(0);

	data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(1)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(lprops.getGroupedFields() == null);
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example 7
Source File: ReplicatingDataSourceTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Tests join program with replicated data source behind multiple map ops.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMultiMaps() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.filter(new NoFilter())
			.mapPartition(new IdPMap())
			.flatMap(new IdFlatMap())
			.map(new IdMap())
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	// when join should have forward strategy on both sides
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

	ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
	ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
 
Example 8
Source File: ReplicatingDataSourceTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Tests join program with replicated data source.
 */
@Test
public void checkJoinWithReplicatedSourceInput() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	// when join should have forward strategy on both sides
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

	ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
	ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
 
Example 9
Source File: ParallelismChangeTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
 * 
 * Increases parallelism between 2nd map and 2nd reduce, so the hash partitioning from 1st reduce is not reusable.
 * Expected to re-establish partitioning between map and reduce (hash).
 */
@Test
public void checkPropertyHandlingWithIncreasingGlobalParallelism2() {
	final int p = DEFAULT_PARALLELISM;

	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(p);
	DataSet<Long> set1 = env.generateSequence(0,1).setParallelism(p);

	set1.map(new IdentityMapper<Long>())
			.withForwardedFields("*").setParallelism(p).name("Map1")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
			.withForwardedFields("*").setParallelism(p).name("Reduce1")
			.map(new IdentityMapper<Long>())
			.withForwardedFields("*").setParallelism(p).name("Map2")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
			.withForwardedFields("*").setParallelism(p * 2).name("Reduce2")
			.output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");

	Plan plan = env.createProgramPlan();
	
	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);
	
	// check the optimized Plan
	// when reducer 1 distributes its data across the instances of map2, it needs to employ a local hash method,
	// because map2 has twice as many instances and key/value pairs with the same key need to be processed by the same
	// mapper respectively reducer
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
	SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();
	
	ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
	ShipStrategyType reduceIn = red2Node.getInput().getShipStrategy();
	
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, mapIn);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.PARTITION_HASH, reduceIn);
}
 
Example 10
Source File: PropertyDataSourceTest.java    From flink with Apache License 2.0 5 votes vote down vote up
@Test
public void checkSinglePartitionedSource1() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data.getSplitDataProperties()
			.splitsPartitionedBy(0);

	data.output(new DiscardingOutputFormat<Tuple2<Long,String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(lprops.getGroupedFields() == null);
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example 11
Source File: PropertyDataSourceTest.java    From flink with Apache License 2.0 5 votes vote down vote up
@Test
public void checkSinglePartitionedGroupedSource8() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

	data.getSplitDataProperties()
			.splitsPartitionedBy("f1")
			.splitsGroupedBy("f1.stringField");

	data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(1,2,3)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(lprops.getGroupedFields() == null);
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example 12
Source File: PropertyDataSourceTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
@Test
public void checkSinglePartitionedGroupedSource5() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

	data.getSplitDataProperties()
			.splitsPartitionedBy("f2")
			.splitsGroupedBy("f2");

	data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(4)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(4)));
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example 13
Source File: PropertyDataSourceTest.java    From flink with Apache License 2.0 5 votes vote down vote up
@Test
public void checkSinglePartitionedOrderedSource6() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

	data.getSplitDataProperties()
			.splitsPartitionedBy("f1.intField")
			.splitsOrderedBy("f1", new Order[]{Order.DESCENDING});

	data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(2)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(new FieldSet(lprops.getGroupedFields().toArray()).equals(new FieldSet(1,2,3)));
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example 14
Source File: PropertyDataSourceTest.java    From flink with Apache License 2.0 5 votes vote down vote up
@Test
public void checkSinglePartitionedSource1() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple2<Long, String>> data =
			env.readCsvFile("/some/path").types(Long.class, String.class);

	data.getSplitDataProperties()
			.splitsPartitionedBy(0);

	data.output(new DiscardingOutputFormat<Tuple2<Long,String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(lprops.getGroupedFields() == null);
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example 15
Source File: ParallelismChangeTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
 * 
 * Increases parallelism between 1st reduce and 2nd map, so the hash partitioning from 1st reduce is not reusable.
 * Expected to re-establish partitioning between reduce and map, via hash, because random is a full network
 * transit as well.
 */
@Test
public void checkPropertyHandlingWithIncreasingGlobalParallelism1() {
	final int p = DEFAULT_PARALLELISM;

	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(p);
	DataSet<Long> set1 = env.generateSequence(0,1).setParallelism(p);

	set1.map(new IdentityMapper<Long>())
				.withForwardedFields("*").setParallelism(p).name("Map1")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
				.withForwardedFields("*").setParallelism(p).name("Reduce1")
			.map(new IdentityMapper<Long>())
				.withForwardedFields("*").setParallelism(p * 2).name("Map2")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
				.withForwardedFields("*").setParallelism(p * 2).name("Reduce2")
			.output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");

	Plan plan = env.createProgramPlan();
	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);
	
	// check the optimized Plan
	// when reducer 1 distributes its data across the instances of map2, it needs to employ a local hash method,
	// because map2 has twice as many instances and key/value pairs with the same key need to be processed by the same
	// mapper respectively reducer
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
	SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();
	
	ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
	ShipStrategyType redIn = red2Node.getInput().getShipStrategy();
	
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.PARTITION_HASH, mapIn);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, redIn);
}
 
Example 16
Source File: ReplicatingDataSourceTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Tests join program with replicated data source behind map.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMap() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.map(new IdMap())
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	// when join should have forward strategy on both sides
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

	ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
	ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
 
Example 17
Source File: PropertyDataSourceTest.java    From flink with Apache License 2.0 3 votes vote down vote up
@Test
public void checkSinglePartitionedSource5() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

	data.getSplitDataProperties()
			.splitsPartitionedBy("f1.stringField");

	data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(3)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(lprops.getGroupedFields() == null);
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example 18
Source File: PropertyDataSourceTest.java    From flink with Apache License 2.0 3 votes vote down vote up
@Test
public void checkSinglePartitionedSource6() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

	data.getSplitDataProperties()
			.splitsPartitionedBy("f1.intField; f2");

	data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(2, 4)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(lprops.getGroupedFields() == null);
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example 19
Source File: PropertyDataSourceTest.java    From Flink-CEPplus with Apache License 2.0 3 votes vote down vote up
@Test
public void checkSinglePartitionedSource4() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

	data.getSplitDataProperties()
			.splitsPartitionedBy("f1");

	data.output(new DiscardingOutputFormat<Tuple3<Long,SomePojo,String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(1, 2, 3)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(lprops.getGroupedFields() == null);
	Assert.assertTrue(lprops.getOrdering() == null);

}
 
Example 20
Source File: PropertyDataSourceTest.java    From flink with Apache License 2.0 3 votes vote down vote up
@Test
public void checkSinglePartitionedSource3() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);

	data.getSplitDataProperties()
			.splitsPartitionedBy("*");

	data.output(new DiscardingOutputFormat<Tuple3<Long,SomePojo,String>>());

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();

	GlobalProperties gprops = sourceNode.getGlobalProperties();
	LocalProperties lprops = sourceNode.getLocalProperties();

	Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0, 1, 2, 3, 4)));
	Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
	Assert.assertTrue(lprops.getGroupedFields() == null);
	Assert.assertTrue(lprops.getOrdering() == null);

}