org.apache.flink.runtime.operators.shipping.ShipStrategyType Java Examples

The following examples show how to use org.apache.flink.runtime.operators.shipping.ShipStrategyType. The examples are taken from open source projects; the source file and project each example comes from are noted above it.
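As an orientation aid, here is a minimal sketch (not taken from any of the projects referenced on this page) that groups the ShipStrategyType constants appearing in these examples by whether they redistribute records across parallel channels. The set of constants is assumed from the switch statement in Example #1.

import org.apache.flink.runtime.operators.shipping.ShipStrategyType;

public class ShipStrategyOverview {

	/**
	 * Returns true for ship strategies that redistribute records across parallel channels.
	 * Covers only the constants exercised in the examples on this page.
	 */
	static boolean redistributesData(ShipStrategyType type) {
		switch (type) {
			case PARTITION_HASH:
			case PARTITION_RANGE:
			case PARTITION_RANDOM:
			case PARTITION_FORCED_REBALANCE:
			case PARTITION_CUSTOM:
			case BROADCAST:
				return true;
			default:
				// NONE and FORWARD keep records on their local channel
				return false;
		}
	}
}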
Example #1
Source File: JsonMapper.java    From Flink-CEPplus with Apache License 2.0
public static String getShipStrategyString(ShipStrategyType shipType) {
	if (shipType == null) {
		return "(null)";
	}
	switch (shipType) {
		case NONE:
			return "(none)";
		case FORWARD:
			return "Forward";
		case BROADCAST:
			return "Broadcast";
		case PARTITION_HASH:
			return "Hash Partition";
		case PARTITION_RANGE:
			return "Range Partition";
		case PARTITION_RANDOM:
			return "Redistribute";
		case PARTITION_FORCED_REBALANCE:
			return "Rebalance";
		case PARTITION_CUSTOM:
			return "Custom Partition";
		default:
			return shipType.name();
	}
}
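A minimal usage sketch for the helper above (assuming the JsonMapper class from Example #1 is importable; its package is not shown on this page). The expected strings follow directly from the switch cases:

import org.apache.flink.runtime.operators.shipping.ShipStrategyType;

public class ShipStrategyStringDemo {
	public static void main(String[] args) {
		// labels follow the switch cases in Example #1
		System.out.println(JsonMapper.getShipStrategyString(ShipStrategyType.PARTITION_HASH)); // Hash Partition
		System.out.println(JsonMapper.getShipStrategyString(ShipStrategyType.BROADCAST));      // Broadcast
		System.out.println(JsonMapper.getShipStrategyString(null));                            // (null)
	}
}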
 
Example #2
Source File: OutputEmitterTest.java    From flink with Apache License 2.0
private boolean verifyWrongPartitionHashKey(int position, int fieldNum) {
	final TypeComparator<Record> comparator = new RecordComparatorFactory(
		new int[] {position}, new Class[] {IntValue.class}).createComparator();
	final ChannelSelector<SerializationDelegate<Record>> selector = createChannelSelector(
		ShipStrategyType.PARTITION_HASH, comparator, 100);
	final SerializationDelegate<Record> delegate = new SerializationDelegate<>(new RecordSerializerFactory().getSerializer());

	Record record = new Record(2);
	record.setField(fieldNum, new IntValue(1));
	delegate.setInstance(record);

	try {
		selector.selectChannel(delegate);
	} catch (NullKeyFieldException re) {
		Assert.assertEquals(position, re.getFieldNumber());
		return true;
	}
	return false;
}
 
Example #3
Source File: BinaryUnionReplacer.java    From flink with Apache License 2.0
public void collect(Channel in, List<Channel> inputs) {
	if (in.getSource() instanceof NAryUnionPlanNode) {
		// sanity check
		if (in.getShipStrategy() != ShipStrategyType.FORWARD) {
			throw new CompilerException("Bug: Plan generation for Unions picked a ship strategy between binary plan operators.");
		}
		if (!(in.getLocalStrategy() == null || in.getLocalStrategy() == LocalStrategy.NONE)) {
			throw new CompilerException("Bug: Plan generation for Unions picked a local strategy between binary plan operators.");
		}

		inputs.addAll(((NAryUnionPlanNode) in.getSource()).getListOfInputs());
	} else {
		// is not a collapsed union node, so we take the channel directly
		inputs.add(in);
	}
}
 
Example #4
Source File: OptimizerNode.java    From Flink-CEPplus with Apache License 2.0
/**
 * This function connects the operators that produce the broadcast inputs to this operator.
 *
 * @param operatorToNode The map from program operators to optimizer nodes.
 * @param defaultExchangeMode The data exchange mode to use, if the operator does not
 *                            specify one.
 *
 * @throws CompilerException
 */
public void setBroadcastInputs(Map<Operator<?>, OptimizerNode> operatorToNode, ExecutionMode defaultExchangeMode) {
	// skip for Operators that don't support broadcast variables 
	if (!(getOperator() instanceof AbstractUdfOperator<?, ?>)) {
		return;
	}

	// get all broadcast inputs
	AbstractUdfOperator<?, ?> operator = ((AbstractUdfOperator<?, ?>) getOperator());

	// create connections and add them
	for (Map.Entry<String, Operator<?>> input : operator.getBroadcastInputs().entrySet()) {
		OptimizerNode predecessor = operatorToNode.get(input.getValue());
		DagConnection connection = new DagConnection(predecessor, this,
														ShipStrategyType.BROADCAST, defaultExchangeMode);
		addBroadcastConnection(input.getKey(), connection);
		predecessor.addOutgoingConnection(connection);
	}
}
 
Example #5
Source File: DualInputPlanNode.java    From flink with Apache License 2.0
public DualInputPlanNode(OptimizerNode template, String nodeName, Channel input1, Channel input2, DriverStrategy driverStrategy,
		FieldList driverKeyFields1, FieldList driverKeyFields2, boolean[] driverSortOrders)
{
	super(template, nodeName, driverStrategy);
	this.input1 = input1;
	this.input2 = input2;
	this.keys1 = driverKeyFields1;
	this.keys2 = driverKeyFields2;
	this.sortOrders = driverSortOrders;
	
	if (this.input1.getShipStrategy() == ShipStrategyType.BROADCAST) {
		this.input1.setReplicationFactor(getParallelism());
	}
	if (this.input2.getShipStrategy() == ShipStrategyType.BROADCAST) {
		this.input2.setReplicationFactor(getParallelism());
	}
	
	mergeBranchPlanMaps(input1.getSource(), input2.getSource());
}
 
Example #6
Source File: DualInputPlanNode.java    From Flink-CEPplus with Apache License 2.0
public DualInputPlanNode(OptimizerNode template, String nodeName, Channel input1, Channel input2, DriverStrategy driverStrategy,
		FieldList driverKeyFields1, FieldList driverKeyFields2, boolean[] driverSortOrders)
{
	super(template, nodeName, driverStrategy);
	this.input1 = input1;
	this.input2 = input2;
	this.keys1 = driverKeyFields1;
	this.keys2 = driverKeyFields2;
	this.sortOrders = driverSortOrders;
	
	if (this.input1.getShipStrategy() == ShipStrategyType.BROADCAST) {
		this.input1.setReplicationFactor(getParallelism());
	}
	if (this.input2.getShipStrategy() == ShipStrategyType.BROADCAST) {
		this.input2.setReplicationFactor(getParallelism());
	}
	
	mergeBranchPlanMaps(input1.getSource(), input2.getSource());
}
 
Example #7
Source File: ParallelismChangeTest.java    From flink with Apache License 2.0
/**
 * Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
 * 
 * Increases parallelism between 1st reduce and 2nd map, such that more tasks are on one instance.
 * Expected to re-establish partitioning between map and reduce via a local hash.
 */
@Test
public void checkPropertyHandlingWithIncreasingLocalParallelism() {
	final int p = DEFAULT_PARALLELISM * 2;

	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(p);
	DataSet<Long> set1 = env.generateSequence(0,1).setParallelism(p);

	set1.map(new IdentityMapper<Long>())
			.withForwardedFields("*").setParallelism(p).name("Map1")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
			.withForwardedFields("*").setParallelism(p).name("Reduce1")
			.map(new IdentityMapper<Long>())
			.withForwardedFields("*").setParallelism(p * 2).name("Map2")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
			.withForwardedFields("*").setParallelism(p * 2).name("Reduce2")
			.output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");

	Plan plan = env.createProgramPlan();
	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);
	
	// check the optimized Plan
	// when reducer 1 distributes its data across the instances of map2, a hash partitioning must be employed,
	// because map2 has twice as many instances and key/value pairs with the same key must be processed by the
	// same map2 instance and, in turn, the same reduce2 instance
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
	SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();
	
	ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
	ShipStrategyType reduceIn = red2Node.getInput().getShipStrategy();
	
	Assert.assertTrue("Invalid ship strategy for an operator.", 
			(ShipStrategyType.PARTITION_RANDOM ==  mapIn && ShipStrategyType.PARTITION_HASH == reduceIn) || 
			(ShipStrategyType.PARTITION_HASH == mapIn && ShipStrategyType.FORWARD == reduceIn));
}
 
Example #8
Source File: OutputEmitterTest.java    From flink with Apache License 2.0
private int[] getSelectedChannelsHitCount(
		ShipStrategyType shipStrategyType,
		int numRecords,
		int numberOfChannels,
		Enum recordType) {
	final TypeComparator<Record> comparator = new RecordComparatorFactory(
		new int[] {0}, new Class[] {recordType == RecordType.INTEGER ? IntValue.class : StringValue.class}).createComparator();
	final ChannelSelector<SerializationDelegate<Record>> selector = createChannelSelector(shipStrategyType, comparator, numberOfChannels);
	final SerializationDelegate<Record> delegate = new SerializationDelegate<>(new RecordSerializerFactory().getSerializer());

	return getSelectedChannelsHitCount(selector, delegate, recordType, numRecords, numberOfChannels);
}
 
Example #9
Source File: DistinctAndGroupingOptimizerTest.java    From flink with Apache License 2.0
@Test
public void testDistinctDestroysPartitioningOfNonDistinctFields() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(4);
		
		@SuppressWarnings("unchecked")
		DataSet<Tuple2<Long, Long>> data = env.fromElements(new Tuple2<Long, Long>(0L, 0L), new Tuple2<Long, Long>(1L, 1L))
				.map(new IdentityMapper<Tuple2<Long,Long>>()).setParallelism(4);
		
		data.distinct(1)
			.groupBy(0)
			.sum(1)
			.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		SinkPlanNode sink = op.getDataSinks().iterator().next();
		SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
		SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();
		SingleInputPlanNode distinctReducer = (SingleInputPlanNode) combiner.getInput().getSource();
		
		assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
		
		// reducer must repartition, because it works on a different field
		assertEquals(ShipStrategyType.PARTITION_HASH, reducer.getInput().getShipStrategy());

		assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
		
		// distinct reducer is partitioned
		assertEquals(ShipStrategyType.PARTITION_HASH, distinctReducer.getInput().getShipStrategy());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #10
Source File: GroupingPojoTranslationTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testCustomPartitioningTupleGroupReduce() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		DataSet<Pojo2> data = env.fromElements(new Pojo2())
				.rebalance().setParallelism(4);

		data.groupBy("a").withPartitioner(new TestPartitionerInt())
				.reduceGroup(new IdentityGroupReducerCombinable<Pojo2>())
				.output(new DiscardingOutputFormat<Pojo2>());

		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);

		SinkPlanNode sink = op.getDataSinks().iterator().next();
		SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
		SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();

		assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
		assertEquals(ShipStrategyType.PARTITION_CUSTOM, reducer.getInput().getShipStrategy());
		assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #11
Source File: ReplicatingDataSourceTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Tests join program with replicated data source behind map.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindMap() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.map(new IdMap())
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	// the join should have a forward ship strategy on both inputs
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

	ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
	ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
 
Example #12
Source File: DistinctAndGroupingOptimizerTest.java    From flink with Apache License 2.0
@Test
public void testDistinctPreservesPartitioningOfDistinctFields() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(4);
		
		@SuppressWarnings("unchecked")
		DataSet<Tuple2<Long, Long>> data = env.fromElements(new Tuple2<Long, Long>(0L, 0L), new Tuple2<Long, Long>(1L, 1L))
				.map(new IdentityMapper<Tuple2<Long,Long>>()).setParallelism(4);
		
		data.distinct(0)
			.groupBy(0)
			.sum(1)
			.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		SinkPlanNode sink = op.getDataSinks().iterator().next();
		SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
		SingleInputPlanNode distinctReducer = (SingleInputPlanNode) reducer.getInput().getSource();
		
		assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
		
		// reducer can be forward, reuses partitioning from distinct
		assertEquals(ShipStrategyType.FORWARD, reducer.getInput().getShipStrategy());
		
		// distinct reducer is partitioned
		assertEquals(ShipStrategyType.PARTITION_HASH, distinctReducer.getInput().getShipStrategy());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #13
Source File: OutputEmitterTest.java    From flink with Apache License 2.0
private void verifyForwardSelectedChannels(int numRecords, int numberOfChannels, Enum recordType) {
	int[] hits = getSelectedChannelsHitCount(ShipStrategyType.FORWARD, numRecords, numberOfChannels, recordType);

	assertTrue(hits[0] == numRecords);
	for (int i = 1; i < hits.length; i++) {
		assertTrue(hits[i] == 0);
	}
}
 
Example #14
Source File: JoinTranslationTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testPartitionHashFirstTest() {
	try {
		DualInputPlanNode node = createPlanAndGetJoinNode(JoinHint.REPARTITION_HASH_FIRST);
		assertEquals(ShipStrategyType.PARTITION_HASH, node.getInput1().getShipStrategy());
		assertEquals(ShipStrategyType.PARTITION_HASH, node.getInput2().getShipStrategy());
		assertEquals(DriverStrategy.HYBRIDHASH_BUILD_FIRST, node.getDriverStrategy());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getClass().getSimpleName() + ": " + e.getMessage());
	}
}
 
Example #15
Source File: GroupingPojoTranslationTest.java    From flink with Apache License 2.0
@Test
public void testCustomPartitioningTupleGroupReduceSorted() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		DataSet<Pojo3> data = env.fromElements(new Pojo3())
				.rebalance().setParallelism(4);

		data.groupBy("a").withPartitioner(new TestPartitionerInt())
				.sortGroup("b", Order.ASCENDING)
				.reduceGroup(new IdentityGroupReducerCombinable<Pojo3>())
				.output(new DiscardingOutputFormat<Pojo3>());

		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);

		SinkPlanNode sink = op.getDataSinks().iterator().next();
		SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
		SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();

		assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
		assertEquals(ShipStrategyType.PARTITION_CUSTOM, reducer.getInput().getShipStrategy());
		assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #16
Source File: JoinTranslationTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testBroadcastHashFirstTest() {
	try {
		DualInputPlanNode node = createPlanAndGetJoinNode(JoinHint.BROADCAST_HASH_FIRST);
		assertEquals(ShipStrategyType.BROADCAST, node.getInput1().getShipStrategy());
		assertEquals(ShipStrategyType.FORWARD, node.getInput2().getShipStrategy());
		assertEquals(DriverStrategy.HYBRIDHASH_BUILD_FIRST, node.getDriverStrategy());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getClass().getSimpleName() + ": " + e.getMessage());
	}
}
 
Example #17
Source File: ReplicatingDataSourceTest.java    From flink with Apache License 2.0
/**
 * Tests join program with replicated data source.
 */
@Test
public void checkJoinWithReplicatedSourceInput() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	// the join should have a forward ship strategy on both inputs
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

	ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
	ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
 
Example #18
Source File: AllGroupWithPartialPreGroupProperties.java    From flink with Apache License 2.0
@Override
public SingleInputPlanNode instantiate(Channel in, SingleInputNode node) {
	if (in.getShipStrategy() == ShipStrategyType.FORWARD) {
		// locally connected, directly instantiate
		return new SingleInputPlanNode(node, "GroupReduce ("+node.getOperator().getName()+")",
										in, DriverStrategy.ALL_GROUP_REDUCE);
	} else {
		// non-forward case: plug in a combiner
		Channel toCombiner = new Channel(in.getSource());
		toCombiner.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
		
		// create an optimizer node for the combiner, with the same parallelism as the input
		GroupReduceNode combinerNode = ((GroupReduceNode) node).getCombinerUtilityNode();
		combinerNode.setParallelism(in.getSource().getParallelism());

		SingleInputPlanNode combiner = new SingleInputPlanNode(combinerNode,
				"Combine ("+node.getOperator().getName()+")", toCombiner, DriverStrategy.ALL_GROUP_REDUCE_COMBINE);
		combiner.setCosts(new Costs(0, 0));
		combiner.initProperties(toCombiner.getGlobalProperties(), toCombiner.getLocalProperties());
		
		Channel toReducer = new Channel(combiner);
		toReducer.setShipStrategy(in.getShipStrategy(), in.getShipStrategyKeys(),
									in.getShipStrategySortOrder(), in.getDataExchangeMode());

		toReducer.setLocalStrategy(in.getLocalStrategy(), in.getLocalStrategyKeys(), in.getLocalStrategySortOrder());
		return new SingleInputPlanNode(node, "GroupReduce ("+node.getOperator().getName()+")",
										toReducer, DriverStrategy.ALL_GROUP_REDUCE);
	}
}
 
Example #19
Source File: ReplicatingDataSourceTest.java    From flink with Apache License 2.0
/**
 * Tests join program with replicated data source behind flatMap.
 */
@Test
public void checkJoinWithReplicatedSourceInputBehindFlatMap() {

	ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);

	TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
	ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif =
			new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));

	DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
	DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);

	DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1
			.flatMap(new IdFlatMap())
			.join(source2).where("*").equalTo("*")
			.writeAsText("/some/newpath");

	Plan plan = env.createProgramPlan();

	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);

	// check the optimized Plan
	// the join should have a forward ship strategy on both inputs
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();

	ShipStrategyType joinIn1 = joinNode.getInput1().getShipStrategy();
	ShipStrategyType joinIn2 = joinNode.getInput2().getShipStrategy();

	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn1);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, joinIn2);
}
 
Example #20
Source File: DagConnection.java    From flink with Apache License 2.0
/**
 * Constructor to create a result from an operator that is not
 * consumed by another operator.
 * 
 * @param source
 *        The source node.
 * @param exchangeMode
 *        The data exchange mode (pipelined / batch / batch only for shuffles / ... )
 */
public DagConnection(OptimizerNode source, ExecutionMode exchangeMode) {
	if (source == null) {
		throw new NullPointerException("Source must not be null.");
	}
	this.source = source;
	this.target = null;
	this.shipStrategy = ShipStrategyType.NONE;
	this.dataExchangeMode = exchangeMode;
}
 
Example #21
Source File: DistinctAndGroupingOptimizerTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testDistinctPreservesPartitioningOfDistinctFields() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(4);
		
		@SuppressWarnings("unchecked")
		DataSet<Tuple2<Long, Long>> data = env.fromElements(new Tuple2<Long, Long>(0L, 0L), new Tuple2<Long, Long>(1L, 1L))
				.map(new IdentityMapper<Tuple2<Long,Long>>()).setParallelism(4);
		
		data.distinct(0)
			.groupBy(0)
			.sum(1)
			.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		SinkPlanNode sink = op.getDataSinks().iterator().next();
		SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
		SingleInputPlanNode distinctReducer = (SingleInputPlanNode) reducer.getInput().getSource();
		
		assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
		
		// reducer can be forward, reuses partitioning from distinct
		assertEquals(ShipStrategyType.FORWARD, reducer.getInput().getShipStrategy());
		
		// distinct reducer is partitioned
		assertEquals(ShipStrategyType.PARTITION_HASH, distinctReducer.getInput().getShipStrategy());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #22
Source File: GroupingPojoTranslationTest.java    From flink with Apache License 2.0
@Test
public void testCustomPartitioningTupleReduce() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		DataSet<Pojo2> data = env.fromElements(new Pojo2())
				.rebalance().setParallelism(4);

		data.groupBy("a").withPartitioner(new TestPartitionerInt())
				.reduce(new SelectOneReducer<Pojo2>())
				.output(new DiscardingOutputFormat<Pojo2>());

		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);

		SinkPlanNode sink = op.getDataSinks().iterator().next();
		SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
		SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();

		assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
		assertEquals(ShipStrategyType.PARTITION_CUSTOM, reducer.getInput().getShipStrategy());
		assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #23
Source File: ParallelismChangeTest.java    From flink with Apache License 2.0
/**
 * Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
 * 
 * Increases parallelism between 2nd map and 2nd reduce, so the hash partitioning from 1st reduce is not reusable.
 * Expected to re-establish partitioning between map and reduce (hash).
 */
@Test
public void checkPropertyHandlingWithIncreasingGlobalParallelism2() {
	final int p = DEFAULT_PARALLELISM;

	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(p);
	DataSet<Long> set1 = env.generateSequence(0,1).setParallelism(p);

	set1.map(new IdentityMapper<Long>())
			.withForwardedFields("*").setParallelism(p).name("Map1")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
			.withForwardedFields("*").setParallelism(p).name("Reduce1")
			.map(new IdentityMapper<Long>())
			.withForwardedFields("*").setParallelism(p).name("Map2")
			.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
			.withForwardedFields("*").setParallelism(p * 2).name("Reduce2")
			.output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");

	Plan plan = env.createProgramPlan();
	
	// submit the plan to the compiler
	OptimizedPlan oPlan = compileNoStats(plan);
	
	// check the optimized Plan
	// the parallelism increases between map2 and reduce2, so the hash partitioning established after reduce1
	// cannot be reused; the plan must re-establish a hash partitioning on the way into reduce2, while map2
	// stays locally connected (forward)
	SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
	SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
	SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();
	
	ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
	ShipStrategyType reduceIn = red2Node.getInput().getShipStrategy();
	
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, mapIn);
	Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.PARTITION_HASH, reduceIn);
}
 
Example #24
Source File: GroupingTupleTranslationTest.java    From flink with Apache License 2.0
@Test
public void testCustomPartitioningTupleReduce() {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		
		DataSet<Tuple2<Integer, Integer>> data = env.fromElements(new Tuple2<Integer, Integer>(0, 0))
				.rebalance().setParallelism(4);
		
		data.groupBy(0).withPartitioner(new TestPartitionerInt())
			.reduce(new SelectOneReducer<Tuple2<Integer,Integer>>())
			.output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		SinkPlanNode sink = op.getDataSinks().iterator().next();
		SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
		SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();
		
		assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
		assertEquals(ShipStrategyType.PARTITION_CUSTOM, reducer.getInput().getShipStrategy());
		assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #25
Source File: ChainTaskTest.java    From flink with Apache License 2.0
@Test
public void testBatchTaskOutputInCloseMethod() {
	final int numChainedTasks = 10;
	final int keyCnt = 100;
	final int valCnt = 10;
	try {
		initEnvironment(MEMORY_MANAGER_SIZE, NETWORK_BUFFER_SIZE);
		addInput(new UniformRecordGenerator(keyCnt, valCnt, false), 0);
		addOutput(outList);
		registerTask(FlatMapDriver.class, MockMapStub.class);
		for (int i = 0; i < numChainedTasks; i++) {
			final TaskConfig taskConfig = new TaskConfig(new Configuration());
			taskConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
			taskConfig.setOutputSerializer(serFact);
			taskConfig.setStubWrapper(
				new UserCodeClassWrapper<>(MockDuplicateLastValueMapFunction.class));
			getTaskConfig().addChainedTask(
				ChainedFlatMapDriver.class, taskConfig, "chained-" + i);
		}
		final BatchTask<FlatMapFunction<Record, Record>, Record> testTask =
			new BatchTask<>(mockEnv);
		testTask.invoke();
		Assert.assertEquals(keyCnt * valCnt + numChainedTasks, outList.size());
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example #26
Source File: FeedbackPropertiesMatchTest.java    From flink with Apache License 2.0
@Test
public void testNoPartialSolutionFoundTwoInputOperator() {
	try {
		SourcePlanNode target = new SourcePlanNode(getSourceNode(), "Partial Solution");

		SourcePlanNode source1 = new SourcePlanNode(getSourceNode(), "Source 1");
		SourcePlanNode source2 = new SourcePlanNode(getSourceNode(), "Source 2");
		
		Channel toMap1 = new Channel(source1);
		toMap1.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
		toMap1.setLocalStrategy(LocalStrategy.NONE);
		SingleInputPlanNode map1 = new SingleInputPlanNode(getMapNode(), "Mapper 1", toMap1, DriverStrategy.MAP);
		
		Channel toMap2 = new Channel(source2);
		toMap2.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
		toMap2.setLocalStrategy(LocalStrategy.NONE);
		SingleInputPlanNode map2 = new SingleInputPlanNode(getMapNode(), "Mapper 2", toMap2, DriverStrategy.MAP);
		
		Channel toJoin1 = new Channel(map1);
		Channel toJoin2 = new Channel(map2);
		
		toJoin1.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
		toJoin1.setLocalStrategy(LocalStrategy.NONE);
		toJoin2.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
		toJoin2.setLocalStrategy(LocalStrategy.NONE);
		
		DualInputPlanNode join = new DualInputPlanNode(getJoinNode(), "Join", toJoin1, toJoin2, DriverStrategy.HYBRIDHASH_BUILD_FIRST);
		
		FeedbackPropertiesMeetRequirementsReport report = join.checkPartialSolutionPropertiesMet(target, new GlobalProperties(), new LocalProperties());
		assertEquals(NO_PARTIAL_SOLUTION, report);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #27
Source File: JoinTranslationTest.java    From flink with Apache License 2.0
@Test
public void testPartitionSortMergeTest() {
	try {
		DualInputPlanNode node = createPlanAndGetJoinNode(JoinHint.REPARTITION_SORT_MERGE);
		assertEquals(ShipStrategyType.PARTITION_HASH, node.getInput1().getShipStrategy());
		assertEquals(ShipStrategyType.PARTITION_HASH, node.getInput2().getShipStrategy());
		assertEquals(DriverStrategy.INNER_MERGE, node.getDriverStrategy());
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getClass().getSimpleName() + ": " + e.getMessage());
	}
}
 
Example #28
Source File: IterationsCompilerTest.java    From flink with Apache License 2.0
@Test
public void testTwoWorksetIterationsDirectlyChained() throws Exception {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(8);
		
		DataSet<Tuple2<Long, Long>> verticesWithInitialId = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
		
		DataSet<Tuple2<Long, Long>> edges = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
		
		DataSet<Tuple2<Long, Long>> firstResult = doDeltaIteration(verticesWithInitialId, edges);
		
		DataSet<Tuple2<Long, Long>> secondResult = doDeltaIteration(firstResult, edges);
		
		secondResult.output(new DiscardingOutputFormat<Tuple2<Long,Long>>());
		
		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);
		
		assertEquals(1, op.getDataSinks().size());
		assertTrue(op.getDataSinks().iterator().next().getInput().getSource() instanceof WorksetIterationPlanNode);
		
		WorksetIterationPlanNode wipn = (WorksetIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();
		
		assertEquals(ShipStrategyType.FORWARD, wipn.getInput1().getShipStrategy());

		assertEquals(DataExchangeMode.BATCH, wipn.getInput1().getDataExchangeMode());
		assertEquals(DataExchangeMode.BATCH, wipn.getInput2().getDataExchangeMode());

		assertEquals(TempMode.NONE, wipn.getInput1().getTempMode());
		assertEquals(TempMode.NONE, wipn.getInput2().getTempMode());
		
		new JobGraphGenerator().compileJobGraph(op);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #29
Source File: IterationsCompilerTest.java    From flink with Apache License 2.0
@Test
public void testIterationNotPushingWorkOut() throws Exception {
	try {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(8);

		DataSet<Tuple2<Long, Long>> input1 = env.readCsvFile("/some/file/path").types(Long.class).map(new DuplicateValue());

		DataSet<Tuple2<Long, Long>> input2 = env.readCsvFile("/some/file/path").types(Long.class, Long.class);

		// Use input1 as partial solution. Partial solution is used in a single join operation --> it is cheaper
		// to do the hash partitioning between the partial solution node and the join node
		// instead of pushing the partitioning out
		doSimpleBulkIteration(input1, input2).output(new DiscardingOutputFormat<Tuple2<Long,Long>>());

		Plan p = env.createProgramPlan();
		OptimizedPlan op = compileNoStats(p);

		assertEquals(1, op.getDataSinks().size());
		assertTrue(op.getDataSinks().iterator().next().getInput().getSource() instanceof BulkIterationPlanNode);

		BulkIterationPlanNode bipn = (BulkIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource();

		// check that work has not been pushed out
		for (Channel c : bipn.getPartialSolutionPlanNode().getOutgoingChannels()) {
			assertEquals(ShipStrategyType.PARTITION_HASH, c.getShipStrategy());
		}

		assertEquals(ShipStrategyType.FORWARD, bipn.getInput().getShipStrategy());

		new JobGraphGenerator().compileJobGraph(op);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #30
Source File: AllGroupWithPartialPreGroupProperties.java    From Flink-CEPplus with Apache License 2.0
@Override
public SingleInputPlanNode instantiate(Channel in, SingleInputNode node) {
	if (in.getShipStrategy() == ShipStrategyType.FORWARD) {
		// locally connected, directly instantiate
		return new SingleInputPlanNode(node, "GroupReduce ("+node.getOperator().getName()+")",
										in, DriverStrategy.ALL_GROUP_REDUCE);
	} else {
		// non-forward case: plug in a combiner
		Channel toCombiner = new Channel(in.getSource());
		toCombiner.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
		
		// create an optimizer node for the combiner, with the same parallelism as the input
		GroupReduceNode combinerNode = ((GroupReduceNode) node).getCombinerUtilityNode();
		combinerNode.setParallelism(in.getSource().getParallelism());

		SingleInputPlanNode combiner = new SingleInputPlanNode(combinerNode,
				"Combine ("+node.getOperator().getName()+")", toCombiner, DriverStrategy.ALL_GROUP_REDUCE_COMBINE);
		combiner.setCosts(new Costs(0, 0));
		combiner.initProperties(toCombiner.getGlobalProperties(), toCombiner.getLocalProperties());
		
		Channel toReducer = new Channel(combiner);
		toReducer.setShipStrategy(in.getShipStrategy(), in.getShipStrategyKeys(),
									in.getShipStrategySortOrder(), in.getDataExchangeMode());

		toReducer.setLocalStrategy(in.getLocalStrategy(), in.getLocalStrategyKeys(), in.getLocalStrategySortOrder());
		return new SingleInputPlanNode(node, "GroupReduce ("+node.getOperator().getName()+")",
										toReducer, DriverStrategy.ALL_GROUP_REDUCE);
	}
}