org.apache.flink.streaming.runtime.partitioner.RescalePartitioner Java Examples

The following examples show how to use org.apache.flink.streaming.runtime.partitioner.RescalePartitioner. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: StreamingJobGraphGeneratorWithGlobalDataExchangeModeTest.java From flink with Apache License 2.0

6 votes

/**
 * Builds a stream graph with the following topology:
 * source(parallelism=1) --(forward)--> map1(parallelism=1)
 *           --(rescale)--> map2(parallelism=2) --(rebalance)--> sink(parallelism=2).
 *
 * @return the stream graph obtained from the execution environment
 */
private static StreamGraph createStreamGraph() {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	// source with parallelism 1
	final DataStream<Integer> numbers = environment.fromElements(1, 2, 3).setParallelism(1);

	// source --(forward)--> map1; map1 explicitly starts a new chain
	final PartitionTransformation<Integer> forwardEdge = new PartitionTransformation<>(
		numbers.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.UNDEFINED);
	final DataStream<Integer> forwarded = new DataStream<>(environment, forwardEdge);
	final DataStream<Integer> firstMap = forwarded.map(i -> i).startNewChain().setParallelism(1);

	// map1 --(rescale)--> map2
	final PartitionTransformation<Integer> rescaleEdge = new PartitionTransformation<>(
		firstMap.getTransformation(), new RescalePartitioner<>(), ShuffleMode.UNDEFINED);
	final DataStream<Integer> rescaled = new DataStream<>(environment, rescaleEdge);
	final DataStream<Integer> secondMap = rescaled.map(i -> i).setParallelism(2);

	// map2 --(rebalance)--> sink
	secondMap.rebalance().print().setParallelism(2);

	return environment.getStreamGraph();
}

Example #2

Source File: StreamingJobGraphGenerator.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Wires the job vertex looked up for {@code headOfChain} to the job vertex targeted by the
 * given edge.
 *
 * <p>The edge is appended to {@code physicalEdgesInOrder}, the input count stored in the
 * downstream vertex configuration is incremented by one, and a new
 * {@code PIPELINED_BOUNDED} data set is connected as input. Forward and rescale partitioners
 * use a point-wise distribution pattern, every other partitioner uses all-to-all.
 *
 * @param headOfChain key used to look up the upstream vertex in {@code jobVertices}
 * @param edge stream edge supplying the target id and the partitioner
 */
private void connect(Integer headOfChain, StreamEdge edge) {

		physicalEdgesInOrder.add(edge);

		final Integer targetId = edge.getTargetId();

		final JobVertex upstream = jobVertices.get(headOfChain);
		final JobVertex downstream = jobVertices.get(targetId);

		// register one more input in the downstream vertex's configuration
		final StreamConfig targetConfig = new StreamConfig(downstream.getConfiguration());
		targetConfig.setNumberOfInputs(targetConfig.getNumberOfInputs() + 1);

		final StreamPartitioner<?> partitioner = edge.getPartitioner();
		final boolean pointwise =
				partitioner instanceof ForwardPartitioner || partitioner instanceof RescalePartitioner;
		final DistributionPattern pattern =
				pointwise ? DistributionPattern.POINTWISE : DistributionPattern.ALL_TO_ALL;

		final JobEdge jobEdge = downstream.connectNewDataSetAsInput(
				upstream,
				pattern,
				ResultPartitionType.PIPELINED_BOUNDED);

		// the ship strategy name is what the web interface shows for this edge
		jobEdge.setShipStrategyName(partitioner.toString());

		if (LOG.isDebugEnabled()) {
			LOG.debug("CONNECTED: {} - {} -> {}", partitioner.getClass().getSimpleName(),
					headOfChain, targetId);
		}
	}

Example #3

Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0

5 votes

/**
 * Checks how edges with {@link ShuffleMode#PIPELINED} are translated into the job graph.
 */
@Test
public void testShuffleModePipelined() {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	// pipeline under test: fromElements -> Map -> Print
	final DataStream<Integer> source = environment.fromElements(1, 2, 3);

	final PartitionTransformation<Integer> forwardEdge = new PartitionTransformation<>(
			source.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.PIPELINED);
	final DataStream<Integer> forwarded = new DataStream<>(environment, forwardEdge);
	final DataStream<Integer> mapped = forwarded.map(value -> value).setParallelism(1);

	final PartitionTransformation<Integer> rescaleEdge = new PartitionTransformation<>(
			mapped.getTransformation(), new RescalePartitioner<>(), ShuffleMode.PIPELINED);
	final DataStream<Integer> rescaled = new DataStream<>(environment, rescaleEdge);
	rescaled.print().setParallelism(2);

	final JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(environment.getStreamGraph());
	final List<JobVertex> sortedVertices = jobGraph.getVerticesSortedTopologicallyFromSources();

	// source and map are chained across the PIPELINED forward edge, leaving two vertices
	assertEquals(2, sortedVertices.size());

	final JobVertex chainedSourceAndMap = sortedVertices.get(0);
	final ResultPartitionType producedType =
			chainedSourceAndMap.getProducedDataSets().get(0).getResultType();

	// a PIPELINED edge must become a PIPELINED_BOUNDED result partition
	assertEquals(ResultPartitionType.PIPELINED_BOUNDED, producedType);
}

Example #4

Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0

5 votes

/**
 * Checks how edges with {@link ShuffleMode#BATCH} are translated into the job graph.
 */
@Test
public void testShuffleModeBatch() {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	// pipeline under test: fromElements -> Map -> Print
	final DataStream<Integer> source = environment.fromElements(1, 2, 3);

	final PartitionTransformation<Integer> forwardEdge = new PartitionTransformation<>(
			source.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.BATCH);
	final DataStream<Integer> forwarded = new DataStream<>(environment, forwardEdge);
	final DataStream<Integer> mapped = forwarded.map(value -> value).setParallelism(1);

	final PartitionTransformation<Integer> rescaleEdge = new PartitionTransformation<>(
			mapped.getTransformation(), new RescalePartitioner<>(), ShuffleMode.BATCH);
	final DataStream<Integer> rescaled = new DataStream<>(environment, rescaleEdge);
	rescaled.print().setParallelism(2);

	final JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(environment.getStreamGraph());
	final List<JobVertex> sortedVertices = jobGraph.getVerticesSortedTopologicallyFromSources();

	// nothing is chained across BATCH edges: source, map and print stay separate
	assertEquals(3, sortedVertices.size());

	final JobVertex source0 = sortedVertices.get(0);
	final JobVertex map1 = sortedVertices.get(1);
	final ResultPartitionType sourceOutputType = source0.getProducedDataSets().get(0).getResultType();

	// each BATCH edge must become a BLOCKING result partition
	assertEquals(ResultPartitionType.BLOCKING, sourceOutputType);
	final ResultPartitionType mapOutputType = map1.getProducedDataSets().get(0).getResultType();
	assertEquals(ResultPartitionType.BLOCKING, mapOutputType);
}

Example #5

Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0

5 votes

/**
 * Checks how edges with {@link ShuffleMode#UNDEFINED} are translated into the job graph.
 */
@Test
public void testShuffleModeUndefined() {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	// pipeline under test: fromElements -> Map -> Print
	final DataStream<Integer> source = environment.fromElements(1, 2, 3);

	final PartitionTransformation<Integer> forwardEdge = new PartitionTransformation<>(
			source.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.UNDEFINED);
	final DataStream<Integer> forwarded = new DataStream<>(environment, forwardEdge);
	final DataStream<Integer> mapped = forwarded.map(value -> value).setParallelism(1);

	final PartitionTransformation<Integer> rescaleEdge = new PartitionTransformation<>(
			mapped.getTransformation(), new RescalePartitioner<>(), ShuffleMode.UNDEFINED);
	final DataStream<Integer> rescaled = new DataStream<>(environment, rescaleEdge);
	rescaled.print().setParallelism(2);

	final JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(environment.getStreamGraph());
	final List<JobVertex> sortedVertices = jobGraph.getVerticesSortedTopologicallyFromSources();

	// source and map are chained across the UNDEFINED forward edge, leaving two vertices
	assertEquals(2, sortedVertices.size());

	final JobVertex chainedSourceAndMap = sortedVertices.get(0);
	final ResultPartitionType producedType =
		chainedSourceAndMap.getProducedDataSets().get(0).getResultType();

	// by default an UNDEFINED edge must become a PIPELINED_BOUNDED result partition
	assertEquals(ResultPartitionType.PIPELINED_BOUNDED, producedType);
}

Example #6

Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0

5 votes

/**
 * Checks that, without enabling "blockingConnectionsBetweenChains", edges with an undefined
 * shuffle mode end up as {@code PIPELINED_BOUNDED} result partitions.
 */
@Test
public void testBlockingAfterChainingOffDisabled() {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	// pipeline under test: fromElements -> Filter -> Print
	final DataStream<Integer> source = environment.fromElements(1, 2, 3);

	// source --(rescale, undefined shuffle mode)--> filter
	final PartitionTransformation<Integer> rescaleEdge = new PartitionTransformation<>(
		source.getTransformation(), new RescalePartitioner<>(), ShuffleMode.UNDEFINED);
	final DataStream<Integer> rescaled = new DataStream<>(environment, rescaleEdge);
	final DataStream<Integer> filtered = rescaled.filter(value -> true).setParallelism(2);

	// filter --(forward, undefined shuffle mode)--> print
	final PartitionTransformation<Integer> forwardEdge = new PartitionTransformation<>(
		filtered.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.UNDEFINED);
	final DataStream<Integer> forwarded = new DataStream<>(environment, forwardEdge);

	forwarded.print().setParallelism(2);

	final JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(environment.getStreamGraph());
	final List<JobVertex> sortedVertices = jobGraph.getVerticesSortedTopologicallyFromSources();

	// two vertices expected: the source, and filter chained with print
	assertEquals(2, sortedVertices.size());

	final JobVertex sourceOnly = sortedVertices.get(0);
	final JobVertex chainedFilterAndPrint = sortedVertices.get(1);

	final ResultPartitionType producedType = sourceOnly.getProducedDataSets().get(0).getResultType();
	assertEquals(ResultPartitionType.PIPELINED_BOUNDED, producedType);

	final ResultPartitionType consumedType =
			chainedFilterAndPrint.getInputs().get(0).getSource().getResultType();
	assertEquals(ResultPartitionType.PIPELINED_BOUNDED, consumedType);
}

Example #7

Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0

5 votes

/**
 * Test enabling the property "blockingConnectionsBetweenChains".
 *
 * <p>With the property enabled, an edge between chains whose shuffle mode is
 * {@link ShuffleMode#UNDEFINED} is expected to become a {@code BLOCKING} result partition,
 * while an edge explicitly marked {@link ShuffleMode#PIPELINED} is expected to stay
 * {@code PIPELINED_BOUNDED}.
 */
@Test
public void testBlockingConnectionsBetweenChainsEnabled() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	// fromElements -> Filter -> Map -> Print
	DataStream<Integer> sourceDataStream = env.fromElements(1, 2, 3);

	// partition transformation with an undefined shuffle mode between source and filter
	DataStream<Integer> partitionAfterSourceDataStream = new DataStream<>(env, new PartitionTransformation<>(
		sourceDataStream.getTransformation(), new RescalePartitioner<>(), ShuffleMode.UNDEFINED));
	DataStream<Integer> filterDataStream = partitionAfterSourceDataStream.filter(value -> true).setParallelism(2);

	// forward partition with an undefined shuffle mode between filter and map
	DataStream<Integer> partitionAfterFilterDataStream = new DataStream<>(env, new PartitionTransformation<>(
		filterDataStream.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.UNDEFINED));
	DataStream<Integer> mapDataStream = partitionAfterFilterDataStream.map(value -> value).setParallelism(2);

	// partition transformation with a PIPELINED shuffle mode between map and print;
	// it is built from the map output so that the pipeline is linear, as described above
	DataStream<Integer> partitionAfterMapDataStream = new DataStream<>(env, new PartitionTransformation<>(
		mapDataStream.getTransformation(), new RescalePartitioner<>(), ShuffleMode.PIPELINED));
	partitionAfterMapDataStream.print().setParallelism(1);

	StreamGraph streamGraph = env.getStreamGraph();
	streamGraph.setBlockingConnectionsBetweenChains(true);
	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(streamGraph);

	List<JobVertex> verticesSorted = jobGraph.getVerticesSortedTopologicallyFromSources();
	assertEquals(3, verticesSorted.size());

	JobVertex sourceVertex = verticesSorted.get(0);
	// still can be chained
	JobVertex filterAndMapVertex = verticesSorted.get(1);
	JobVertex printVertex = verticesSorted.get(2);

	// the edge with undefined shuffle mode is translated into BLOCKING
	assertEquals(ResultPartitionType.BLOCKING, sourceVertex.getProducedDataSets().get(0).getResultType());
	// the edge with PIPELINED shuffle mode is translated into PIPELINED_BOUNDED
	assertEquals(ResultPartitionType.PIPELINED_BOUNDED, filterAndMapVertex.getProducedDataSets().get(0).getResultType());
	assertEquals(ResultPartitionType.PIPELINED_BOUNDED, printVertex.getInputs().get(0).getSource().getResultType());
}

Example #8

Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0

5 votes

/**
 * Verifies the job graph translation of edges using {@link ShuffleMode#PIPELINED}.
 */
@Test
public void testShuffleModePipelined() {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	// topology: fromElements -> Map -> Print
	final DataStream<Integer> elements = environment.fromElements(1, 2, 3);

	final PartitionTransformation<Integer> sourceToMap = new PartitionTransformation<>(
			elements.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.PIPELINED);
	final DataStream<Integer> afterSource = new DataStream<>(environment, sourceToMap);
	final DataStream<Integer> mapOutput = afterSource.map(value -> value).setParallelism(1);

	final PartitionTransformation<Integer> mapToPrint = new PartitionTransformation<>(
			mapOutput.getTransformation(), new RescalePartitioner<>(), ShuffleMode.PIPELINED);
	final DataStream<Integer> afterMap = new DataStream<>(environment, mapToPrint);
	afterMap.print().setParallelism(2);

	final JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(environment.getStreamGraph());
	final List<JobVertex> topologicalOrder = jobGraph.getVerticesSortedTopologicallyFromSources();

	// the PIPELINED forward edge allows chaining, so only two vertices remain
	assertEquals(2, topologicalOrder.size());

	final JobVertex chainHead = topologicalOrder.get(0);
	final ResultPartitionType resultType = chainHead.getProducedDataSets().get(0).getResultType();

	// PIPELINED is expected to map to a PIPELINED_BOUNDED result partition
	assertEquals(ResultPartitionType.PIPELINED_BOUNDED, resultType);
}

Example #9

Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0

5 votes

/**
 * Verifies the job graph translation of edges using {@link ShuffleMode#BATCH}.
 */
@Test
public void testShuffleModeBatch() {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	// topology: fromElements -> Map -> Print
	final DataStream<Integer> elements = environment.fromElements(1, 2, 3);

	final PartitionTransformation<Integer> sourceToMap = new PartitionTransformation<>(
			elements.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.BATCH);
	final DataStream<Integer> afterSource = new DataStream<>(environment, sourceToMap);
	final DataStream<Integer> mapOutput = afterSource.map(value -> value).setParallelism(1);

	final PartitionTransformation<Integer> mapToPrint = new PartitionTransformation<>(
			mapOutput.getTransformation(), new RescalePartitioner<>(), ShuffleMode.BATCH);
	final DataStream<Integer> afterMap = new DataStream<>(environment, mapToPrint);
	afterMap.print().setParallelism(2);

	final JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(environment.getStreamGraph());
	final List<JobVertex> topologicalOrder = jobGraph.getVerticesSortedTopologicallyFromSources();

	// BATCH edges break the chain: source, map and print are three separate vertices
	assertEquals(3, topologicalOrder.size());

	final JobVertex first = topologicalOrder.get(0);
	final JobVertex second = topologicalOrder.get(1);

	// BATCH is expected to map to a BLOCKING result partition on both edges
	final ResultPartitionType firstResultType = first.getProducedDataSets().get(0).getResultType();
	assertEquals(ResultPartitionType.BLOCKING, firstResultType);
	final ResultPartitionType secondResultType = second.getProducedDataSets().get(0).getResultType();
	assertEquals(ResultPartitionType.BLOCKING, secondResultType);
}

Example #10

Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0

5 votes

/**
 * Verifies the job graph translation of edges using {@link ShuffleMode#UNDEFINED}.
 */
@Test
public void testShuffleModeUndefined() {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	// topology: fromElements -> Map -> Print
	final DataStream<Integer> elements = environment.fromElements(1, 2, 3);

	final PartitionTransformation<Integer> sourceToMap = new PartitionTransformation<>(
			elements.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.UNDEFINED);
	final DataStream<Integer> afterSource = new DataStream<>(environment, sourceToMap);
	final DataStream<Integer> mapOutput = afterSource.map(value -> value).setParallelism(1);

	final PartitionTransformation<Integer> mapToPrint = new PartitionTransformation<>(
			mapOutput.getTransformation(), new RescalePartitioner<>(), ShuffleMode.UNDEFINED);
	final DataStream<Integer> afterMap = new DataStream<>(environment, mapToPrint);
	afterMap.print().setParallelism(2);

	final JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(environment.getStreamGraph());
	final List<JobVertex> topologicalOrder = jobGraph.getVerticesSortedTopologicallyFromSources();

	// the UNDEFINED forward edge allows chaining, so only two vertices remain
	assertEquals(2, topologicalOrder.size());

	final JobVertex chainHead = topologicalOrder.get(0);
	final ResultPartitionType resultType = chainHead.getProducedDataSets().get(0).getResultType();

	// UNDEFINED is expected to default to a PIPELINED_BOUNDED result partition
	assertEquals(ResultPartitionType.PIPELINED_BOUNDED, resultType);
}

Example #11

Source File: StreamingJobGraphGenerator.java From flink with Apache License 2.0

4 votes

/**
 * Wires the job vertex looked up for {@code headOfChain} to the job vertex targeted by the
 * given edge.
 *
 * <p>The edge is appended to {@code physicalEdgesInOrder} and the input count stored in the
 * downstream vertex configuration is incremented by one. The result partition type follows
 * from the edge's shuffle mode: PIPELINED gives {@code PIPELINED_BOUNDED}, BATCH gives
 * {@code BLOCKING}, and UNDEFINED gives {@code BLOCKING} only when the stream graph reports
 * blocking connections between chains, otherwise {@code PIPELINED_BOUNDED}. Forward and
 * rescale partitioners use a point-wise distribution pattern, every other partitioner uses
 * all-to-all.
 *
 * @param headOfChain key used to look up the upstream vertex in {@code jobVertices}
 * @param edge stream edge supplying the target id, the partitioner and the shuffle mode
 * @throws UnsupportedOperationException if the edge's shuffle mode is none of the handled ones
 */
private void connect(Integer headOfChain, StreamEdge edge) {

		physicalEdgesInOrder.add(edge);

		final Integer targetId = edge.getTargetId();

		final JobVertex upstream = jobVertices.get(headOfChain);
		final JobVertex downstream = jobVertices.get(targetId);

		// register one more input in the downstream vertex's configuration
		final StreamConfig targetConfig = new StreamConfig(downstream.getConfiguration());
		targetConfig.setNumberOfInputs(targetConfig.getNumberOfInputs() + 1);

		final StreamPartitioner<?> partitioner = edge.getPartitioner();

		// translate the edge's shuffle mode into a result partition type
		final ResultPartitionType partitionType;
		switch (edge.getShuffleMode()) {
			case PIPELINED:
				partitionType = ResultPartitionType.PIPELINED_BOUNDED;
				break;
			case BATCH:
				partitionType = ResultPartitionType.BLOCKING;
				break;
			case UNDEFINED:
				if (streamGraph.isBlockingConnectionsBetweenChains()) {
					partitionType = ResultPartitionType.BLOCKING;
				} else {
					partitionType = ResultPartitionType.PIPELINED_BOUNDED;
				}
				break;
			default:
				throw new UnsupportedOperationException("Data exchange mode " +
					edge.getShuffleMode() + " is not supported yet.");
		}

		final boolean pointwise =
				partitioner instanceof ForwardPartitioner || partitioner instanceof RescalePartitioner;
		final DistributionPattern pattern =
				pointwise ? DistributionPattern.POINTWISE : DistributionPattern.ALL_TO_ALL;

		final JobEdge jobEdge = downstream.connectNewDataSetAsInput(
				upstream,
				pattern,
				partitionType);

		// the ship strategy name is what the web interface shows for this edge
		jobEdge.setShipStrategyName(partitioner.toString());

		if (LOG.isDebugEnabled()) {
			LOG.debug("CONNECTED: {} - {} -> {}", partitioner.getClass().getSimpleName(),
					headOfChain, targetId);
		}
	}

Example #12

Source File: StreamingJobGraphGenerator.java From flink with Apache License 2.0

4 votes

/**
 * Tells whether the given partitioner is a {@link ForwardPartitioner} or a
 * {@link RescalePartitioner}.
 *
 * @param partitioner the partitioner to inspect
 * @return {@code true} for forward and rescale partitioners, {@code false} otherwise
 */
private static boolean isPointwisePartitioner(StreamPartitioner<?> partitioner) {
	if (partitioner instanceof ForwardPartitioner) {
		return true;
	}
	return partitioner instanceof RescalePartitioner;
}

Example #13

Source File: DataStream.java From Flink-CEPplus with Apache License 2.0

2 votes

/**
 * Configures this {@link DataStream} to use rescale partitioning: every upstream instance
 * distributes its output evenly, in a round-robin fashion, to a subset of the instances of
 * the next operation.
 *
 * <p>Which downstream instances form that subset follows from the parallelism of the two
 * operations. With an upstream parallelism of 2 and a downstream parallelism of 4, each
 * upstream instance feeds two of the four downstream instances. In the opposite case
 * (upstream 4, downstream 2), two upstream instances feed one downstream instance and the
 * remaining two feed the other one.
 *
 * <p>When neither parallelism is a multiple of the other, one or several downstream
 * instances receive input from a differing number of upstream instances.
 *
 * @return The DataStream with rescale partitioning set.
 */
@PublicEvolving
public DataStream<T> rescale() {
	final RescalePartitioner<T> rescalePartitioner = new RescalePartitioner<>();
	return setConnectionType(rescalePartitioner);
}

Example #14

Source File: DataStream.java From flink with Apache License 2.0

2 votes

/**
 * Switches this {@link DataStream} to rescale partitioning, where each upstream instance
 * sends its elements evenly, round-robin, to only a subset of the instances of the next
 * operation.
 *
 * <p>The subset is determined by the parallelism on both sides. For an upstream parallelism
 * of 2 and a downstream parallelism of 4, one upstream instance distributes to two
 * downstream instances and the other upstream instance distributes to the remaining two.
 * For an upstream parallelism of 4 and a downstream parallelism of 2, two upstream instances
 * distribute to one downstream instance and the other two distribute to the second one.
 *
 * <p>If the two parallelisms are not multiples of each other, one or several downstream
 * instances have a differing number of upstream inputs.
 *
 * @return The DataStream with rescale partitioning set.
 */
@PublicEvolving
public DataStream<T> rescale() {
	final RescalePartitioner<T> rescalePartitioner = new RescalePartitioner<>();
	return setConnectionType(rescalePartitioner);
}

Example #15

Source File: DataStream.java From flink with Apache License 2.0

2 votes

/**
 * Applies rescale partitioning to this {@link DataStream}: output elements are spread
 * evenly, in round-robin order, over a subset of the instances of the next operation.
 *
 * <p>The parallelism of the upstream and downstream operations decides which instances are
 * in the subset. Example: with upstream parallelism 2 and downstream parallelism 4, each
 * upstream instance distributes to two of the downstream instances. Conversely, with
 * upstream parallelism 4 and downstream parallelism 2, two upstream instances distribute to
 * one downstream instance and the other two distribute to the other downstream instance.
 *
 * <p>Where the parallelisms are not multiples of each other, one or several downstream
 * instances end up with a differing number of inputs from upstream instances.
 *
 * @return The DataStream with rescale partitioning set.
 */
@PublicEvolving
public DataStream<T> rescale() {
	final RescalePartitioner<T> rescalePartitioner = new RescalePartitioner<>();
	return setConnectionType(rescalePartitioner);
}