org.apache.flink.streaming.api.transformations.PartitionTransformation Java Examples

The following examples show how to use org.apache.flink.streaming.api.transformations.PartitionTransformation. Each example is taken from an open-source project; the header above each one names the source file, the project it comes from, and its license.
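Every example below follows the same basic pattern: wrap an upstream Transformation in a PartitionTransformation (optionally with an explicit ShuffleMode) and hand the result to a new DataStream. The following minimal sketch distills that pattern; it assumes a Flink 1.9/1.10-era classpath, where ShuffleMode and the public DataStream(env, transformation) constructor used throughout these examples are available.

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.transformations.PartitionTransformation;
import org.apache.flink.streaming.api.transformations.ShuffleMode;
import org.apache.flink.streaming.runtime.partitioner.RebalancePartitioner;

public class PartitionTransformationSketch {
	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		DataStream<Integer> source = env.fromElements(1, 2, 3);

		// Wrap the source's Transformation to control how records are
		// redistributed to the downstream map operator.
		DataStream<Integer> rebalanced = new DataStream<>(env, new PartitionTransformation<>(
			source.getTransformation(), new RebalancePartitioner<>(), ShuffleMode.UNDEFINED));

		rebalanced.map(i -> i * 2).print().setParallelism(2);
		env.execute("PartitionTransformation sketch");
	}
}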
Example #1
Source File: StreamingJobGraphGeneratorWithGlobalDataExchangeModeTest.java    From flink with Apache License 2.0
/**
 * Topology: source(parallelism=1) --(forward)--> map1(parallelism=1)
 *           --(rescale)--> map2(parallelism=2) --(rebalance)--> sink(parallelism=2).
 */
private static StreamGraph createStreamGraph() {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	final DataStream<Integer> source = env.fromElements(1, 2, 3).setParallelism(1);

	final DataStream<Integer> forward = new DataStream<>(env, new PartitionTransformation<>(
		source.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.UNDEFINED));
	final DataStream<Integer> map1 = forward.map(i -> i).startNewChain().setParallelism(1);

	final DataStream<Integer> rescale = new DataStream<>(env, new PartitionTransformation<>(
		map1.getTransformation(), new RescalePartitioner<>(), ShuffleMode.UNDEFINED));
	final DataStream<Integer> map2 = rescale.map(i -> i).setParallelism(2);

	map2.rebalance().print().setParallelism(2);

	return env.getStreamGraph();
}
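A sketch of how such a helper is typically consumed in this test class, reconstructed from the pattern of Example #3 below (not the verbatim test):

StreamGraph streamGraph = createStreamGraph();
streamGraph.setGlobalDataExchangeMode(GlobalDataExchangeMode.ALL_EDGES_BLOCKING);
JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(streamGraph);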
 
Example #2
Source File: DataStreamUtils.java    From flink with Apache License 2.0
/**
 * Reinterprets the given {@link DataStream} as a {@link KeyedStream}, which extracts keys with the given
 * {@link KeySelector}.
 *
 * <p>IMPORTANT: For every partition of the base stream, the keys of events in the base stream must be
 * partitioned exactly in the same way as if it was created through a {@link DataStream#keyBy(KeySelector)}.
 *
 * @param stream      The data stream to reinterpret. For every partition, this stream must be partitioned exactly
 *                    in the same way as if it was created through a {@link DataStream#keyBy(KeySelector)}.
 * @param keySelector Function that defines how keys are extracted from the data stream.
 * @param typeInfo    Explicit type information about the key type.
 * @param <T>         Type of events in the data stream.
 * @param <K>         Type of the extracted keys.
 * @return The reinterpretation of the {@link DataStream} as a {@link KeyedStream}.
 */
public static <T, K> KeyedStream<T, K> reinterpretAsKeyedStream(
	DataStream<T> stream,
	KeySelector<T, K> keySelector,
	TypeInformation<K> typeInfo) {

	PartitionTransformation<T> partitionTransformation = new PartitionTransformation<>(
		stream.getTransformation(),
		new ForwardPartitioner<>());

	return new KeyedStream<>(
		stream,
		partitionTransformation,
		keySelector,
		typeInfo);
}
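A hypothetical call site for this utility. Setting the parallelism to 1 makes the partitioning precondition hold trivially, since a single partition is always partitioned the way keyBy would partition it; with higher parallelism you must guarantee the precondition yourself.

import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamUtils;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class ReinterpretSketch {
	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(1); // one partition => the keyBy precondition holds trivially

		DataStream<Tuple2<String, Integer>> events = env.fromElements(
			Tuple2.of("a", 1), Tuple2.of("b", 2), Tuple2.of("a", 3));

		// Reinterpret without a network shuffle; the ForwardPartitioner used
		// internally keeps every record in its current partition.
		KeyedStream<Tuple2<String, Integer>, String> keyed =
			DataStreamUtils.reinterpretAsKeyedStream(events, t -> t.f0, Types.STRING);

		keyed.sum(1).print();
		env.execute("reinterpretAsKeyedStream sketch");
	}
}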
 
Example #3
Source File: StreamingJobGraphGeneratorWithGlobalDataExchangeModeTest.java    From flink with Apache License 2.0
@Test
public void testGlobalDataExchangeModeDoesNotOverrideSpecifiedShuffleMode() {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	final DataStream<Integer> source = env.fromElements(1, 2, 3).setParallelism(1);
	final DataStream<Integer> forward = new DataStream<>(env, new PartitionTransformation<>(
		source.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.PIPELINED));
	forward.map(i -> i).startNewChain().setParallelism(1);
	final StreamGraph streamGraph = env.getStreamGraph();
	streamGraph.setGlobalDataExchangeMode(GlobalDataExchangeMode.ALL_EDGES_BLOCKING);

	final JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(streamGraph);

	final List<JobVertex> verticesSorted = jobGraph.getVerticesSortedTopologicallyFromSources();
	final JobVertex sourceVertex = verticesSorted.get(0);

	assertEquals(ResultPartitionType.PIPELINED_BOUNDED, sourceVertex.getProducedDataSets().get(0).getResultType());
}
 
Example #4
Source File: SiddhiStream.java    From flink-siddhi with Apache License 2.0
/**
 * Siddhi Continuous Query Language (CQL)
 *
 * @return ExecutionSiddhiStream context
 */
public ExecutionSiddhiStream cql(DataStream<ControlEvent> controlStream) {
    DataStream<Tuple2<StreamRoute, Object>> unionStream = controlStream
        .map(new NamedControlStream(ControlEvent.DEFAULT_INTERNAL_CONTROL_STREAM))
        .broadcast()
        .union(this.toDataStream())
        .transform("add route transform",
            SiddhiTypeFactory.getStreamTupleTypeInformation(TypeInformation.of(Object.class)),
            new AddRouteOperator(getCepEnvironment().getDataStreamSchemas()));

    DataStream<Tuple2<StreamRoute, Object>> partitionedStream = new DataStream<>(
        unionStream.getExecutionEnvironment(),
        new PartitionTransformation<>(unionStream.getTransformation(),
            new DynamicPartitioner()));
    return new ExecutionSiddhiStream(partitionedStream, null, getCepEnvironment());
}
 
Example #5
Source File: DataStreamUtils.java    From Flink-CEPplus with Apache License 2.0
/**
 * Reinterprets the given {@link DataStream} as a {@link KeyedStream}, which extracts keys with the given
 * {@link KeySelector}.
 *
 * <p>IMPORTANT: For every partition of the base stream, the keys of events in the base stream must be
 * partitioned exactly in the same way as if it was created through a {@link DataStream#keyBy(KeySelector)}.
 *
 * @param stream      The data stream to reinterpret. For every partition, this stream must be partitioned exactly
 *                    in the same way as if it was created through a {@link DataStream#keyBy(KeySelector)}.
 * @param keySelector Function that defines how keys are extracted from the data stream.
 * @param typeInfo    Explicit type information about the key type.
 * @param <T>         Type of events in the data stream.
 * @param <K>         Type of the extracted keys.
 * @return The reinterpretation of the {@link DataStream} as a {@link KeyedStream}.
 */
public static <T, K> KeyedStream<T, K> reinterpretAsKeyedStream(
	DataStream<T> stream,
	KeySelector<T, K> keySelector,
	TypeInformation<K> typeInfo) {

	PartitionTransformation<T> partitionTransformation = new PartitionTransformation<>(
		stream.getTransformation(),
		new ForwardPartitioner<>());

	return new KeyedStream<>(
		stream,
		partitionTransformation,
		keySelector,
		typeInfo);
}
 
Example #6
Source File: StreamingJobGraphGeneratorTest.java    From flink with Apache License 2.0
/**
 * Verify that "blockingConnectionsBetweenChains" is off by default.
 */
@Test
public void testBlockingAfterChainingOffDisabled() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	// fromElements -> Filter -> Print
	DataStream<Integer> sourceDataStream = env.fromElements(1, 2, 3);

	// partition transformation with an undefined shuffle mode between source and filter
	DataStream<Integer> partitionAfterSourceDataStream = new DataStream<>(env, new PartitionTransformation<>(
		sourceDataStream.getTransformation(), new RescalePartitioner<>(), ShuffleMode.UNDEFINED));
	DataStream<Integer> filterDataStream = partitionAfterSourceDataStream.filter(value -> true).setParallelism(2);

	DataStream<Integer> partitionAfterFilterDataStream = new DataStream<>(env, new PartitionTransformation<>(
		filterDataStream.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.UNDEFINED));

	partitionAfterFilterDataStream.print().setParallelism(2);

	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());

	List<JobVertex> verticesSorted = jobGraph.getVerticesSortedTopologicallyFromSources();
	assertEquals(2, verticesSorted.size());

	JobVertex sourceVertex = verticesSorted.get(0);
	JobVertex filterAndPrintVertex = verticesSorted.get(1);

	assertEquals(ResultPartitionType.PIPELINED_BOUNDED, sourceVertex.getProducedDataSets().get(0).getResultType());
	assertEquals(ResultPartitionType.PIPELINED_BOUNDED,
			filterAndPrintVertex.getInputs().get(0).getSource().getResultType());
}
 
Example #7
Source File: StreamingJobGraphGeneratorTest.java    From flink with Apache License 2.0
/**
 * Create a StreamGraph as below.
 *
 * <p>source1 --(rebalance & pipelined)--> Map1
 *
 * <p>source2 --(rebalance & blocking)--> Map2
 */
private StreamGraph createStreamGraphForSlotSharingTest() {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	final DataStream<Integer> source1 = env.fromElements(1, 2, 3).name("source1");
	source1.rebalance().map(v -> v).name("map1");

	final DataStream<Integer> source2 = env.fromElements(4, 5, 6).name("source2");
	final DataStream<Integer> partitioned = new DataStream<>(env, new PartitionTransformation<>(
		source2.getTransformation(), new RebalancePartitioner<>(), ShuffleMode.BATCH));
	partitioned.map(v -> v).name("map2");

	return env.getStreamGraph();
}
 
Example #8
Source File: StreamingJobGraphGeneratorTest.java    From flink with Apache License 2.0
/**
 * Test setting shuffle mode to {@link ShuffleMode#UNDEFINED}.
 */
@Test
public void testShuffleModeUndefined() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	// fromElements -> Map -> Print
	DataStream<Integer> sourceDataStream = env.fromElements(1, 2, 3);

	DataStream<Integer> partitionAfterSourceDataStream = new DataStream<>(env, new PartitionTransformation<>(
			sourceDataStream.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.UNDEFINED));
	DataStream<Integer> mapDataStream = partitionAfterSourceDataStream.map(value -> value).setParallelism(1);

	DataStream<Integer> partitionAfterMapDataStream = new DataStream<>(env, new PartitionTransformation<>(
			mapDataStream.getTransformation(), new RescalePartitioner<>(), ShuffleMode.UNDEFINED));
	partitionAfterMapDataStream.print().setParallelism(2);

	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());

	List<JobVertex> verticesSorted = jobGraph.getVerticesSortedTopologicallyFromSources();
	assertEquals(2, verticesSorted.size());

	// it can be chained with UNDEFINED shuffle mode
	JobVertex sourceAndMapVertex = verticesSorted.get(0);

	// UNDEFINED shuffle mode is translated into PIPELINED_BOUNDED result partition by default
	assertEquals(ResultPartitionType.PIPELINED_BOUNDED,
		sourceAndMapVertex.getProducedDataSets().get(0).getResultType());
}
 
Example #9
Source File: StreamingJobGraphGeneratorTest.java    From flink with Apache License 2.0
/**
 * Test setting shuffle mode to {@link ShuffleMode#BATCH}.
 */
@Test
public void testShuffleModeBatch() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	// fromElements -> Map -> Print
	DataStream<Integer> sourceDataStream = env.fromElements(1, 2, 3);

	DataStream<Integer> partitionAfterSourceDataStream = new DataStream<>(env, new PartitionTransformation<>(
			sourceDataStream.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.BATCH));
	DataStream<Integer> mapDataStream = partitionAfterSourceDataStream.map(value -> value).setParallelism(1);

	DataStream<Integer> partitionAfterMapDataStream = new DataStream<>(env, new PartitionTransformation<>(
			mapDataStream.getTransformation(), new RescalePartitioner<>(), ShuffleMode.BATCH));
	partitionAfterMapDataStream.print().setParallelism(2);

	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());

	List<JobVertex> verticesSorted = jobGraph.getVerticesSortedTopologicallyFromSources();
	assertEquals(3, verticesSorted.size());

	// it can not be chained with BATCH shuffle mode
	JobVertex sourceVertex = verticesSorted.get(0);
	JobVertex mapVertex = verticesSorted.get(1);

	// BATCH shuffle mode is translated into BLOCKING result partition
	assertEquals(ResultPartitionType.BLOCKING,
		sourceVertex.getProducedDataSets().get(0).getResultType());
	assertEquals(ResultPartitionType.BLOCKING,
		mapVertex.getProducedDataSets().get(0).getResultType());
}
 
Example #10
Source File: StreamingJobGraphGeneratorTest.java    From flink with Apache License 2.0
/**
 * Test setting shuffle mode to {@link ShuffleMode#PIPELINED}.
 */
@Test
public void testShuffleModePipelined() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	// fromElements -> Map -> Print
	DataStream<Integer> sourceDataStream = env.fromElements(1, 2, 3);

	DataStream<Integer> partitionAfterSourceDataStream = new DataStream<>(env, new PartitionTransformation<>(
			sourceDataStream.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.PIPELINED));
	DataStream<Integer> mapDataStream = partitionAfterSourceDataStream.map(value -> value).setParallelism(1);

	DataStream<Integer> partitionAfterMapDataStream = new DataStream<>(env, new PartitionTransformation<>(
			mapDataStream.getTransformation(), new RescalePartitioner<>(), ShuffleMode.PIPELINED));
	partitionAfterMapDataStream.print().setParallelism(2);

	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());

	List<JobVertex> verticesSorted = jobGraph.getVerticesSortedTopologicallyFromSources();
	assertEquals(2, verticesSorted.size());

	// it can be chained with PIPELINED shuffle mode
	JobVertex sourceAndMapVertex = verticesSorted.get(0);

	// PIPELINED shuffle mode is translated into PIPELINED_BOUNDED result partition
	assertEquals(ResultPartitionType.PIPELINED_BOUNDED,
			sourceAndMapVertex.getProducedDataSets().get(0).getResultType());
}
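Taken together, Examples #8, #9, and #10 pin down the translation rules: UNDEFINED and PIPELINED edges become PIPELINED_BOUNDED result partitions and still permit operator chaining, while BATCH edges become BLOCKING result partitions and force a chain break (hence the extra job vertex in Example #9).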
 
Example #11
Source File: StreamGraphGenerator.java    From flink with Apache License 2.0
private <T> void validateSplitTransformation(Transformation<T> input) {
	if (input instanceof SelectTransformation || input instanceof SplitTransformation) {
		throw new IllegalStateException("Consecutive multiple splits are not supported. Splits are deprecated. Please use side-outputs.");
	} else if (input instanceof SideOutputTransformation) {
		throw new IllegalStateException("Split after side-outputs are not supported. Splits are deprecated. Please use side-outputs.");
	} else if (input instanceof UnionTransformation) {
		for (Transformation<T> transformation : ((UnionTransformation<T>) input).getInputs()) {
			validateSplitTransformation(transformation);
		}
	} else if (input instanceof PartitionTransformation) {
		validateSplitTransformation(((PartitionTransformation) input).getInput());
	} else {
		return;
	}
}
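Both error messages steer users from the deprecated split/select API to side outputs. A minimal replacement sketch using the standard OutputTag/ProcessFunction API (names here are hypothetical):

import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;

public class SideOutputInsteadOfSplit {
	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		final OutputTag<Integer> evens = new OutputTag<Integer>("evens") {};

		SingleOutputStreamOperator<Integer> odds = env.fromElements(1, 2, 3, 4)
			.process(new ProcessFunction<Integer, Integer>() {
				@Override
				public void processElement(Integer value, Context ctx, Collector<Integer> out) {
					if (value % 2 == 0) {
						ctx.output(evens, value); // routed to the side output
					} else {
						out.collect(value);       // main output
					}
				}
			});

		odds.print();                      // 1, 3
		odds.getSideOutput(evens).print(); // 2, 4
		env.execute("side-output sketch");
	}
}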
 
Example #12
Source File: StreamGraphGenerator.java    From flink with Apache License 2.0
/**
 * Transforms a {@code PartitionTransformation}.
 *
 * <p>For this we create a virtual node in the {@code StreamGraph} that holds the partition
 * property. @see StreamGraphGenerator
 */
private <T> Collection<Integer> transformPartition(PartitionTransformation<T> partition) {
	Transformation<T> input = partition.getInput();
	List<Integer> resultIds = new ArrayList<>();

	Collection<Integer> transformedIds = transform(input);
	for (Integer transformedId: transformedIds) {
		int virtualId = Transformation.getNewNodeId();
		streamGraph.addVirtualPartitionNode(
				transformedId, virtualId, partition.getPartitioner(), partition.getShuffleMode());
		resultIds.add(virtualId);
	}

	return resultIds;
}
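The effect of the virtual node is visible in the execution plan: a partitioning step contributes no operator of its own, only an edge property. A short sketch (assuming the standard getExecutionPlan() JSON dump):

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.fromElements(1, 2, 3)
	.rebalance()   // adds a PartitionTransformation under the hood
	.map(i -> i)
	.print();

// The plan lists only source, map and sink nodes; the rebalance survives
// as the partitioning strategy on the source->map edge, not as a node.
System.out.println(env.getExecutionPlan());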
 
Example #13
Source File: KeyedStream.java    From flink with Apache License 2.0
/**
 * Creates a new {@link KeyedStream} using the given {@link KeySelector}
 * to partition operator state by key.
 *
 * @param dataStream
 *            Base stream of data
 * @param keySelector
 *            Function for determining state partitions
 */
public KeyedStream(DataStream<T> dataStream, KeySelector<T, KEY> keySelector, TypeInformation<KEY> keyType) {
	this(
		dataStream,
		new PartitionTransformation<>(
			dataStream.getTransformation(),
			new KeyGroupStreamPartitioner<>(keySelector, StreamGraphGenerator.DEFAULT_LOWER_BOUND_MAX_PARALLELISM)),
		keySelector,
		keyType);
}
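This is the constructor behind DataStream#keyBy, so a plain keyBy call is the usual way to exercise it; a brief sketch (hypothetical names, assuming an existing env):

DataStream<Integer> numbers = env.fromElements(1, 2, 3, 4);

// keyBy delegates to the constructor above: the resulting KeyedStream is
// backed by a PartitionTransformation carrying a KeyGroupStreamPartitioner.
KeyedStream<Integer, Integer> byParity = numbers.keyBy(n -> n % 2);
byParity.reduce((a, b) -> a + b).print();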
 
Example #14
Source File: HiveTableSourceITCase.java    From flink with Apache License 2.0
@Test
public void testParallelismOnLimitPushDown() {
	final String dbName = "source_db";
	final String tblName = "test_parallelism_limit_pushdown";
	TableEnvironment tEnv = createTableEnv();
	tEnv.getConfig().getConfiguration().setBoolean(
			HiveOptions.TABLE_EXEC_HIVE_INFER_SOURCE_PARALLELISM, false);
	tEnv.getConfig().getConfiguration().setInteger(
			ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM, 2);
	tEnv.executeSql("CREATE TABLE source_db.test_parallelism_limit_pushdown " +
				"(`year` STRING, `value` INT) partitioned by (pt int)");
	HiveTestUtils.createTextTableInserter(hiveShell, dbName, tblName)
				.addRow(new Object[]{"2014", 3})
				.addRow(new Object[]{"2014", 4})
				.commit("pt=0");
	HiveTestUtils.createTextTableInserter(hiveShell, dbName, tblName)
				.addRow(new Object[]{"2015", 2})
				.addRow(new Object[]{"2015", 5})
				.commit("pt=1");
	Table table = tEnv.sqlQuery("select * from hive.source_db.test_parallelism_limit_pushdown limit 1");
	PlannerBase planner = (PlannerBase) ((TableEnvironmentImpl) tEnv).getPlanner();
	RelNode relNode = planner.optimize(TableTestUtil.toRelNode(table));
	ExecNode execNode = planner.translateToExecNodePlan(toScala(Collections.singletonList(relNode))).get(0);
	@SuppressWarnings("unchecked")
	Transformation transformation = execNode.translateToPlan(planner);
	Assert.assertEquals(1, ((PartitionTransformation) ((OneInputTransformation) transformation).getInput())
		.getInput().getParallelism());
}
 
Example #15
Source File: StreamingJobGraphGeneratorTest.java    From flink with Apache License 2.0
/**
 * Test enabling the property "blockingConnectionsBetweenChains".
 */
@Test
public void testBlockingConnectionsBetweenChainsEnabled() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	// fromElements -> Filter -> Map -> Print
	DataStream<Integer> sourceDataStream = env.fromElements(1, 2, 3);

	// partition transformation with an undefined shuffle mode between source and filter
	DataStream<Integer> partitionAfterSourceDataStream = new DataStream<>(env, new PartitionTransformation<>(
		sourceDataStream.getTransformation(), new RescalePartitioner<>(), ShuffleMode.UNDEFINED));
	DataStream<Integer> filterDataStream = partitionAfterSourceDataStream.filter(value -> true).setParallelism(2);

	DataStream<Integer> partitionAfterFilterDataStream = new DataStream<>(env, new PartitionTransformation<>(
		filterDataStream.getTransformation(), new ForwardPartitioner<>(), ShuffleMode.UNDEFINED));
	DataStream<Integer> mapDataStream = partitionAfterFilterDataStream.map(value -> value).setParallelism(2);

	DataStream<Integer> partitionAfterMapDataStream = new DataStream<>(env, new PartitionTransformation<>(
		mapDataStream.getTransformation(), new RescalePartitioner<>(), ShuffleMode.PIPELINED));
	partitionAfterMapDataStream.print().setParallelism(1);

	StreamGraph streamGraph = env.getStreamGraph();
	streamGraph.setBlockingConnectionsBetweenChains(true);
	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(streamGraph);

	List<JobVertex> verticesSorted = jobGraph.getVerticesSortedTopologicallyFromSources();
	assertEquals(3, verticesSorted.size());

	JobVertex sourceVertex = verticesSorted.get(0);
	// still can be chained
	JobVertex filterAndMapVertex = verticesSorted.get(1);
	JobVertex printVertex = verticesSorted.get(2);

	// the edge with undefined shuffle mode is translated into BLOCKING
	assertEquals(ResultPartitionType.BLOCKING, sourceVertex.getProducedDataSets().get(0).getResultType());
	// the edge with PIPELINED shuffle mode is translated into PIPELINED_BOUNDED
	assertEquals(ResultPartitionType.PIPELINED_BOUNDED, filterAndMapVertex.getProducedDataSets().get(0).getResultType());
	assertEquals(ResultPartitionType.PIPELINED_BOUNDED, printVertex.getInputs().get(0).getSource().getResultType());
}
 
Example #16
Source File: StreamGraphGenerator.java    From Flink-CEPplus with Apache License 2.0
private <T> void validateSplitTransformation(StreamTransformation<T> input) {
	if (input instanceof SelectTransformation || input instanceof SplitTransformation) {
		throw new IllegalStateException("Consecutive multiple splits are not supported. Splits are deprecated. Please use side-outputs.");
	} else if (input instanceof SideOutputTransformation) {
		throw new IllegalStateException("Split after side-outputs are not supported. Splits are deprecated. Please use side-outputs.");
	} else if (input instanceof UnionTransformation) {
		for (StreamTransformation<T> transformation : ((UnionTransformation<T>) input).getInputs()) {
			validateSplitTransformation(transformation);
		}
	} else if (input instanceof PartitionTransformation) {
		validateSplitTransformation(((PartitionTransformation) input).getInput());
	} else {
		return;
	}
}
 
Example #17
Source File: StreamGraphGenerator.java    From Flink-CEPplus with Apache License 2.0
/**
 * Transforms a {@code PartitionTransformation}.
 *
 * <p>For this we create a virtual node in the {@code StreamGraph} that holds the partition
 * property. @see StreamGraphGenerator
 */
private <T> Collection<Integer> transformPartition(PartitionTransformation<T> partition) {
	StreamTransformation<T> input = partition.getInput();
	List<Integer> resultIds = new ArrayList<>();

	Collection<Integer> transformedIds = transform(input);
	for (Integer transformedId: transformedIds) {
		int virtualId = StreamTransformation.getNewNodeId();
		streamGraph.addVirtualPartitionNode(transformedId, virtualId, partition.getPartitioner());
		resultIds.add(virtualId);
	}

	return resultIds;
}
 
Example #18
Source File: KeyedStream.java    From Flink-CEPplus with Apache License 2.0
/**
 * Creates a new {@link KeyedStream} using the given {@link KeySelector}
 * to partition operator state by key.
 *
 * @param dataStream
 *            Base stream of data
 * @param keySelector
 *            Function for determining state partitions
 */
public KeyedStream(DataStream<T> dataStream, KeySelector<T, KEY> keySelector, TypeInformation<KEY> keyType) {
	this(
		dataStream,
		new PartitionTransformation<>(
			dataStream.getTransformation(),
			new KeyGroupStreamPartitioner<>(keySelector, StreamGraphGenerator.DEFAULT_LOWER_BOUND_MAX_PARALLELISM)),
		keySelector,
		keyType);
}
 
Example #19
Source File: KeyedStream.java    From flink with Apache License 2.0
/**
 * Creates a new {@link KeyedStream} using the given {@link KeySelector} and {@link TypeInformation}
 * to partition operator state by key, where the partitioning is defined by a {@link PartitionTransformation}.
 *
 * @param stream
 *            Base stream of data
 * @param partitionTransformation
 *            Function that determines how the keys are distributed to downstream operator(s)
 * @param keySelector
 *            Function to extract keys from the base stream
 * @param keyType
 *            Defines the type of the extracted keys
 */
@Internal
KeyedStream(
	DataStream<T> stream,
	PartitionTransformation<T> partitionTransformation,
	KeySelector<T, KEY> keySelector,
	TypeInformation<KEY> keyType) {

	super(stream.getExecutionEnvironment(), partitionTransformation);
	this.keySelector = clean(keySelector);
	this.keyType = validateKeyType(keyType);
}
 
Example #20
Source File: KeyedStream.java    From Flink-CEPplus with Apache License 2.0
/**
 * Creates a new {@link KeyedStream} using the given {@link KeySelector} and {@link TypeInformation}
 * to partition operator state by key, where the partitioning is defined by a {@link PartitionTransformation}.
 *
 * @param stream
 *            Base stream of data
 * @param partitionTransformation
 *            Function that determines how the keys are distributed to downstream operator(s)
 * @param keySelector
 *            Function to extract keys from the base stream
 * @param keyType
 *            Defines the type of the extracted keys
 */
@Internal
KeyedStream(
	DataStream<T> stream,
	PartitionTransformation<T> partitionTransformation,
	KeySelector<T, KEY> keySelector,
	TypeInformation<KEY> keyType) {

	super(stream.getExecutionEnvironment(), partitionTransformation);
	this.keySelector = clean(keySelector);
	this.keyType = validateKeyType(keyType);
}
 
Example #21
Source File: DataStream.java    From flink with Apache License 2.0
/**
 * Internal function for setting the partitioner for the DataStream.
 *
 * @param partitioner
 *            Partitioner to set.
 * @return The modified DataStream.
 */
protected DataStream<T> setConnectionType(StreamPartitioner<T> partitioner) {
	return new DataStream<>(this.getExecutionEnvironment(), new PartitionTransformation<>(this.getTransformation(), partitioner));
}
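The public partitioning methods on DataStream are thin wrappers over this hook, each passing the matching StreamPartitioner; the mapping below reflects the Flink sources for this class:

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStream<Integer> src = env.fromElements(1, 2, 3);

// Each call routes through setConnectionType(...) with the named partitioner:
src.shuffle();    // ShufflePartitioner: random target
src.rebalance();  // RebalancePartitioner: round-robin
src.rescale();    // RescalePartitioner: round-robin within local groups
src.broadcast();  // BroadcastPartitioner: every downstream task
src.forward();    // ForwardPartitioner: same subtask index
src.global();     // GlobalPartitioner: everything to subtask 0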
 