org.apache.flink.api.dag.Transformation Java Examples

The following examples show how to use org.apache.flink.api.dag.Transformation. You can vote up the examples you find useful or vote down those you don't, and follow the links above each example to view the original project or source file. Related API usage is listed on the sidebar.
Example #1
Source File: TableEnvironmentImpl.java    From flink with Apache License 2.0 6 votes vote down vote up
@Override
public TableResult executeInternal(List<ModifyOperation> operations) {
	// Translate the modify operations into a pipeline whose job name lists
	// every target sink identifier.
	final List<Transformation<?>> translated = translate(operations);
	final List<String> sinkNames = extractSinkIdentifierNames(operations);
	final String jobName = "insert-into_" + String.join(",", sinkNames);
	final Pipeline pipeline = execEnv.createPipeline(translated, tableConfig, jobName);
	try {
		final JobClient jobClient = execEnv.executeAsync(pipeline);

		// Result schema: one BIGINT column per sink, named after the sink.
		// The affected row counts are unknown (-1) because the job runs
		// asynchronously.
		final TableSchema.Builder schemaBuilder = TableSchema.builder();
		final Object[] affectedRowCounts = new Long[operations.size()];
		for (int idx = 0; idx < operations.size(); idx++) {
			schemaBuilder.field(sinkNames.get(idx), DataTypes.BIGINT());
			affectedRowCounts[idx] = -1L;
		}

		return TableResultImpl.builder()
				.jobClient(jobClient)
				.resultKind(ResultKind.SUCCESS_WITH_CONTENT)
				.tableSchema(schemaBuilder.build())
				.data(Collections.singletonList(Row.of(affectedRowCounts)))
				.build();
	} catch (Exception e) {
		throw new TableException("Failed to execute sql", e);
	}
}
 
Example #2
Source File: BatchExecutor.java    From flink with Apache License 2.0 6 votes vote down vote up
@Override
public StreamGraph generateStreamGraph(List<Transformation<?>> transformations, String jobName) {
	// Register all transformations on a batch-configured environment.
	final StreamExecutionEnvironment env = getExecutionEnvironment();
	setBatchProperties(env);
	for (Transformation<?> transformation : transformations) {
		env.addOperator(transformation);
	}
	final StreamGraph graph = env.getStreamGraph(getNonEmptyJobName(jobName));

	// All transformations should set managed memory size: every node still
	// carrying the default resources receives the managed-memory spec.
	final ResourceSpec managedSpec = NodeResourceUtil.fromManagedMem(0);
	graph.getStreamNodes().forEach(node -> {
		if (node.getMinResources().equals(ResourceSpec.DEFAULT)) {
			node.setResources(managedSpec, managedSpec);
		}
	});

	graph.setChaining(true);
	graph.setScheduleMode(ScheduleMode.LAZY_FROM_SOURCES_WITH_BATCH_SLOT_REQUEST);
	graph.setStateBackend(null);
	if (graph.getCheckpointConfig().isCheckpointingEnabled()) {
		throw new IllegalArgumentException("Checkpoint is not supported for batch jobs.");
	}
	if (isShuffleModeAllBatch()) {
		graph.setBlockingConnectionsBetweenChains(true);
	}
	return graph;
}
 
Example #3
Source File: StreamGraphGenerator.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Transforms a {@code SelectTransformation}.
 *
 * <p>No real operator is created; instead a virtual node that carries the
 * selected names is registered in the {@code StreamGraph}.
 *
 * @see org.apache.flink.streaming.api.graph.StreamGraphGenerator
 */
private <T> Collection<Integer> transformSelect(SelectTransformation<T> select) {
	Collection<Integer> upstreamIds = transform(select.getInput());

	// The recursive call above may already have handled this transformation.
	if (alreadyTransformed.containsKey(select)) {
		return alreadyTransformed.get(select);
	}

	List<Integer> virtualIds = new ArrayList<>();
	for (int upstreamId : upstreamIds) {
		int virtualId = Transformation.getNewNodeId();
		streamGraph.addVirtualSelectNode(upstreamId, virtualId, select.getSelectedNames());
		virtualIds.add(virtualId);
	}
	return virtualIds;
}
 
Example #4
Source File: StreamGraphGenerator.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Transforms a {@code SideOutputTransformation}.
 *
 * <p>No real operator is created; instead a virtual node that carries the
 * side-output {@link org.apache.flink.util.OutputTag} is registered in the
 * {@code StreamGraph}.
 *
 * @see org.apache.flink.streaming.api.graph.StreamGraphGenerator
 */
private <T> Collection<Integer> transformSideOutput(SideOutputTransformation<T> sideOutput) {
	Collection<Integer> upstreamIds = transform(sideOutput.getInput());

	// The recursive call above may already have handled this transformation.
	if (alreadyTransformed.containsKey(sideOutput)) {
		return alreadyTransformed.get(sideOutput);
	}

	List<Integer> virtualIds = new ArrayList<>();
	for (int upstreamId : upstreamIds) {
		int virtualId = Transformation.getNewNodeId();
		streamGraph.addVirtualSideOutputNode(upstreamId, virtualId, sideOutput.getOutputTag());
		virtualIds.add(virtualId);
	}
	return virtualIds;
}
 
Example #5
Source File: StreamGraphGenerator.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Transforms a {@code SplitTransformation}.
 *
 * <p>No new node is created; the output selector is attached to the already
 * transformed upstream nodes.
 */
private <T> Collection<Integer> transformSplit(SplitTransformation<T> split) {

	Transformation<T> input = split.getInput();
	Collection<Integer> upstreamIds = transform(input);

	validateSplitTransformation(input);

	// The recursive call above may already have handled this transformation.
	if (alreadyTransformed.containsKey(split)) {
		return alreadyTransformed.get(split);
	}

	for (int upstreamId : upstreamIds) {
		streamGraph.addOutputSelector(upstreamId, split.getOutputSelector());
	}

	return upstreamIds;
}
 
Example #6
Source File: StreamGraphGenerator.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the {@code StreamGraph} for the registered transformations, copying
 * all environment-level settings onto the fresh graph.
 */
public StreamGraph generate() {
	streamGraph = new StreamGraph(executionConfig, checkpointConfig, savepointRestoreSettings);
	streamGraph.setStateBackend(stateBackend);
	streamGraph.setChaining(chaining);
	streamGraph.setScheduleMode(scheduleMode);
	streamGraph.setUserArtifacts(userArtifacts);
	streamGraph.setTimeCharacteristic(timeCharacteristic);
	streamGraph.setJobName(jobName);
	streamGraph.setGlobalDataExchangeMode(globalDataExchangeMode);

	alreadyTransformed = new HashMap<>();
	for (Transformation<?> toTransform : transformations) {
		transform(toTransform);
	}

	// Hand the finished graph to the caller and drop the generator state so
	// this instance cannot accidentally be reused.
	final StreamGraph result = streamGraph;
	alreadyTransformed.clear();
	alreadyTransformed = null;
	streamGraph = null;

	return result;
}
 
Example #7
Source File: TableEnvironmentImpl.java    From flink with Apache License 2.0 6 votes vote down vote up
@Override
public TableResult executeInternal(QueryOperation operation) {
	// Wrap the query in a select sink so its rows can be collected client-side.
	final SelectSinkOperation sink = new SelectSinkOperation(operation);
	final List<Transformation<?>> translated = translate(Collections.singletonList(sink));
	final Pipeline pipeline = execEnv.createPipeline(translated, tableConfig, "collect");
	try {
		final JobClient client = execEnv.executeAsync(pipeline);
		final SelectResultProvider provider = sink.getSelectResultProvider();
		provider.setJobClient(client);
		return TableResultImpl.builder()
				.jobClient(client)
				.resultKind(ResultKind.SUCCESS_WITH_CONTENT)
				.tableSchema(operation.getTableSchema())
				.data(provider.getResultIterator())
				.setPrintStyle(TableResultImpl.PrintStyle.tableau(
						PrintUtils.MAX_COLUMN_WIDTH, PrintUtils.NULL_COLUMN, true, isStreamingMode))
				.build();
	} catch (Exception e) {
		throw new TableException("Failed to execute sql", e);
	}
}
 
Example #8
Source File: DataStream.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a new {@link DataStream} by merging {@link DataStream} outputs of
 * the same type with each other. The DataStreams merged using this operator
 * will be transformed simultaneously.
 *
 * @param streams
 *            The DataStreams to union output with.
 * @return The {@link DataStream}.
 * @throws IllegalArgumentException if any of the given streams has a type
 *             different from this stream's type
 */
@SafeVarargs
public final DataStream<T> union(DataStream<T>... streams) {
	// Presize the list: this stream's transformation plus one per argument,
	// avoiding repeated internal resizing.
	List<Transformation<T>> unionedTransforms = new ArrayList<>(streams.length + 1);
	unionedTransforms.add(this.transformation);

	for (DataStream<T> newStream : streams) {
		// All unioned streams must produce exactly the same type.
		if (!getType().equals(newStream.getType())) {
			throw new IllegalArgumentException("Cannot union streams of different types: "
					+ getType() + " and " + newStream.getType());
		}

		unionedTransforms.add(newStream.getTransformation());
	}
	return new DataStream<>(this.environment, new UnionTransformation<>(unionedTransforms));
}
 
Example #9
Source File: StreamGraphGenerator.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the {@code StreamGraph} for the registered transformations, copying
 * all environment-level settings onto the fresh graph.
 */
public StreamGraph generate() {
	streamGraph = new StreamGraph(executionConfig, checkpointConfig);
	streamGraph.setStateBackend(stateBackend);
	streamGraph.setChaining(chaining);
	streamGraph.setScheduleMode(scheduleMode);
	streamGraph.setUserArtifacts(userArtifacts);
	streamGraph.setTimeCharacteristic(timeCharacteristic);
	streamGraph.setJobName(jobName);
	streamGraph.setBlockingConnectionsBetweenChains(blockingConnectionsBetweenChains);

	alreadyTransformed = new HashMap<>();
	for (Transformation<?> toTransform : transformations) {
		transform(toTransform);
	}

	// Hand the finished graph to the caller and drop the generator state so
	// this instance cannot accidentally be reused.
	final StreamGraph result = streamGraph;
	alreadyTransformed.clear();
	alreadyTransformed = null;
	streamGraph = null;

	return result;
}
 
Example #10
Source File: StreamGraphGeneratorTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Tests that with slot sharing enabled every stream node ends up in the
 * default slot sharing group.
 */
@Test
public void testEnableSlotSharing() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	DataStream<Integer> source = env.fromElements(1, 2, 3);
	DataStream<Integer> mapped = source.map(x -> x + 1);

	final List<Transformation<?>> transformations = new ArrayList<>();
	transformations.add(source.getTransformation());
	transformations.add(mapped.getTransformation());

	// No explicit groups are configured, so the default group applies everywhere.
	StreamGraph streamGraph = new StreamGraphGenerator(
			transformations, env.getConfig(), env.getCheckpointConfig())
		.generate();

	for (StreamNode streamNode : streamGraph.getStreamNodes()) {
		assertEquals(StreamGraphGenerator.DEFAULT_SLOT_SHARING_GROUP, streamNode.getSlotSharingGroup());
	}
}
 
Example #11
Source File: StreamGraphGenerator.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Transforms a {@code SideOutputTransformation}.
 *
 * <p>No real operator is created; instead a virtual node that carries the
 * side-output {@link org.apache.flink.util.OutputTag} is registered in the
 * {@code StreamGraph}.
 *
 * @see org.apache.flink.streaming.api.graph.StreamGraphGenerator
 */
private <T> Collection<Integer> transformSideOutput(SideOutputTransformation<T> sideOutput) {
	Collection<Integer> upstreamIds = transform(sideOutput.getInput());

	// The recursive call above may already have handled this transformation.
	if (alreadyTransformed.containsKey(sideOutput)) {
		return alreadyTransformed.get(sideOutput);
	}

	List<Integer> virtualIds = new ArrayList<>();
	for (int upstreamId : upstreamIds) {
		int virtualId = Transformation.getNewNodeId();
		streamGraph.addVirtualSideOutputNode(upstreamId, virtualId, sideOutput.getOutputTag());
		virtualIds.add(virtualId);
	}
	return virtualIds;
}
 
Example #12
Source File: StreamGraphGeneratorTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Tests that with slot sharing disabled the head and tail of an iteration
 * still share both a co-location group and a slot sharing group.
 */
@Test
public void testIterationWithSlotSharingDisabled() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStream<Integer> source = env.fromElements(1, 2, 3).name("source");
	IterativeStream<Integer> iteration = source.iterate(3000);
	iteration.name("iteration").setParallelism(2);
	DataStream<Integer> map = iteration.map(x -> x + 1).name("map").setParallelism(2);
	DataStream<Integer> filter = map.filter((x) -> false).name("filter").setParallelism(2);
	iteration.closeWith(filter).print();

	final List<Transformation<?>> transformations = new ArrayList<>();
	transformations.add(source.getTransformation());
	transformations.add(iteration.getTransformation());
	transformations.add(map.getTransformation());
	transformations.add(filter.getTransformation());

	StreamGraphGenerator generator =
			new StreamGraphGenerator(transformations, env.getConfig(), env.getCheckpointConfig());
	generator.setSlotSharingEnabled(false);
	StreamGraph streamGraph = generator.generate();

	for (Tuple2<StreamNode, StreamNode> pair : streamGraph.getIterationSourceSinkPairs()) {
		// Iteration source and sink must be co-located ...
		assertNotNull(pair.f0.getCoLocationGroup());
		assertEquals(pair.f0.getCoLocationGroup(), pair.f1.getCoLocationGroup());

		// ... and must share a slot sharing group even with sharing disabled.
		assertNotNull(pair.f0.getSlotSharingGroup());
		assertEquals(pair.f0.getSlotSharingGroup(), pair.f1.getSlotSharingGroup());
	}
}
 
Example #13
Source File: StreamGraphGenerator.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Transforms a {@code UnionTransformation}.
 *
 * <p>A union creates no node of its own: each input is transformed and all
 * resulting IDs are returned together, so downstream operations can connect
 * to every upstream node.
 */
private <T> Collection<Integer> transformUnion(UnionTransformation<T> union) {
	List<Integer> collectedIds = new ArrayList<>();
	for (Transformation<T> input : union.getInputs()) {
		collectedIds.addAll(transform(input));
	}
	return collectedIds;
}
 
Example #14
Source File: StreamTableEnvironmentImpl.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Extracts the single transformation produced for the given table's query,
 * failing if translation yielded anything other than exactly one.
 */
@SuppressWarnings("unchecked")
private <T> Transformation<T> getTransformation(
	Table table,
	List<Transformation<?>> transformations) {
	if (transformations.size() == 1) {
		return (Transformation<T>) transformations.get(0);
	}
	throw new TableException(String.format(
		"Expected a single transformation for query: %s\n Got: %s",
		table.getQueryOperation().asSummaryString(),
		transformations));
}
 
Example #15
Source File: CoFeedbackTransformation.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Adds a feedback edge. The parallelism of the {@code Transformation} must match
 * the parallelism of the input {@code Transformation} of the upstream
 * {@code Transformation}.
 *
 * @param transform The new feedback {@code Transformation}.
 */
public void addFeedbackEdge(Transformation<F> transform) {
	final int expected = this.getParallelism();
	final int actual = transform.getParallelism();

	if (actual != expected) {
		throw new UnsupportedOperationException(
				"Parallelism of the feedback stream must match the parallelism of the original" +
						" stream. Parallelism of original stream: " + expected +
						"; parallelism of feedback stream: " + actual);
	}

	feedbackEdges.add(transform);
}
 
Example #16
Source File: UnionTransformation.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a new {@code UnionTransformation} from the given input {@code Transformations}.
 *
 * <p>The input {@code Transformations} must all have the same type.
 *
 * @param inputs The non-empty list of input {@code Transformations}
 * @throws IllegalArgumentException if {@code inputs} is empty
 * @throws UnsupportedOperationException if the inputs do not all share the same output type
 */
public UnionTransformation(List<Transformation<T>> inputs) {
	// firstInput() fails fast with a clear message instead of an opaque
	// IndexOutOfBoundsException on an empty list.
	super("Union", firstInput(inputs).getOutputType(), inputs.get(0).getParallelism());

	// All inputs must produce the same output type as the union itself.
	for (Transformation<T> input: inputs) {
		if (!input.getOutputType().equals(getOutputType())) {
			throw new UnsupportedOperationException("Type mismatch in input " + input);
		}
	}

	this.inputs = Lists.newArrayList(inputs);
}

/** Returns the first input, rejecting an empty list with a descriptive error. */
private static <T> Transformation<T> firstInput(List<Transformation<T>> inputs) {
	if (inputs.isEmpty()) {
		throw new IllegalArgumentException(
				"A UnionTransformation requires at least one input Transformation.");
	}
	return inputs.get(0);
}
 
Example #17
Source File: StreamGraphGenerator.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Transforms a {@code UnionTransformation}.
 *
 * <p>A union creates no node of its own: each input is transformed and all
 * resulting IDs are returned together, so downstream operations can connect
 * to every upstream node.
 */
private <T> Collection<Integer> transformUnion(UnionTransformation<T> union) {
	List<Integer> collectedIds = new ArrayList<>();
	for (Transformation<T> input : union.getInputs()) {
		collectedIds.addAll(transform(input));
	}
	return collectedIds;
}
 
Example #18
Source File: FeedbackTransformation.java    From flink with Apache License 2.0 5 votes vote down vote up
@Override
public Collection<Transformation<?>> getTransitivePredecessors() {
	// This transformation comes first, followed by everything reachable
	// through its input.
	final List<Transformation<?>> predecessors = Lists.newArrayList();
	predecessors.add(this);
	predecessors.addAll(input.getTransitivePredecessors());
	return predecessors;
}
 
Example #19
Source File: IterativeStream.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Closes the iteration. This method defines the end of the iterative
 * program part that will be fed back to the start of the iteration as
 * the second input in the {@link ConnectedStreams}.
 *
 * @param feedbackStream
 *            {@link DataStream} that will be used as second input to
 *            the iteration head.
 * @return The feedback stream.
 *
 */
public DataStream<F> closeWith(DataStream<F> feedbackStream) {
	final Transformation<F> feedbackTransform = feedbackStream.getTransformation();

	// The feedback stream must be derived from this iteration's head.
	if (!feedbackTransform.getTransitivePredecessors().contains(this.coFeedbackTransformation)) {
		throw new UnsupportedOperationException(
				"Cannot close an iteration with a feedback DataStream that does not originate from said iteration.");
	}

	coFeedbackTransformation.addFeedbackEdge(feedbackTransform);

	return feedbackStream;
}
 
Example #20
Source File: FeedbackTransformation.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Adds a feedback edge. The parallelism of the {@code Transformation} must match
 * the parallelism of the input {@code Transformation} of this
 * {@code FeedbackTransformation}
 *
 * @param transform The new feedback {@code Transformation}.
 */
public void addFeedbackEdge(Transformation<T> transform) {
	final int expected = this.getParallelism();
	final int actual = transform.getParallelism();

	if (actual != expected) {
		throw new UnsupportedOperationException(
				"Parallelism of the feedback stream must match the parallelism of the original" +
						" stream. Parallelism of original stream: " + expected +
						"; parallelism of feedback stream: " + actual +
						". Parallelism can be modified using DataStream#setParallelism() method");
	}

	feedbackEdges.add(transform);
}
 
Example #21
Source File: SinkTransformation.java    From flink with Apache License 2.0 5 votes vote down vote up
@Override
public Collection<Transformation<?>> getTransitivePredecessors() {
	// This transformation comes first, followed by everything reachable
	// through its input.
	final List<Transformation<?>> predecessors = Lists.newArrayList();
	predecessors.add(this);
	predecessors.addAll(input.getTransitivePredecessors());
	return predecessors;
}
 
Example #22
Source File: IterativeStream.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Closes the iteration. This method defines the end of the iterative
 * program part that will be fed back to the start of the iteration as
 * the second input in the {@link ConnectedStreams}.
 *
 * @param feedbackStream
 *            {@link DataStream} that will be used as second input to
 *            the iteration head.
 * @return The feedback stream.
 *
 */
public DataStream<F> closeWith(DataStream<F> feedbackStream) {
	final Transformation<F> feedbackTransform = feedbackStream.getTransformation();

	// The feedback stream must be derived from this iteration's head.
	if (!feedbackTransform.getTransitivePredecessors().contains(this.coFeedbackTransformation)) {
		throw new UnsupportedOperationException(
				"Cannot close an iteration with a feedback DataStream that does not originate from said iteration.");
	}

	coFeedbackTransformation.addFeedbackEdge(feedbackTransform);

	return feedbackStream;
}
 
Example #23
Source File: SinkTransformation.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@code SinkTransformation} from the given input {@code Transformation}.
 *
 * @param input The input {@code Transformation} feeding this sink
 * @param name The name of the {@code Transformation}; used for visualization and logging
 * @param operatorFactory The factory that creates the sink's {@code StreamOperator}
 * @param parallelism The parallelism of this {@code SinkTransformation}
 */
public SinkTransformation(
		Transformation<T> input,
		String name,
		StreamOperatorFactory<Object> operatorFactory,
		int parallelism) {
	// Sinks produce no meaningful output, hence the Object output type.
	super(name, TypeExtractor.getForClass(Object.class), parallelism);
	this.input = input;
	this.operatorFactory = operatorFactory;
}
 
Example #24
Source File: SplitTransformation.java    From flink with Apache License 2.0 5 votes vote down vote up
@Override
public Collection<Transformation<?>> getTransitivePredecessors() {
	// This transformation comes first, followed by everything reachable
	// through its input.
	final List<Transformation<?>> predecessors = Lists.newArrayList();
	predecessors.add(this);
	predecessors.addAll(input.getTransitivePredecessors());
	return predecessors;
}
 
Example #25
Source File: FeedbackTransformation.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Adds a feedback edge. The parallelism of the {@code Transformation} must match
 * the parallelism of the input {@code Transformation} of this
 * {@code FeedbackTransformation}
 *
 * @param transform The new feedback {@code Transformation}.
 */
public void addFeedbackEdge(Transformation<T> transform) {
	final int expected = this.getParallelism();
	final int actual = transform.getParallelism();

	if (actual != expected) {
		throw new UnsupportedOperationException(
				"Parallelism of the feedback stream must match the parallelism of the original" +
						" stream. Parallelism of original stream: " + expected +
						"; parallelism of feedback stream: " + actual +
						". Parallelism can be modified using DataStream#setParallelism() method");
	}

	feedbackEdges.add(transform);
}
 
Example #26
Source File: OneInputTransformation.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a new {@code OneInputTransformation} from the given input and operator factory.
 *
 * @param input The input {@code Transformation}
 * @param name The name of the {@code Transformation}; used for visualization and logging
 * @param operatorFactory The factory that creates the {@code StreamOperator} for this transformation
 * @param outputType The type of the elements produced by this transformation
 * @param parallelism The parallelism of this {@code OneInputTransformation}
 */
public OneInputTransformation(
		Transformation<IN> input,
		String name,
		StreamOperatorFactory<OUT> operatorFactory,
		TypeInformation<OUT> outputType,
		int parallelism) {
	super(name, outputType, parallelism);
	this.input = input;
	this.operatorFactory = operatorFactory;
}
 
Example #27
Source File: SideOutputTransformation.java    From flink with Apache License 2.0 5 votes vote down vote up
@Override
public Collection<Transformation<?>> getTransitivePredecessors() {
	// This transformation comes first, followed by everything reachable
	// through its input.
	final List<Transformation<?>> predecessors = Lists.newArrayList();
	predecessors.add(this);
	predecessors.addAll(input.getTransitivePredecessors());
	return predecessors;
}
 
Example #28
Source File: SplitTransformation.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a new {@code SplitTransformation} from the given input and {@code OutputSelector}.
 *
 * <p>The output type and parallelism are inherited from the input.
 *
 * @param input The input {@code Transformation}
 * @param outputSelector The {@code OutputSelector} that routes elements to named outputs
 */
public SplitTransformation(
		Transformation<T> input,
		OutputSelector<T> outputSelector) {
	super("Split", input.getOutputType(), input.getParallelism());
	this.input = input;
	this.outputSelector = outputSelector;
}
 
Example #29
Source File: UnionTransformation.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a new {@code UnionTransformation} from the given input {@code Transformations}.
 *
 * <p>The input {@code Transformations} must all have the same type.
 *
 * @param inputs The non-empty list of input {@code Transformations}
 * @throws IllegalArgumentException if {@code inputs} is empty
 * @throws UnsupportedOperationException if the inputs do not all share the same output type
 */
public UnionTransformation(List<Transformation<T>> inputs) {
	// firstInput() fails fast with a clear message instead of an opaque
	// IndexOutOfBoundsException on an empty list.
	super("Union", firstInput(inputs).getOutputType(), inputs.get(0).getParallelism());

	// All inputs must produce the same output type as the union itself.
	for (Transformation<T> input: inputs) {
		if (!input.getOutputType().equals(getOutputType())) {
			throw new UnsupportedOperationException("Type mismatch in input " + input);
		}
	}

	this.inputs = Lists.newArrayList(inputs);
}

/** Returns the first input, rejecting an empty list with a descriptive error. */
private static <T> Transformation<T> firstInput(List<Transformation<T>> inputs) {
	if (inputs.isEmpty()) {
		throw new IllegalArgumentException(
				"A UnionTransformation requires at least one input Transformation.");
	}
	return inputs.get(0);
}
 
Example #30
Source File: SplitTransformation.java    From flink with Apache License 2.0 5 votes vote down vote up
@Override
public Collection<Transformation<?>> getTransitivePredecessors() {
	// This transformation comes first, followed by everything reachable
	// through its input.
	final List<Transformation<?>> predecessors = Lists.newArrayList();
	predecessors.add(this);
	predecessors.addAll(input.getTransitivePredecessors());
	return predecessors;
}