Java Code Examples for org.apache.flink.streaming.api.datastream.DataStream#transform()

The following examples show how to use org.apache.flink.streaming.api.datastream.DataStream#transform(). Each example is taken from an open-source project; the source file, project, and license are noted in the heading above it.
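Before the project examples, here is a minimal, self-contained sketch of the pattern they all share: a custom one-input operator wired into a pipeline with transform(). The DoublingOperator class, the input values, and the operator name are hypothetical and chosen purely for illustration; only the Flink API calls (AbstractStreamOperator, OneInputStreamOperator, DataStream#transform()) mirror what the examples below use.

import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.operators.AbstractStreamOperator;
import org.apache.flink.streaming.api.operators.OneInputStreamOperator;
import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;

public class TransformSketch {

	// Hypothetical operator that doubles every incoming integer.
	private static class DoublingOperator extends AbstractStreamOperator<Integer>
			implements OneInputStreamOperator<Integer, Integer> {

		private static final long serialVersionUID = 1L;

		@Override
		public void processElement(StreamRecord<Integer> element) throws Exception {
			// Reuse the incoming StreamRecord and forward the transformed value downstream.
			output.collect(element.replace(element.getValue() * 2));
		}
	}

	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		DataStream<Integer> source = env.fromElements(1, 2, 3);

		// transform(name, output type information, operator) registers the custom
		// operator in the stream graph and returns the transformed DataStream.
		DataStream<Integer> doubled = source.transform(
			"Doubling Operator",
			BasicTypeInfo.INT_TYPE_INFO,
			new DoublingOperator());

		doubled.print();
		env.execute("DataStream#transform() sketch");
	}
}

Note that several of the examples below pass an operator factory (for instance AsyncWaitOperatorFactory or ContinuousFileReaderOperatorFactory) to transform() rather than an operator instance; the call accepts either form.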
Example 1
Source File: StreamGraphGeneratorTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Test whether an {@link OutputTypeConfigurable} implementation gets called with the correct
 * output type. In this test case the output type must be BasicTypeInfo.INT_TYPE_INFO.
 */
@Test
public void testOutputTypeConfigurationWithOneInputTransformation() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStream<Integer> source = env.fromElements(1, 10);

	OutputTypeConfigurableOperationWithOneInput outputTypeConfigurableOperation = new OutputTypeConfigurableOperationWithOneInput();

	DataStream<Integer> result = source.transform(
		"Single input and output type configurable operation",
		BasicTypeInfo.INT_TYPE_INFO,
		outputTypeConfigurableOperation);

	result.addSink(new DiscardingSink<>());

	env.getStreamGraph();

	assertEquals(BasicTypeInfo.INT_TYPE_INFO, outputTypeConfigurableOperation.getTypeInformation());
}
 
Example 2
Source File: StreamGraphGeneratorTest.java    From flink with Apache License 2.0
/**
 * Test whether an {@link OutputTypeConfigurable} implementation gets called with the correct
 * output type. In this test case the output type must be BasicTypeInfo.INT_TYPE_INFO.
 */
@Test
public void testOutputTypeConfigurationWithOneInputTransformation() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStream<Integer> source = env.fromElements(1, 10);

	OutputTypeConfigurableOperationWithOneInput outputTypeConfigurableOperation = new OutputTypeConfigurableOperationWithOneInput();

	DataStream<Integer> result = source.transform(
		"Single input and output type configurable operation",
		BasicTypeInfo.INT_TYPE_INFO,
		outputTypeConfigurableOperation);

	result.addSink(new DiscardingSink<>());

	env.getStreamGraph();

	assertEquals(BasicTypeInfo.INT_TYPE_INFO, outputTypeConfigurableOperation.getTypeInformation());
}
 
Example 3
Source File: StreamTaskTimerITCase.java    From flink with Apache License 2.0
/**
 * Note: this test fails if we don't check for exceptions in the source contexts and do not
 * synchronize in the source contexts.
 */
@Test
public void testOperatorChainedToSource() throws Exception {

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(timeCharacteristic);
	env.setParallelism(1);

	DataStream<String> source = env.addSource(new InfiniteTestSource());

	source.transform("Custom Operator", BasicTypeInfo.STRING_TYPE_INFO, new TimerOperator(ChainingStrategy.ALWAYS));

	try {
		env.execute("Timer test");
	} catch (JobExecutionException e) {
		verifyJobExecutionException(e);
	}
}
 
Example 4
Source File: StreamTaskTimerITCase.java    From flink with Apache License 2.0
/**
 * Note: this test fails if we don't check for exceptions in the source contexts and do not
 * synchronize in the source contexts.
 */
@Test
public void testOneInputOperatorWithoutChaining() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(timeCharacteristic);
	env.setParallelism(1);

	DataStream<String> source = env.addSource(new InfiniteTestSource());

	source.transform("Custom Operator", BasicTypeInfo.STRING_TYPE_INFO, new TimerOperator(ChainingStrategy.NEVER));

	try {
		env.execute("Timer test");
	} catch (JobExecutionException e) {
		verifyJobExecutionException(e);
	}
}
 
Example 5
Source File: StreamTaskTimerITCase.java    From Flink-CEPplus with Apache License 2.0
/**
 * Note: this test fails if we don't check for exceptions in the source contexts and do not
 * synchronize in the source contexts.
 */
@Test
public void testOperatorChainedToSource() throws Exception {

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(timeCharacteristic);
	env.setParallelism(1);

	DataStream<String> source = env.addSource(new InfiniteTestSource());

	source.transform("Custom Operator", BasicTypeInfo.STRING_TYPE_INFO, new TimerOperator(ChainingStrategy.ALWAYS));

	boolean testSuccess = false;
	try {
		env.execute("Timer test");
	} catch (JobExecutionException e) {
		if (e.getCause() instanceof TimerException) {
			TimerException te = (TimerException) e.getCause();
			if (te.getCause() instanceof RuntimeException) {
				RuntimeException re = (RuntimeException) te.getCause();
				if (re.getMessage().equals("TEST SUCCESS")) {
					testSuccess = true;
				} else {
					throw e;
				}
			} else {
				throw e;
			}
		} else {
			throw e;
		}
	}
	Assert.assertTrue(testSuccess);
}
 
Example 6
Source File: StreamTaskTimerITCase.java    From Flink-CEPplus with Apache License 2.0
/**
 * Note: this test fails if we don't check for exceptions in the source contexts and do not
 * synchronize in the source contexts.
 */
@Test
public void testOneInputOperatorWithoutChaining() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(timeCharacteristic);
	env.setParallelism(1);

	DataStream<String> source = env.addSource(new InfiniteTestSource());

	source.transform("Custom Operator", BasicTypeInfo.STRING_TYPE_INFO, new TimerOperator(ChainingStrategy.NEVER));

	boolean testSuccess = false;
	try {
		env.execute("Timer test");
	} catch (JobExecutionException e) {
		if (e.getCause() instanceof TimerException) {
			TimerException te = (TimerException) e.getCause();
			if (te.getCause() instanceof RuntimeException) {
				RuntimeException re = (RuntimeException) te.getCause();
				if (re.getMessage().equals("TEST SUCCESS")) {
					testSuccess = true;
				} else {
					throw e;
				}
			} else {
				throw e;
			}
		} else {
			throw e;
		}
	}
	Assert.assertTrue(testSuccess);
}
 
Example 7
Source File: StreamTaskTimerITCase.java    From flink with Apache License 2.0
/**
 * Note: this test fails if we don't check for exceptions in the source contexts and do not
 * synchronize in the source contexts.
 */
@Test
public void testOperatorChainedToSource() throws Exception {

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(timeCharacteristic);
	env.setParallelism(1);

	DataStream<String> source = env.addSource(new InfiniteTestSource());

	source.transform("Custom Operator", BasicTypeInfo.STRING_TYPE_INFO, new TimerOperator(ChainingStrategy.ALWAYS));

	boolean testSuccess = false;
	try {
		env.execute("Timer test");
	} catch (JobExecutionException e) {
		if (e.getCause() instanceof TimerException) {
			TimerException te = (TimerException) e.getCause();
			if (te.getCause() instanceof RuntimeException) {
				RuntimeException re = (RuntimeException) te.getCause();
				if (re.getMessage().equals("TEST SUCCESS")) {
					testSuccess = true;
				} else {
					throw e;
				}
			} else {
				throw e;
			}
		} else {
			throw e;
		}
	}
	Assert.assertTrue(testSuccess);
}
 
Example 8
Source File: StreamTaskTimerITCase.java    From flink with Apache License 2.0
/**
 * Note: this test fails if we don't check for exceptions in the source contexts and do not
 * synchronize in the source contexts.
 */
@Test
public void testOneInputOperatorWithoutChaining() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(timeCharacteristic);
	env.setParallelism(1);

	DataStream<String> source = env.addSource(new InfiniteTestSource());

	source.transform("Custom Operator", BasicTypeInfo.STRING_TYPE_INFO, new TimerOperator(ChainingStrategy.NEVER));

	boolean testSuccess = false;
	try {
		env.execute("Timer test");
	} catch (JobExecutionException e) {
		if (e.getCause() instanceof TimerException) {
			TimerException te = (TimerException) e.getCause();
			if (te.getCause() instanceof RuntimeException) {
				RuntimeException re = (RuntimeException) te.getCause();
				if (re.getMessage().equals("TEST SUCCESS")) {
					testSuccess = true;
				} else {
					throw e;
				}
			} else {
				throw e;
			}
		} else {
			throw e;
		}
	}
	Assert.assertTrue(testSuccess);
}
 
Example 9
Source File: AsyncWaitOperatorTest.java    From flink with Apache License 2.0
/**
 * This helper function is needed to check that the temporary fix for FLINK-13063 can be backwards compatible with
 * the old chaining behavior by setting the ChainingStrategy manually. TODO: remove after a proper fix for
 * FLINK-13063 is in place that allows chaining.
 */
private <IN, OUT> SingleOutputStreamOperator<OUT> addAsyncOperatorLegacyChained(
	DataStream<IN> in,
	AsyncFunction<IN, OUT> func,
	long timeout,
	int bufSize,
	AsyncDataStream.OutputMode mode) {

	TypeInformation<OUT> outTypeInfo = TypeExtractor.getUnaryOperatorReturnType(
		func,
		AsyncFunction.class,
		0,
		1,
		new int[]{1, 0},
		in.getType(),
		Utils.getCallLocationName(),
		true);

	// create transform
	AsyncWaitOperator<IN, OUT> operator = new AsyncWaitOperator<>(
		in.getExecutionEnvironment().clean(func),
		timeout,
		bufSize,
		mode);

	operator.setChainingStrategy(ChainingStrategy.ALWAYS);

	return in.transform("async wait operator", outTypeInfo, operator);
}
 
Example 10
Source File: SiddhiStreamFactory.java    From flink-siddhi with Apache License 2.0
@SuppressWarnings("unchecked")
public static <OUT> DataStream<OUT> createDataStream(SiddhiOperatorContext context, DataStream<Tuple2<StreamRoute, Object>> namedStream) {
    return namedStream.transform(
        context.getName(),
        Types.TUPLE(TypeInformation.of(String.class), TypeInformation.of(Object.class)),
        new SiddhiStreamOperator(context));
}
 
Example 11
Source File: AsyncWaitOperatorTest.java    From flink with Apache License 2.0
/**
 * This helper function is needed to check that the temporary fix for FLINK-13063 can be backwards compatible with
 * the old chaining behavior by setting the ChainingStrategy manually. TODO: remove after a proper fix for
 * FLINK-13063 is in place that allows chaining.
 */
private <IN, OUT> SingleOutputStreamOperator<OUT> addAsyncOperatorLegacyChained(
	DataStream<IN> in,
	AsyncFunction<IN, OUT> func,
	long timeout,
	int bufSize,
	AsyncDataStream.OutputMode mode) {

	TypeInformation<OUT> outTypeInfo = TypeExtractor.getUnaryOperatorReturnType(
		func,
		AsyncFunction.class,
		0,
		1,
		new int[]{1, 0},
		in.getType(),
		Utils.getCallLocationName(),
		true);

	// create transform
	AsyncWaitOperatorFactory<IN, OUT> factory = new AsyncWaitOperatorFactory<>(
		in.getExecutionEnvironment().clean(func),
		timeout,
		bufSize,
		mode);

	factory.setChainingStrategy(ChainingStrategy.ALWAYS);

	return in.transform("async wait operator", outTypeInfo, factory);
}
 
Example 12
Source File: ContinuousFileProcessingITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testProgram() throws Exception {

	/*
	* This test checks the interplay between the monitor and the reader
	* and also the failExternally() functionality. To test the latter we
	* set the parallelism to 1 so that we have the chaining between the sink,
	* which throws the SuccessException to signal the end of the test, and the
	* reader.
	* */

	TextInputFormat format = new TextInputFormat(new Path(hdfsURI));
	format.setFilePath(hdfsURI);
	format.setFilesFilter(FilePathFilter.createDefaultFilter());

	// create the stream execution environment with a parallelism > 1 to test
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(PARALLELISM);

	ContinuousFileMonitoringFunction<String> monitoringFunction =
		new ContinuousFileMonitoringFunction<>(format,
			FileProcessingMode.PROCESS_CONTINUOUSLY,
			env.getParallelism(), INTERVAL);

	// the monitor has always DOP 1
	DataStream<TimestampedFileInputSplit> splits = env.addSource(monitoringFunction);
	Assert.assertEquals(1, splits.getParallelism());

	ContinuousFileReaderOperator<String> reader = new ContinuousFileReaderOperator<>(format);
	TypeInformation<String> typeInfo = TypeExtractor.getInputFormatTypes(format);

	// the readers can be multiple
	DataStream<String> content = splits.transform("FileSplitReader", typeInfo, reader);
	Assert.assertEquals(PARALLELISM, content.getParallelism());

	// finally for the sink we set the parallelism to 1 so that we can verify the output
	TestingSinkFunction sink = new TestingSinkFunction();
	content.addSink(sink).setParallelism(1);

	Thread job = new Thread() {

		@Override
		public void run() {
			try {
				env.execute("ContinuousFileProcessingITCase Job.");
			} catch (Exception e) {
				Throwable th = e;
				for (int depth = 0; depth < 20; depth++) {
					if (th instanceof SuccessException) {
						return;
					} else if (th.getCause() != null) {
						th = th.getCause();
					} else {
						break;
					}
				}
				e.printStackTrace();
				Assert.fail(e.getMessage());
			}
		}
	};
	job.start();

	// The modification time of the last created file.
	long lastCreatedModTime = Long.MIN_VALUE;

	// create the files to be read
	for (int i = 0; i < NO_OF_FILES; i++) {
		Tuple2<org.apache.hadoop.fs.Path, String> tmpFile;
		long modTime;
		do {

			// give it some time so that the files have
			// different modification timestamps.
			Thread.sleep(50);

			tmpFile = fillWithData(hdfsURI, "file", i, "This is test line.");

			modTime = hdfs.getFileStatus(tmpFile.f0).getModificationTime();
			if (modTime <= lastCreatedModTime) {
				// delete the last created file to recreate it with a different timestamp
				hdfs.delete(tmpFile.f0, false);
			}
		} while (modTime <= lastCreatedModTime);
		lastCreatedModTime = modTime;

		// put the contents in the expected results list before the reader picks them
		// this is to guarantee that they are in before the reader finishes (avoid race conditions)
		expectedContents.put(i, tmpFile.f1);

		org.apache.hadoop.fs.Path file =
			new org.apache.hadoop.fs.Path(hdfsURI + "/file" + i);
		hdfs.rename(tmpFile.f0, file);
		Assert.assertTrue(hdfs.exists(file));
	}

	// wait for the job to finish.
	job.join();
}
 
Example 13
Source File: ContinuousFileProcessingITCase.java    From flink with Apache License 2.0
@Test
public void testProgram() throws Exception {

	/*
	* This test checks the interplay between the monitor and the reader
	* and also the failExternally() functionality. To test the latter we
	* set the parallelism to 1 so that we have the chaining between the sink,
	* which throws the SuccessException to signal the end of the test, and the
	* reader.
	* */

	TextInputFormat format = new TextInputFormat(new Path(hdfsURI));
	format.setFilePath(hdfsURI);
	format.setFilesFilter(FilePathFilter.createDefaultFilter());

	// create the stream execution environment with a parallelism > 1 to test
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(PARALLELISM);

	ContinuousFileMonitoringFunction<String> monitoringFunction =
		new ContinuousFileMonitoringFunction<>(format,
			FileProcessingMode.PROCESS_CONTINUOUSLY,
			env.getParallelism(), INTERVAL);

	// the monitor has always DOP 1
	DataStream<TimestampedFileInputSplit> splits = env.addSource(monitoringFunction);
	Assert.assertEquals(1, splits.getParallelism());

	ContinuousFileReaderOperator<String> reader = new ContinuousFileReaderOperator<>(format);
	TypeInformation<String> typeInfo = TypeExtractor.getInputFormatTypes(format);

	// the readers can be multiple
	DataStream<String> content = splits.transform("FileSplitReader", typeInfo, reader);
	Assert.assertEquals(PARALLELISM, content.getParallelism());

	// finally for the sink we set the parallelism to 1 so that we can verify the output
	TestingSinkFunction sink = new TestingSinkFunction();
	content.addSink(sink).setParallelism(1);

	Thread job = new Thread() {

		@Override
		public void run() {
			try {
				env.execute("ContinuousFileProcessingITCase Job.");
			} catch (Exception e) {
				Throwable th = e;
				for (int depth = 0; depth < 20; depth++) {
					if (th instanceof SuccessException) {
						return;
					} else if (th.getCause() != null) {
						th = th.getCause();
					} else {
						break;
					}
				}
				e.printStackTrace();
				Assert.fail(e.getMessage());
			}
		}
	};
	job.start();

	// The modification time of the last created file.
	long lastCreatedModTime = Long.MIN_VALUE;

	// create the files to be read
	for (int i = 0; i < NO_OF_FILES; i++) {
		Tuple2<org.apache.hadoop.fs.Path, String> tmpFile;
		long modTime;
		do {

			// give it some time so that the files have
			// different modification timestamps.
			Thread.sleep(50);

			tmpFile = fillWithData(hdfsURI, "file", i, "This is test line.");

			modTime = hdfs.getFileStatus(tmpFile.f0).getModificationTime();
			if (modTime <= lastCreatedModTime) {
				// delete the last created file to recreate it with a different timestamp
				hdfs.delete(tmpFile.f0, false);
			}
		} while (modTime <= lastCreatedModTime);
		lastCreatedModTime = modTime;

		// put the contents in the expected results list before the reader picks them
		// this is to guarantee that they are in before the reader finishes (avoid race conditions)
		expectedContents.put(i, tmpFile.f1);

		org.apache.hadoop.fs.Path file =
			new org.apache.hadoop.fs.Path(hdfsURI + "/file" + i);
		hdfs.rename(tmpFile.f0, file);
		Assert.assertTrue(hdfs.exists(file));
	}

	// wait for the job to finish.
	job.join();
}
 
Example 14
Source File: SiddhiStreamFactory.java    From bahir-flink with Apache License 2.0
@SuppressWarnings("unchecked")
public static <OUT> DataStream<OUT> createDataStream(SiddhiOperatorContext context, DataStream<Tuple2<String, Object>> namedStream, String outStreamId) {
    return namedStream.transform(context.getName(), context.getOutputStreamType(), new SiddhiStreamOperator(context, outStreamId));
}
 
Example 15
Source File: ContinuousFileProcessingITCase.java    From flink with Apache License 2.0
@Test
public void testProgram() throws Exception {

	/*
	* This test checks the interplay between the monitor and the reader
	* and also the failExternally() functionality. To test the latter we
	* set the parallelism to 1 so that we have the chaining between the sink,
	* which throws the SuccessException to signal the end of the test, and the
	* reader.
	* */

	TextInputFormat format = new TextInputFormat(new Path(hdfsURI));
	format.setFilePath(hdfsURI);
	format.setFilesFilter(FilePathFilter.createDefaultFilter());

	// create the stream execution environment with a parallelism > 1 to test
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(PARALLELISM);

	ContinuousFileMonitoringFunction<String> monitoringFunction =
		new ContinuousFileMonitoringFunction<>(format,
			FileProcessingMode.PROCESS_CONTINUOUSLY,
			env.getParallelism(), INTERVAL);

	// the monitor has always DOP 1
	DataStream<TimestampedFileInputSplit> splits = env.addSource(monitoringFunction);
	Assert.assertEquals(1, splits.getParallelism());

	TypeInformation<String> typeInfo = TypeExtractor.getInputFormatTypes(format);

	// the readers can be multiple
	DataStream<String> content = splits.transform("FileSplitReader", typeInfo, new ContinuousFileReaderOperatorFactory<>(format));
	Assert.assertEquals(PARALLELISM, content.getParallelism());

	// finally for the sink we set the parallelism to 1 so that we can verify the output
	TestingSinkFunction sink = new TestingSinkFunction();
	content.addSink(sink).setParallelism(1);

	CompletableFuture<Void> jobFuture = new CompletableFuture<>();
	new Thread(() -> {
		try {
			env.execute("ContinuousFileProcessingITCase Job.");
			jobFuture.complete(null);
		} catch (Exception e) {
			if (ExceptionUtils.findThrowable(e, SuccessException.class).isPresent()) {
				jobFuture.complete(null);
			} else {
				jobFuture.completeExceptionally(e);
			}
		}
	}).start();

	// The modification time of the last created file.
	long lastCreatedModTime = Long.MIN_VALUE;

	// create the files to be read
	for (int i = 0; i < NO_OF_FILES; i++) {
		Tuple2<org.apache.hadoop.fs.Path, String> tmpFile;
		long modTime;
		do {

			// give it some time so that the files have
			// different modification timestamps.
			Thread.sleep(50);

			tmpFile = fillWithData(hdfsURI, "file", i, "This is test line.");

			modTime = hdfs.getFileStatus(tmpFile.f0).getModificationTime();
			if (modTime <= lastCreatedModTime) {
				// delete the last created file to recreate it with a different timestamp
				hdfs.delete(tmpFile.f0, false);
			}
		} while (modTime <= lastCreatedModTime);
		lastCreatedModTime = modTime;

		// put the contents in the expected results list before the reader picks them
		// this is to guarantee that they are in before the reader finishes (avoid race conditions)
		expectedContents.put(i, tmpFile.f1);

		org.apache.hadoop.fs.Path file =
			new org.apache.hadoop.fs.Path(hdfsURI + "/file" + i);
		hdfs.rename(tmpFile.f0, file);
		Assert.assertTrue(hdfs.exists(file));
	}

	jobFuture.get();
}