org.apache.flink.streaming.api.functions.source.FileProcessingMode Java Examples

The following examples show how to use org.apache.flink.streaming.api.functions.source.FileProcessingMode. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ContinuousFileProcessingTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
@Test
public void testInvalidPathSpecification() throws Exception {

	String invalidPath = "hdfs://" + hdfsCluster.getURI().getHost() + ":" + hdfsCluster.getNameNodePort() + "/invalid/";
	TextInputFormat format = new TextInputFormat(new Path(invalidPath));

	ContinuousFileMonitoringFunction<String> monitoringFunction =
		new ContinuousFileMonitoringFunction<>(format,
			FileProcessingMode.PROCESS_ONCE, 1, INTERVAL);
	try {
		monitoringFunction.run(new DummySourceContext() {
			@Override
			public void collect(TimestampedFileInputSplit element) {
				// we should never arrive here with an invalid path
				Assert.fail("Test passes with an invalid path.");
			}
		});

		// we should never arrive here with an invalid path
		Assert.fail("Test passed with an invalid path.");

	} catch (FileNotFoundException e) {
		Assert.assertEquals("The provided file path " + format.getFilePath() + " does not exist.", e.getMessage());
	}
}
 
Example #2
Source File: ContinuousFileProcessingTest.java    From flink with Apache License 2.0 6 votes vote down vote up
@Test
public void testInvalidPathSpecification() throws Exception {

	String invalidPath = "hdfs://" + hdfsCluster.getURI().getHost() + ":" + hdfsCluster.getNameNodePort() + "/invalid/";
	TextInputFormat format = new TextInputFormat(new Path(invalidPath));

	ContinuousFileMonitoringFunction<String> monitoringFunction =
		new ContinuousFileMonitoringFunction<>(format,
			FileProcessingMode.PROCESS_ONCE, 1, INTERVAL);
	try {
		monitoringFunction.run(new DummySourceContext() {
			@Override
			public void collect(TimestampedFileInputSplit element) {
				// we should never arrive here with an invalid path
				Assert.fail("Test passes with an invalid path.");
			}
		});

		// we should never arrive here with an invalid path
		Assert.fail("Test passed with an invalid path.");

	} catch (FileNotFoundException e) {
		Assert.assertEquals("The provided file path " + format.getFilePath() + " does not exist.", e.getMessage());
	}
}
 
Example #3
Source File: ContinuousFileProcessingCheckpointITCase.java    From flink with Apache License 2.0 6 votes vote down vote up
@Override
public void testProgram(StreamExecutionEnvironment env) {

	env.enableCheckpointing(10);

	// create and start the file creating thread.
	fc = new FileCreator();
	fc.start();

	// create the monitoring source along with the necessary readers.
	TextInputFormat format = new TextInputFormat(new org.apache.flink.core.fs.Path(localFsURI));
	format.setFilesFilter(FilePathFilter.createDefaultFilter());

	DataStream<String> inputStream = env.readFile(format, localFsURI,
		FileProcessingMode.PROCESS_CONTINUOUSLY, INTERVAL);

	TestingSinkFunction sink = new TestingSinkFunction();

	inputStream.flatMap(new FlatMapFunction<String, String>() {
		@Override
		public void flatMap(String value, Collector<String> out) throws Exception {
			out.collect(value);
		}
	}).addSink(sink).setParallelism(1);
}
 
Example #4
Source File: ContinuousFileProcessingTest.java    From flink with Apache License 2.0 6 votes vote down vote up
@Test
public void testInvalidPathSpecification() throws Exception {

	String invalidPath = "hdfs://" + hdfsCluster.getURI().getHost() + ":" + hdfsCluster.getNameNodePort() + "/invalid/";
	TextInputFormat format = new TextInputFormat(new Path(invalidPath));

	ContinuousFileMonitoringFunction<String> monitoringFunction =
		new ContinuousFileMonitoringFunction<>(format,
			FileProcessingMode.PROCESS_ONCE, 1, INTERVAL);
	try {
		monitoringFunction.run(new DummySourceContext() {
			@Override
			public void collect(TimestampedFileInputSplit element) {
				// we should never arrive here with an invalid path
				Assert.fail("Test passes with an invalid path.");
			}
		});

		// we should never arrive here with an invalid path
		Assert.fail("Test passed with an invalid path.");

	} catch (FileNotFoundException e) {
		Assert.assertEquals("The provided file path " + format.getFilePath() + " does not exist.", e.getMessage());
	}
}
 
Example #5
Source File: ContinuousFileProcessingTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Create continuous monitoring function with 1 reader-parallelism and interval: {@link #INTERVAL}.
 */
private <OUT> ContinuousFileMonitoringFunction<OUT> createTestContinuousFileMonitoringFunction(FileInputFormat<OUT> format, FileProcessingMode fileProcessingMode) {
	ContinuousFileMonitoringFunction<OUT> monitoringFunction =
		new ContinuousFileMonitoringFunction<>(format, fileProcessingMode, 1, INTERVAL);
	monitoringFunction.setRuntimeContext(Mockito.mock(RuntimeContext.class));
	return monitoringFunction;
}
 
Example #6
Source File: StreamExecutionEnvironment.java    From flink with Apache License 2.0 5 votes vote down vote up
private <OUT> DataStreamSource<OUT> createFileInput(FileInputFormat<OUT> inputFormat,
													TypeInformation<OUT> typeInfo,
													String sourceName,
													FileProcessingMode monitoringMode,
													long interval) {

	Preconditions.checkNotNull(inputFormat, "Unspecified file input format.");
	Preconditions.checkNotNull(typeInfo, "Unspecified output type information.");
	Preconditions.checkNotNull(sourceName, "Unspecified name for the source.");
	Preconditions.checkNotNull(monitoringMode, "Unspecified monitoring mode.");

	Preconditions.checkArgument(monitoringMode.equals(FileProcessingMode.PROCESS_ONCE) ||
			interval >= ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL,
		"The path monitoring interval cannot be less than " +
				ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL + " ms.");

	ContinuousFileMonitoringFunction<OUT> monitoringFunction =
		new ContinuousFileMonitoringFunction<>(inputFormat, monitoringMode, getParallelism(), interval);

	ContinuousFileReaderOperatorFactory<OUT, TimestampedFileInputSplit> factory =
			new ContinuousFileReaderOperatorFactory<>(inputFormat);

	SingleOutputStreamOperator<OUT> source = addSource(monitoringFunction, sourceName)
			.transform("Split Reader: " + sourceName, typeInfo, factory);

	return new DataStreamSource<>(source);
}
 
Example #7
Source File: ContinuousFileProcessingTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Create continuous monitoring function with 1 reader-parallelism and interval: {@link #INTERVAL}.
 */
private <OUT> ContinuousFileMonitoringFunction<OUT> createTestContinuousFileMonitoringFunction(FileInputFormat<OUT> format, FileProcessingMode fileProcessingMode) {
	ContinuousFileMonitoringFunction<OUT> monitoringFunction =
		new ContinuousFileMonitoringFunction<>(format, fileProcessingMode, 1, INTERVAL);
	monitoringFunction.setRuntimeContext(Mockito.mock(RuntimeContext.class));
	return monitoringFunction;
}
 
Example #8
Source File: ContinuousFileProcessingTest.java    From flink with Apache License 2.0 5 votes vote down vote up
@Test
public void testSortingOnModTime() throws Exception {
	String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";

	final long[] modTimes = new long[NO_OF_FILES];
	final org.apache.hadoop.fs.Path[] filesCreated = new org.apache.hadoop.fs.Path[NO_OF_FILES];

	for (int i = 0; i < NO_OF_FILES; i++) {
		Tuple2<org.apache.hadoop.fs.Path, String> file =
			createFileAndFillWithData(testBasePath, "file", i, "This is test line.");
		Thread.sleep(400);

		filesCreated[i] = file.f0;
		modTimes[i] = hdfs.getFileStatus(file.f0).getModificationTime();
	}

	TextInputFormat format = new TextInputFormat(new Path(testBasePath));
	format.setFilesFilter(FilePathFilter.createDefaultFilter());

	// this is just to verify that all splits have been forwarded later.
	FileInputSplit[] splits = format.createInputSplits(1);

	ContinuousFileMonitoringFunction<String> monitoringFunction =
		createTestContinuousFileMonitoringFunction(format, FileProcessingMode.PROCESS_ONCE);

	ModTimeVerifyingSourceContext context = new ModTimeVerifyingSourceContext(modTimes);

	monitoringFunction.open(new Configuration());
	monitoringFunction.run(context);
	Assert.assertEquals(splits.length, context.getCounter());

	// delete the created files.
	for (int i = 0; i < NO_OF_FILES; i++) {
		hdfs.delete(filesCreated[i], false);
	}
}
 
Example #9
Source File: HiveTableSource.java    From flink with Apache License 2.0 5 votes vote down vote up
private DataStream<RowData> createStreamSourceForNonPartitionTable(
		StreamExecutionEnvironment execEnv,
		TypeInformation<RowData> typeInfo,
		HiveTableInputFormat inputFormat,
		HiveTablePartition hiveTable) {
	HiveTableFileInputFormat fileInputFormat = new HiveTableFileInputFormat(inputFormat, hiveTable);

	Configuration configuration = new Configuration();
	catalogTable.getOptions().forEach(configuration::setString);
	String consumeOrderStr = configuration.get(STREAMING_SOURCE_CONSUME_ORDER);
	ConsumeOrder consumeOrder = ConsumeOrder.getConsumeOrder(consumeOrderStr);
	if (consumeOrder != ConsumeOrder.CREATE_TIME_ORDER) {
		throw new UnsupportedOperationException(
				"Only " + ConsumeOrder.CREATE_TIME_ORDER + " is supported for non partition table.");
	}

	String consumeOffset = configuration.get(STREAMING_SOURCE_CONSUME_START_OFFSET);
	// to Local zone mills instead of UTC mills
	long currentReadTime = TimestampData.fromLocalDateTime(toLocalDateTime(consumeOffset))
			.toTimestamp().getTime();

	Duration monitorInterval = configuration.get(STREAMING_SOURCE_MONITOR_INTERVAL);

	ContinuousFileMonitoringFunction<RowData> monitoringFunction =
			new ContinuousFileMonitoringFunction<>(
					fileInputFormat,
					FileProcessingMode.PROCESS_CONTINUOUSLY,
					execEnv.getParallelism(),
					monitorInterval.toMillis(),
					currentReadTime);

	ContinuousFileReaderOperatorFactory<RowData, TimestampedFileInputSplit> factory =
			new ContinuousFileReaderOperatorFactory<>(fileInputFormat);

	String sourceName = "HiveFileMonitoringFunction";
	SingleOutputStreamOperator<RowData> source = execEnv.addSource(monitoringFunction, sourceName)
			.transform("Split Reader: " + sourceName, typeInfo, factory);

	return new DataStreamSource<>(source);
}
 
Example #10
Source File: SimpleFlinkStreamingCounterDataSource.java    From jMetalSP with MIT License 5 votes vote down vote up
@Override
public void run() {

    JMetalLogger.logger.info("Run Fink method in the streaming data source invoked") ;
    JMetalLogger.logger.info("Directory: " + directoryName) ;

   // environment.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(1,0));
    //environment.enableCheckpointing(10);
    Path filePath = new Path(directoryName);
    TextInputFormat inputFormat = new TextInputFormat(filePath);
    inputFormat.setFilesFilter(FilePathFilter.createDefaultFilter());
    DataStreamSource<String> data =environment.readFile(inputFormat,directoryName,
            FileProcessingMode.PROCESS_CONTINUOUSLY,time);


    try {
        Iterator<String> it=DataStreamUtils.collect(data);
        while (it.hasNext()){
            Integer number = Integer.parseInt(it.next());
            observable.setChanged();
            observable.notifyObservers(new ObservedValue<Integer>(number));
        }

    } catch (Exception e){
        e.printStackTrace();
    }


}
 
Example #11
Source File: StreamExecutionEnvironment.java    From flink with Apache License 2.0 5 votes vote down vote up
private <OUT> DataStreamSource<OUT> createFileInput(FileInputFormat<OUT> inputFormat,
													TypeInformation<OUT> typeInfo,
													String sourceName,
													FileProcessingMode monitoringMode,
													long interval) {

	Preconditions.checkNotNull(inputFormat, "Unspecified file input format.");
	Preconditions.checkNotNull(typeInfo, "Unspecified output type information.");
	Preconditions.checkNotNull(sourceName, "Unspecified name for the source.");
	Preconditions.checkNotNull(monitoringMode, "Unspecified monitoring mode.");

	Preconditions.checkArgument(monitoringMode.equals(FileProcessingMode.PROCESS_ONCE) ||
			interval >= ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL,
		"The path monitoring interval cannot be less than " +
				ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL + " ms.");

	ContinuousFileMonitoringFunction<OUT> monitoringFunction =
		new ContinuousFileMonitoringFunction<>(inputFormat, monitoringMode, getParallelism(), interval);

	ContinuousFileReaderOperator<OUT> reader =
		new ContinuousFileReaderOperator<>(inputFormat);

	SingleOutputStreamOperator<OUT> source = addSource(monitoringFunction, sourceName)
			.transform("Split Reader: " + sourceName, typeInfo, reader);

	return new DataStreamSource<>(source);
}
 
Example #12
Source File: ContinuousFileProcessingCheckpointITCase.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
@Override
public void testProgram(StreamExecutionEnvironment env) {

	// set the restart strategy.
	env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(NO_OF_RETRIES, 0));
	env.enableCheckpointing(10);

	// create and start the file creating thread.
	fc = new FileCreator();
	fc.start();

	// create the monitoring source along with the necessary readers.
	TextInputFormat format = new TextInputFormat(new org.apache.flink.core.fs.Path(localFsURI));
	format.setFilesFilter(FilePathFilter.createDefaultFilter());

	DataStream<String> inputStream = env.readFile(format, localFsURI,
		FileProcessingMode.PROCESS_CONTINUOUSLY, INTERVAL);

	TestingSinkFunction sink = new TestingSinkFunction();

	inputStream.flatMap(new FlatMapFunction<String, String>() {
		@Override
		public void flatMap(String value, Collector<String> out) throws Exception {
			out.collect(value);
		}
	}).addSink(sink).setParallelism(1);
}
 
Example #13
Source File: ContinuousFileProcessingTest.java    From flink with Apache License 2.0 5 votes vote down vote up
@Test
public void testSortingOnModTime() throws Exception {
	String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";

	final long[] modTimes = new long[NO_OF_FILES];
	final org.apache.hadoop.fs.Path[] filesCreated = new org.apache.hadoop.fs.Path[NO_OF_FILES];

	for (int i = 0; i < NO_OF_FILES; i++) {
		Tuple2<org.apache.hadoop.fs.Path, String> file =
			createFileAndFillWithData(testBasePath, "file", i, "This is test line.");
		Thread.sleep(400);

		filesCreated[i] = file.f0;
		modTimes[i] = hdfs.getFileStatus(file.f0).getModificationTime();
	}

	TextInputFormat format = new TextInputFormat(new Path(testBasePath));
	format.setFilesFilter(FilePathFilter.createDefaultFilter());

	// this is just to verify that all splits have been forwarded later.
	FileInputSplit[] splits = format.createInputSplits(1);

	ContinuousFileMonitoringFunction<String> monitoringFunction =
		createTestContinuousFileMonitoringFunction(format, FileProcessingMode.PROCESS_ONCE);

	ModTimeVerifyingSourceContext context = new ModTimeVerifyingSourceContext(modTimes);

	monitoringFunction.open(new Configuration());
	monitoringFunction.run(context);
	Assert.assertEquals(splits.length, context.getCounter());

	// delete the created files.
	for (int i = 0; i < NO_OF_FILES; i++) {
		hdfs.delete(filesCreated[i], false);
	}
}
 
Example #14
Source File: ContinuousFileProcessingCheckpointITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
@Override
public void testProgram(StreamExecutionEnvironment env) {

	// set the restart strategy.
	env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(NO_OF_RETRIES, 0));
	env.enableCheckpointing(10);

	// create and start the file creating thread.
	fc = new FileCreator();
	fc.start();

	// create the monitoring source along with the necessary readers.
	TextInputFormat format = new TextInputFormat(new org.apache.flink.core.fs.Path(localFsURI));
	format.setFilesFilter(FilePathFilter.createDefaultFilter());

	DataStream<String> inputStream = env.readFile(format, localFsURI,
		FileProcessingMode.PROCESS_CONTINUOUSLY, INTERVAL);

	TestingSinkFunction sink = new TestingSinkFunction();

	inputStream.flatMap(new FlatMapFunction<String, String>() {
		@Override
		public void flatMap(String value, Collector<String> out) throws Exception {
			out.collect(value);
		}
	}).addSink(sink).setParallelism(1);
}
 
Example #15
Source File: InsideDataSource.java    From flink-simple-tutorial with Apache License 2.0 5 votes vote down vote up
public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();


        // 添加数组作为数据输入源
        String[] elementInput = new String[]{"hello Flink", "Second Line"};
        DataStream<String> text = env.fromElements(elementInput);

        // 添加List集合作为数据输入源
        List<String> collectionInput = new ArrayList<>();
        collectionInput.add("hello Flink");
        DataStream<String> text2 = env.fromCollection(collectionInput);

        // 添加Socket作为数据输入源
        // 4个参数 -> (hostname:Ip地址, port:端口, delimiter:分隔符, maxRetry:最大重试次数)
        DataStream<String> text3 = env.socketTextStream("localhost", 9999, "\n", 4);


        // 添加文件源
        // 直接读取文本文件
        DataStream<String> text4 = env.readTextFile("/opt/history.log");
        // 指定 CsvInputFormat, 监控csv文件(两种模式), 时间间隔是10ms
        DataStream<String> text5 = env.readFile(new CsvInputFormat<String>(new Path("/opt/history.csv")) {
            @Override
            protected String fillRecord(String s, Object[] objects) {
                return null;
            }
        },"/opt/history.csv", FileProcessingMode.PROCESS_CONTINUOUSLY,10);

        text.print();

        env.execute("Inside DataSource Demo");
    }
 
Example #16
Source File: StreamExecutionEnvironment.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
private <OUT> DataStreamSource<OUT> createFileInput(FileInputFormat<OUT> inputFormat,
													TypeInformation<OUT> typeInfo,
													String sourceName,
													FileProcessingMode monitoringMode,
													long interval) {

	Preconditions.checkNotNull(inputFormat, "Unspecified file input format.");
	Preconditions.checkNotNull(typeInfo, "Unspecified output type information.");
	Preconditions.checkNotNull(sourceName, "Unspecified name for the source.");
	Preconditions.checkNotNull(monitoringMode, "Unspecified monitoring mode.");

	Preconditions.checkArgument(monitoringMode.equals(FileProcessingMode.PROCESS_ONCE) ||
			interval >= ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL,
		"The path monitoring interval cannot be less than " +
				ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL + " ms.");

	ContinuousFileMonitoringFunction<OUT> monitoringFunction =
		new ContinuousFileMonitoringFunction<>(inputFormat, monitoringMode, getParallelism(), interval);

	ContinuousFileReaderOperator<OUT> reader =
		new ContinuousFileReaderOperator<>(inputFormat);

	SingleOutputStreamOperator<OUT> source = addSource(monitoringFunction, sourceName)
			.transform("Split Reader: " + sourceName, typeInfo, reader);

	return new DataStreamSource<>(source);
}
 
Example #17
Source File: ContinuousFileProcessingTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Create continuous monitoring function with 1 reader-parallelism and interval: {@link #INTERVAL}.
 */
private <OUT> ContinuousFileMonitoringFunction<OUT> createTestContinuousFileMonitoringFunction(FileInputFormat<OUT> format, FileProcessingMode fileProcessingMode) {
	ContinuousFileMonitoringFunction<OUT> monitoringFunction =
		new ContinuousFileMonitoringFunction<>(format, fileProcessingMode, 1, INTERVAL);
	monitoringFunction.setRuntimeContext(Mockito.mock(RuntimeContext.class));
	return monitoringFunction;
}
 
Example #18
Source File: ContinuousFileProcessingTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
@Test
public void testSortingOnModTime() throws Exception {
	String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";

	final long[] modTimes = new long[NO_OF_FILES];
	final org.apache.hadoop.fs.Path[] filesCreated = new org.apache.hadoop.fs.Path[NO_OF_FILES];

	for (int i = 0; i < NO_OF_FILES; i++) {
		Tuple2<org.apache.hadoop.fs.Path, String> file =
			createFileAndFillWithData(testBasePath, "file", i, "This is test line.");
		Thread.sleep(400);

		filesCreated[i] = file.f0;
		modTimes[i] = hdfs.getFileStatus(file.f0).getModificationTime();
	}

	TextInputFormat format = new TextInputFormat(new Path(testBasePath));
	format.setFilesFilter(FilePathFilter.createDefaultFilter());

	// this is just to verify that all splits have been forwarded later.
	FileInputSplit[] splits = format.createInputSplits(1);

	ContinuousFileMonitoringFunction<String> monitoringFunction =
		createTestContinuousFileMonitoringFunction(format, FileProcessingMode.PROCESS_ONCE);

	ModTimeVerifyingSourceContext context = new ModTimeVerifyingSourceContext(modTimes);

	monitoringFunction.open(new Configuration());
	monitoringFunction.run(context);
	Assert.assertEquals(splits.length, context.getCounter());

	// delete the created files.
	for (int i = 0; i < NO_OF_FILES; i++) {
		hdfs.delete(filesCreated[i], false);
	}
}
 
Example #19
Source File: ContinuousFileProcessingTest.java    From flink with Apache License 2.0 4 votes vote down vote up
@Test
public void testProcessOnce() throws Exception {
	String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";

	final OneShotLatch latch = new OneShotLatch();

	// create a single file in the directory
	Tuple2<org.apache.hadoop.fs.Path, String> bootstrap =
		createFileAndFillWithData(testBasePath, "file", NO_OF_FILES + 1, "This is test line.");
	Assert.assertTrue(hdfs.exists(bootstrap.f0));

	// the source is supposed to read only this file.
	final Set<String> filesToBeRead = new TreeSet<>();
	filesToBeRead.add(bootstrap.f0.getName());

	TextInputFormat format = new TextInputFormat(new Path(testBasePath));
	format.setFilesFilter(FilePathFilter.createDefaultFilter());

	final ContinuousFileMonitoringFunction<String> monitoringFunction =
		createTestContinuousFileMonitoringFunction(format, FileProcessingMode.PROCESS_ONCE);

	final FileVerifyingSourceContext context = new FileVerifyingSourceContext(latch, monitoringFunction);

	final Thread t = new Thread() {
		@Override
		public void run() {
			try {
				monitoringFunction.open(new Configuration());
				monitoringFunction.run(context);

				// we would never arrive here if we were in
				// PROCESS_CONTINUOUSLY mode.

				// this will trigger the latch
				context.close();

			} catch (Exception e) {
				Assert.fail(e.getMessage());
			}
		}
	};
	t.start();

	if (!latch.isTriggered()) {
		latch.await();
	}

	// create some additional files that should be processed in the case of PROCESS_CONTINUOUSLY
	final org.apache.hadoop.fs.Path[] filesCreated = new org.apache.hadoop.fs.Path[NO_OF_FILES];
	for (int i = 0; i < NO_OF_FILES; i++) {
		Tuple2<org.apache.hadoop.fs.Path, String> ignoredFile =
			createFileAndFillWithData(testBasePath, "file", i, "This is test line.");
		filesCreated[i] = ignoredFile.f0;
	}

	// wait until the monitoring thread exits
	t.join();

	Assert.assertArrayEquals(filesToBeRead.toArray(), context.getSeenFiles().toArray());

	// finally delete the files created for the test.
	hdfs.delete(bootstrap.f0, false);
	for (org.apache.hadoop.fs.Path path: filesCreated) {
		hdfs.delete(path, false);
	}
}
 
Example #20
Source File: StreamExecutionEnvironment.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Generic method to create an input data stream with {@link org.apache.flink.api.common.io.InputFormat}.
 *
 * <p>The data stream is typed to the given TypeInformation. This method is intended for input formats
 * where the return type cannot be determined by reflection analysis, and that do not implement the
 * {@link org.apache.flink.api.java.typeutils.ResultTypeQueryable} interface.
 *
 * <p><b>NOTES ON CHECKPOINTING: </b> In the case of a {@link FileInputFormat}, the source
 * (which executes the {@link ContinuousFileMonitoringFunction}) monitors the path, creates the
 * {@link org.apache.flink.core.fs.FileInputSplit FileInputSplits} to be processed, forwards
 * them to the downstream {@link ContinuousFileReaderOperator} to read the actual data, and exits,
 * without waiting for the readers to finish reading. This implies that no more checkpoint
 * barriers are going to be forwarded after the source exits, thus having no checkpoints.
 *
 * @param inputFormat
 * 		The input format used to create the data stream
 * @param typeInfo
 * 		The information about the type of the output type
 * @param <OUT>
 * 		The type of the returned data stream
 * @return The data stream that represents the data created by the input format
 */
@PublicEvolving
public <OUT> DataStreamSource<OUT> createInput(InputFormat<OUT, ?> inputFormat, TypeInformation<OUT> typeInfo) {
	DataStreamSource<OUT> source;

	if (inputFormat instanceof FileInputFormat) {
		@SuppressWarnings("unchecked")
		FileInputFormat<OUT> format = (FileInputFormat<OUT>) inputFormat;

		source = createFileInput(format, typeInfo, "Custom File source",
				FileProcessingMode.PROCESS_ONCE, -1);
	} else {
		source = createInput(inputFormat, typeInfo, "Custom Source");
	}
	return source;
}
 
Example #21
Source File: StreamExecutionEnvironment.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Generic method to create an input data stream with {@link org.apache.flink.api.common.io.InputFormat}.
 *
 * <p>The data stream is typed to the given TypeInformation. This method is intended for input formats
 * where the return type cannot be determined by reflection analysis, and that do not implement the
 * {@link org.apache.flink.api.java.typeutils.ResultTypeQueryable} interface.
 *
 * <p><b>NOTES ON CHECKPOINTING: </b> In the case of a {@link FileInputFormat}, the source
 * (which executes the {@link ContinuousFileMonitoringFunction}) monitors the path, creates the
 * {@link org.apache.flink.core.fs.FileInputSplit FileInputSplits} to be processed, forwards
 * them to the downstream {@link ContinuousFileReaderOperator} to read the actual data, and exits,
 * without waiting for the readers to finish reading. This implies that no more checkpoint
 * barriers are going to be forwarded after the source exits, thus having no checkpoints.
 *
 * @param inputFormat
 * 		The input format used to create the data stream
 * @param typeInfo
 * 		The information about the type of the output type
 * @param <OUT>
 * 		The type of the returned data stream
 * @return The data stream that represents the data created by the input format
 */
@PublicEvolving
public <OUT> DataStreamSource<OUT> createInput(InputFormat<OUT, ?> inputFormat, TypeInformation<OUT> typeInfo) {
	DataStreamSource<OUT> source;

	if (inputFormat instanceof FileInputFormat) {
		@SuppressWarnings("unchecked")
		FileInputFormat<OUT> format = (FileInputFormat<OUT>) inputFormat;

		source = createFileInput(format, typeInfo, "Custom File source",
				FileProcessingMode.PROCESS_ONCE, -1);
	} else {
		source = createInput(inputFormat, typeInfo, "Custom Source");
	}
	return source;
}
 
Example #22
Source File: ContinuousFileProcessingTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
@Test
public void testFunctionRestore() throws Exception {
	String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";

	org.apache.hadoop.fs.Path path = null;
	long fileModTime = Long.MIN_VALUE;
	for (int i = 0; i < 1; i++) {
		Tuple2<org.apache.hadoop.fs.Path, String> file = createFileAndFillWithData(testBasePath, "file", i, "This is test line.");
		path = file.f0;
		fileModTime = hdfs.getFileStatus(file.f0).getModificationTime();
	}

	TextInputFormat format = new TextInputFormat(new Path(testBasePath));

	final ContinuousFileMonitoringFunction<String> monitoringFunction =
		createTestContinuousFileMonitoringFunction(format, FileProcessingMode.PROCESS_CONTINUOUSLY);

	StreamSource<TimestampedFileInputSplit, ContinuousFileMonitoringFunction<String>> src =
		new StreamSource<>(monitoringFunction);

	final AbstractStreamOperatorTestHarness<TimestampedFileInputSplit> testHarness =
		new AbstractStreamOperatorTestHarness<>(src, 1, 1, 0);
	testHarness.open();

	final Throwable[] error = new Throwable[1];

	final OneShotLatch latch = new OneShotLatch();

	final DummySourceContext sourceContext = new DummySourceContext() {
		@Override
		public void collect(TimestampedFileInputSplit element) {
			latch.trigger();
		}
	};

	// run the source asynchronously
	Thread runner = new Thread() {
		@Override
		public void run() {
			try {
				monitoringFunction.run(sourceContext);
			}
			catch (Throwable t) {
				t.printStackTrace();
				error[0] = t;
			}
		}
	};
	runner.start();

	// first condition for the source to have updated its state: emit at least one element
	if (!latch.isTriggered()) {
		latch.await();
	}

	// second condition for the source to have updated its state: it's not on the lock anymore,
	// this means it has processed all the splits and updated its state.
	synchronized (sourceContext.getCheckpointLock()) {}

	OperatorSubtaskState snapshot = testHarness.snapshot(0, 0);
	monitoringFunction.cancel();
	runner.join();

	testHarness.close();

	final ContinuousFileMonitoringFunction<String> monitoringFunctionCopy =
		createTestContinuousFileMonitoringFunction(format, FileProcessingMode.PROCESS_CONTINUOUSLY);

	StreamSource<TimestampedFileInputSplit, ContinuousFileMonitoringFunction<String>> srcCopy =
		new StreamSource<>(monitoringFunctionCopy);

	AbstractStreamOperatorTestHarness<TimestampedFileInputSplit> testHarnessCopy =
		new AbstractStreamOperatorTestHarness<>(srcCopy, 1, 1, 0);
	testHarnessCopy.initializeState(snapshot);
	testHarnessCopy.open();

	Assert.assertNull(error[0]);
	Assert.assertEquals(fileModTime, monitoringFunctionCopy.getGlobalModificationTime());

	hdfs.delete(path, false);
}
 
Example #23
Source File: ContinuousFileProcessingTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
@Test
public void testProcessOnce() throws Exception {
	String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";

	final OneShotLatch latch = new OneShotLatch();

	// create a single file in the directory
	Tuple2<org.apache.hadoop.fs.Path, String> bootstrap =
		createFileAndFillWithData(testBasePath, "file", NO_OF_FILES + 1, "This is test line.");
	Assert.assertTrue(hdfs.exists(bootstrap.f0));

	// the source is supposed to read only this file.
	final Set<String> filesToBeRead = new TreeSet<>();
	filesToBeRead.add(bootstrap.f0.getName());

	TextInputFormat format = new TextInputFormat(new Path(testBasePath));
	format.setFilesFilter(FilePathFilter.createDefaultFilter());

	final ContinuousFileMonitoringFunction<String> monitoringFunction =
		createTestContinuousFileMonitoringFunction(format, FileProcessingMode.PROCESS_ONCE);

	final FileVerifyingSourceContext context = new FileVerifyingSourceContext(latch, monitoringFunction);

	final Thread t = new Thread() {
		@Override
		public void run() {
			try {
				monitoringFunction.open(new Configuration());
				monitoringFunction.run(context);

				// we would never arrive here if we were in
				// PROCESS_CONTINUOUSLY mode.

				// this will trigger the latch
				context.close();

			} catch (Exception e) {
				Assert.fail(e.getMessage());
			}
		}
	};
	t.start();

	if (!latch.isTriggered()) {
		latch.await();
	}

	// create some additional files that should be processed in the case of PROCESS_CONTINUOUSLY
	final org.apache.hadoop.fs.Path[] filesCreated = new org.apache.hadoop.fs.Path[NO_OF_FILES];
	for (int i = 0; i < NO_OF_FILES; i++) {
		Tuple2<org.apache.hadoop.fs.Path, String> ignoredFile =
			createFileAndFillWithData(testBasePath, "file", i, "This is test line.");
		filesCreated[i] = ignoredFile.f0;
	}

	// wait until the monitoring thread exits
	t.join();

	Assert.assertArrayEquals(filesToBeRead.toArray(), context.getSeenFiles().toArray());

	// finally delete the files created for the test.
	hdfs.delete(bootstrap.f0, false);
	for (org.apache.hadoop.fs.Path path: filesCreated) {
		hdfs.delete(path, false);
	}
}
 
Example #24
Source File: ContinuousFileProcessingITCase.java    From flink with Apache License 2.0 4 votes vote down vote up
@Test
public void testProgram() throws Exception {

	/*
	* This test checks the interplay between the monitor and the reader
	* and also the failExternally() functionality. To test the latter we
	* set the parallelism to 1 so that we have the chaining between the sink,
	* which throws the SuccessException to signal the end of the test, and the
	* reader.
	* */

	TextInputFormat format = new TextInputFormat(new Path(hdfsURI));
	format.setFilePath(hdfsURI);
	format.setFilesFilter(FilePathFilter.createDefaultFilter());

	// create the stream execution environment with a parallelism > 1 to test
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(PARALLELISM);

	ContinuousFileMonitoringFunction<String> monitoringFunction =
		new ContinuousFileMonitoringFunction<>(format,
			FileProcessingMode.PROCESS_CONTINUOUSLY,
			env.getParallelism(), INTERVAL);

	// the monitor has always DOP 1
	DataStream<TimestampedFileInputSplit> splits = env.addSource(monitoringFunction);
	Assert.assertEquals(1, splits.getParallelism());

	TypeInformation<String> typeInfo = TypeExtractor.getInputFormatTypes(format);

	// the readers can be multiple
	DataStream<String> content = splits.transform("FileSplitReader", typeInfo, new ContinuousFileReaderOperatorFactory<>(format));
	Assert.assertEquals(PARALLELISM, content.getParallelism());

	// finally for the sink we set the parallelism to 1 so that we can verify the output
	TestingSinkFunction sink = new TestingSinkFunction();
	content.addSink(sink).setParallelism(1);

	CompletableFuture<Void> jobFuture = new CompletableFuture<>();
	new Thread(() -> {
		try {
			env.execute("ContinuousFileProcessingITCase Job.");
			jobFuture.complete(null);
		} catch (Exception e) {
			if (ExceptionUtils.findThrowable(e, SuccessException.class).isPresent()) {
				jobFuture.complete(null);
			} else {
				jobFuture.completeExceptionally(e);
			}
		}
	}).start();

	// The modification time of the last created file.
	long lastCreatedModTime = Long.MIN_VALUE;

	// create the files to be read
	for (int i = 0; i < NO_OF_FILES; i++) {
		Tuple2<org.apache.hadoop.fs.Path, String> tmpFile;
		long modTime;
		do {

			// give it some time so that the files have
			// different modification timestamps.
			Thread.sleep(50);

			tmpFile = fillWithData(hdfsURI, "file", i, "This is test line.");

			modTime = hdfs.getFileStatus(tmpFile.f0).getModificationTime();
			if (modTime <= lastCreatedModTime) {
				// delete the last created file to recreate it with a different timestamp
				hdfs.delete(tmpFile.f0, false);
			}
		} while (modTime <= lastCreatedModTime);
		lastCreatedModTime = modTime;

		// put the contents in the expected results list before the reader picks them
		// this is to guarantee that they are in before the reader finishes (avoid race conditions)
		expectedContents.put(i, tmpFile.f1);

		org.apache.hadoop.fs.Path file =
			new org.apache.hadoop.fs.Path(hdfsURI + "/file" + i);
		hdfs.rename(tmpFile.f0, file);
		Assert.assertTrue(hdfs.exists(file));
	}

	jobFuture.get();
}
 
Example #25
Source File: ContinuousFileProcessingMigrationTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Manually run this to write binary snapshot data. Remove @Ignore to run.
 */
@Ignore
@Test
public void writeMonitoringSourceSnapshot() throws Exception {

	File testFolder = tempFolder.newFolder();

	long fileModTime = Long.MIN_VALUE;
	for (int i = 0; i < 1; i++) {
		Tuple2<File, String> file = createFileAndFillWithData(testFolder, "file", i, "This is test line.");
		fileModTime = file.f0.lastModified();
	}

	TextInputFormat format = new TextInputFormat(new Path(testFolder.getAbsolutePath()));

	final ContinuousFileMonitoringFunction<String> monitoringFunction =
		new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_CONTINUOUSLY, 1, INTERVAL);

	StreamSource<TimestampedFileInputSplit, ContinuousFileMonitoringFunction<String>> src =
		new StreamSource<>(monitoringFunction);

	final AbstractStreamOperatorTestHarness<TimestampedFileInputSplit> testHarness =
			new AbstractStreamOperatorTestHarness<>(src, 1, 1, 0);

	testHarness.open();

	final Throwable[] error = new Throwable[1];

	final OneShotLatch latch = new OneShotLatch();

	// run the source asynchronously
	Thread runner = new Thread() {
		@Override
		public void run() {
			try {
				monitoringFunction.run(new DummySourceContext() {
					@Override
					public void collect(TimestampedFileInputSplit element) {
						latch.trigger();
					}

					@Override
					public void markAsTemporarilyIdle() {

					}
				});
			}
			catch (Throwable t) {
				t.printStackTrace();
				error[0] = t;
			}
		}
	};
	runner.start();

	if (!latch.isTriggered()) {
		latch.await();
	}

	final OperatorSubtaskState snapshot;
	synchronized (testHarness.getCheckpointLock()) {
		snapshot = testHarness.snapshot(0L, 0L);
	}

	OperatorSnapshotUtil.writeStateHandle(
			snapshot,
			"src/test/resources/monitoring-function-migration-test-" + fileModTime + "-flink" + flinkGenerateSavepointVersion + "-snapshot");

	monitoringFunction.cancel();
	runner.join();

	testHarness.close();
}
 
Example #26
Source File: ContinuousFileProcessingTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
@Test
public void testProcessContinuously() throws Exception {
	String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";

	final OneShotLatch latch = new OneShotLatch();

	// create a single file in the directory
	Tuple2<org.apache.hadoop.fs.Path, String> bootstrap =
		createFileAndFillWithData(testBasePath, "file", NO_OF_FILES + 1, "This is test line.");
	Assert.assertTrue(hdfs.exists(bootstrap.f0));

	final Set<String> filesToBeRead = new TreeSet<>();
	filesToBeRead.add(bootstrap.f0.getName());

	TextInputFormat format = new TextInputFormat(new Path(testBasePath));
	format.setFilesFilter(FilePathFilter.createDefaultFilter());

	final ContinuousFileMonitoringFunction<String> monitoringFunction =
		createTestContinuousFileMonitoringFunction(format, FileProcessingMode.PROCESS_CONTINUOUSLY);

	final int totalNoOfFilesToBeRead = NO_OF_FILES + 1; // 1 for the bootstrap + NO_OF_FILES
	final FileVerifyingSourceContext context = new FileVerifyingSourceContext(latch,
		monitoringFunction, 1, totalNoOfFilesToBeRead);

	final Thread t = new Thread() {

		@Override
		public void run() {
			try {
				monitoringFunction.open(new Configuration());
				monitoringFunction.run(context);
			} catch (Exception e) {
				Assert.fail(e.getMessage());
			}
		}
	};
	t.start();

	if (!latch.isTriggered()) {
		latch.await();
	}

	// create some additional files that will be processed in the case of PROCESS_CONTINUOUSLY
	final org.apache.hadoop.fs.Path[] filesCreated = new org.apache.hadoop.fs.Path[NO_OF_FILES];
	for (int i = 0; i < NO_OF_FILES; i++) {
		Tuple2<org.apache.hadoop.fs.Path, String> file =
			createFileAndFillWithData(testBasePath, "file", i, "This is test line.");
		filesCreated[i] = file.f0;
		filesToBeRead.add(file.f0.getName());
	}

	// wait until the monitoring thread exits
	t.join();

	Assert.assertArrayEquals(filesToBeRead.toArray(), context.getSeenFiles().toArray());

	// finally delete the files created for the test.
	hdfs.delete(bootstrap.f0, false);
	for (org.apache.hadoop.fs.Path path: filesCreated) {
		hdfs.delete(path, false);
	}
}
 
Example #27
Source File: ContinuousFileProcessingTest.java    From flink with Apache License 2.0 4 votes vote down vote up
@Test
public void testFunctionRestore() throws Exception {
	String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";

	org.apache.hadoop.fs.Path path = null;
	long fileModTime = Long.MIN_VALUE;
	for (int i = 0; i < 1; i++) {
		Tuple2<org.apache.hadoop.fs.Path, String> file = createFileAndFillWithData(testBasePath, "file", i, "This is test line.");
		path = file.f0;
		fileModTime = hdfs.getFileStatus(file.f0).getModificationTime();
	}

	TextInputFormat format = new TextInputFormat(new Path(testBasePath));

	final ContinuousFileMonitoringFunction<String> monitoringFunction =
		createTestContinuousFileMonitoringFunction(format, FileProcessingMode.PROCESS_CONTINUOUSLY);

	StreamSource<TimestampedFileInputSplit, ContinuousFileMonitoringFunction<String>> src =
		new StreamSource<>(monitoringFunction);

	final AbstractStreamOperatorTestHarness<TimestampedFileInputSplit> testHarness =
		new AbstractStreamOperatorTestHarness<>(src, 1, 1, 0);
	testHarness.open();

	final Throwable[] error = new Throwable[1];

	final OneShotLatch latch = new OneShotLatch();

	final DummySourceContext sourceContext = new DummySourceContext() {
		@Override
		public void collect(TimestampedFileInputSplit element) {
			latch.trigger();
		}
	};

	// run the source asynchronously
	Thread runner = new Thread() {
		@Override
		public void run() {
			try {
				monitoringFunction.run(sourceContext);
			}
			catch (Throwable t) {
				t.printStackTrace();
				error[0] = t;
			}
		}
	};
	runner.start();

	// first condition for the source to have updated its state: emit at least one element
	if (!latch.isTriggered()) {
		latch.await();
	}

	// second condition for the source to have updated its state: it's not on the lock anymore,
	// this means it has processed all the splits and updated its state.
	synchronized (sourceContext.getCheckpointLock()) {}

	OperatorSubtaskState snapshot = testHarness.snapshot(0, 0);
	monitoringFunction.cancel();
	runner.join();

	testHarness.close();

	final ContinuousFileMonitoringFunction<String> monitoringFunctionCopy =
		createTestContinuousFileMonitoringFunction(format, FileProcessingMode.PROCESS_CONTINUOUSLY);

	StreamSource<TimestampedFileInputSplit, ContinuousFileMonitoringFunction<String>> srcCopy =
		new StreamSource<>(monitoringFunctionCopy);

	AbstractStreamOperatorTestHarness<TimestampedFileInputSplit> testHarnessCopy =
		new AbstractStreamOperatorTestHarness<>(srcCopy, 1, 1, 0);
	testHarnessCopy.initializeState(snapshot);
	testHarnessCopy.open();

	Assert.assertNull(error[0]);
	Assert.assertEquals(fileModTime, monitoringFunctionCopy.getGlobalModificationTime());

	hdfs.delete(path, false);
}
 
Example #28
Source File: ContinuousFileProcessingTest.java    From flink with Apache License 2.0 4 votes vote down vote up
@Test
public void testProcessContinuously() throws Exception {
	String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";

	final OneShotLatch latch = new OneShotLatch();

	// create a single file in the directory
	Tuple2<org.apache.hadoop.fs.Path, String> bootstrap =
		createFileAndFillWithData(testBasePath, "file", NO_OF_FILES + 1, "This is test line.");
	Assert.assertTrue(hdfs.exists(bootstrap.f0));

	final Set<String> filesToBeRead = new TreeSet<>();
	filesToBeRead.add(bootstrap.f0.getName());

	TextInputFormat format = new TextInputFormat(new Path(testBasePath));
	format.setFilesFilter(FilePathFilter.createDefaultFilter());

	final ContinuousFileMonitoringFunction<String> monitoringFunction =
		createTestContinuousFileMonitoringFunction(format, FileProcessingMode.PROCESS_CONTINUOUSLY);

	final int totalNoOfFilesToBeRead = NO_OF_FILES + 1; // 1 for the bootstrap + NO_OF_FILES
	final FileVerifyingSourceContext context = new FileVerifyingSourceContext(latch,
		monitoringFunction, 1, totalNoOfFilesToBeRead);

	final Thread t = new Thread() {

		@Override
		public void run() {
			try {
				monitoringFunction.open(new Configuration());
				monitoringFunction.run(context);
			} catch (Exception e) {
				Assert.fail(e.getMessage());
			}
		}
	};
	t.start();

	if (!latch.isTriggered()) {
		latch.await();
	}

	// create some additional files that will be processed in the case of PROCESS_CONTINUOUSLY
	final org.apache.hadoop.fs.Path[] filesCreated = new org.apache.hadoop.fs.Path[NO_OF_FILES];
	for (int i = 0; i < NO_OF_FILES; i++) {
		Tuple2<org.apache.hadoop.fs.Path, String> file =
			createFileAndFillWithData(testBasePath, "file", i, "This is test line.");
		filesCreated[i] = file.f0;
		filesToBeRead.add(file.f0.getName());
	}

	// wait until the monitoring thread exits
	t.join();

	Assert.assertArrayEquals(filesToBeRead.toArray(), context.getSeenFiles().toArray());

	// finally delete the files created for the test.
	hdfs.delete(bootstrap.f0, false);
	for (org.apache.hadoop.fs.Path path: filesCreated) {
		hdfs.delete(path, false);
	}
}
 
Example #29
Source File: ContinuousFileProcessingMigrationTest.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Manually run this to write binary snapshot data. Remove @Ignore to run.
 */
@Ignore
@Test
public void writeMonitoringSourceSnapshot() throws Exception {

	File testFolder = tempFolder.newFolder();

	long fileModTime = Long.MIN_VALUE;
	for (int i = 0; i < 1; i++) {
		Tuple2<File, String> file = createFileAndFillWithData(testFolder, "file", i, "This is test line.");
		fileModTime = file.f0.lastModified();
	}

	TextInputFormat format = new TextInputFormat(new Path(testFolder.getAbsolutePath()));

	final ContinuousFileMonitoringFunction<String> monitoringFunction =
		new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_CONTINUOUSLY, 1, INTERVAL);

	StreamSource<TimestampedFileInputSplit, ContinuousFileMonitoringFunction<String>> src =
		new StreamSource<>(monitoringFunction);

	final AbstractStreamOperatorTestHarness<TimestampedFileInputSplit> testHarness =
			new AbstractStreamOperatorTestHarness<>(src, 1, 1, 0);

	testHarness.open();

	final Throwable[] error = new Throwable[1];

	final OneShotLatch latch = new OneShotLatch();

	// run the source asynchronously
	Thread runner = new Thread() {
		@Override
		public void run() {
			try {
				monitoringFunction.run(new DummySourceContext() {
					@Override
					public void collect(TimestampedFileInputSplit element) {
						latch.trigger();
					}

					@Override
					public void markAsTemporarilyIdle() {

					}
				});
			}
			catch (Throwable t) {
				t.printStackTrace();
				error[0] = t;
			}
		}
	};
	runner.start();

	if (!latch.isTriggered()) {
		latch.await();
	}

	final OperatorSubtaskState snapshot;
	synchronized (testHarness.getCheckpointLock()) {
		snapshot = testHarness.snapshot(0L, 0L);
	}

	OperatorSnapshotUtil.writeStateHandle(
			snapshot,
			"src/test/resources/monitoring-function-migration-test-" + fileModTime + "-flink" + flinkGenerateSavepointVersion + "-snapshot");

	monitoringFunction.cancel();
	runner.join();

	testHarness.close();
}
 
Example #30
Source File: StreamExecutionEnvironment.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Reads the contents of the user-specified {@code filePath} based on the given {@link FileInputFormat}. Depending
 * on the provided {@link FileProcessingMode}.
 *
 * <p>See {@link #readFile(FileInputFormat, String, FileProcessingMode, long)}
 *
 * @param inputFormat
 * 		The input format used to create the data stream
 * @param filePath
 * 		The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path")
 * @param watchType
 * 		The mode in which the source should operate, i.e. monitor path and react to new data, or process once and exit
 * @param interval
 * 		In the case of periodic path monitoring, this specifies the interval (in millis) between consecutive path scans
 * @param filter
 * 		The files to be excluded from the processing
 * @param <OUT>
 * 		The type of the returned data stream
 * @return The data stream that represents the data read from the given file
 *
 * @deprecated Use {@link FileInputFormat#setFilesFilter(FilePathFilter)} to set a filter and
 * 		{@link StreamExecutionEnvironment#readFile(FileInputFormat, String, FileProcessingMode, long)}
 *
 */
@PublicEvolving
@Deprecated
public <OUT> DataStreamSource<OUT> readFile(FileInputFormat<OUT> inputFormat,
											String filePath,
											FileProcessingMode watchType,
											long interval,
											FilePathFilter filter) {
	inputFormat.setFilesFilter(filter);

	TypeInformation<OUT> typeInformation;
	try {
		typeInformation = TypeExtractor.getInputFormatTypes(inputFormat);
	} catch (Exception e) {
		throw new InvalidProgramException("The type returned by the input format could not be " +
				"automatically determined. Please specify the TypeInformation of the produced type " +
				"explicitly by using the 'createInput(InputFormat, TypeInformation)' method instead.");
	}
	return readFile(inputFormat, filePath, watchType, interval, typeInformation);
}