Java Code Examples for org.apache.flink.api.common.io.FileInputFormat

The following examples show how to use org.apache.flink.api.common.io.FileInputFormat. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: Flink-CEPplus   Source File: ExecutionEnvironment.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a {@link DataSource} that reads the file at {@code filePath} with the given input format.
 *
 * <p>The file path is set on the input format, and the produced type is derived from the format
 * via {@code TypeExtractor.getInputFormatTypes} before delegating to {@code createInput}.
 *
 * @param inputFormat the file input format used to read the file; must not be null
 * @param filePath the path of the file to read; must not be null
 * @param <X> the type of the records produced by the input format
 * @return the data source returned by {@code createInput}
 * @throws IllegalArgumentException if {@code inputFormat} or {@code filePath} is null
 * @throws InvalidProgramException if type extraction or the creation of the input fails; the
 * 		original exception is attached as the cause
 */
public <X> DataSource<X> readFile(FileInputFormat<X> inputFormat, String filePath) {
	if (inputFormat == null) {
		throw new IllegalArgumentException("InputFormat must not be null.");
	}
	if (filePath == null) {
		throw new IllegalArgumentException("The file path must not be null.");
	}

	inputFormat.setFilePath(new Path(filePath));
	try {
		return createInput(inputFormat, TypeExtractor.getInputFormatTypes(inputFormat));
	}
	catch (Exception e) {
		// Attach the original failure as the cause so the real reason is not lost to the caller.
		throw new InvalidProgramException("The type returned by the input format could not be automatically determined. " +
				"Please specify the TypeInformation of the produced type explicitly by using the " +
				"'createInput(InputFormat, TypeInformation)' method instead.", e);
	}
}
 
Example 2
/**
 * Builds a split reader around the given format, serializer, reader context and checkpoint lock.
 *
 * <p>All four collaborators are null-checked. The queue of pending splits starts out empty and is
 * pre-filled with {@code restoredState} when that list is not null.
 *
 * @param format the file input format; must not be null
 * @param serializer the serializer for the produced records; must not be null
 * @param readerContext the source context handed to the reader; must not be null
 * @param checkpointLock the checkpoint lock object; must not be null
 * @param restoredState splits to re-enqueue, or null when there is nothing to restore
 */
private SplitReader(FileInputFormat<OT> format,
			TypeSerializer<OT> serializer,
			SourceFunction.SourceContext<OT> readerContext,
			Object checkpointLock,
			List<TimestampedFileInputSplit> restoredState) {

	// Mandatory collaborators; the checks run in parameter order.
	this.format = checkNotNull(format, "Unspecified FileInputFormat.");
	this.serializer = checkNotNull(serializer, "Unspecified Serializer.");
	this.readerContext = checkNotNull(readerContext, "Unspecified Reader Context.");
	this.checkpointLock = checkNotNull(checkpointLock, "Unspecified checkpoint lock.");

	// Begin with an empty queue; a task recovering from a previous failed attempt
	// hands over the splits it still has to process.
	this.pendingSplits = new PriorityQueue<>();
	if (restoredState != null) {
		this.pendingSplits.addAll(restoredState);
	}

	// Initial lifecycle flags: running and not asked to close.
	this.isRunning = true;
	this.shouldClose = false;
}
 
Example 3
/**
 * Creates a monitoring function for the single path configured on the given input format.
 *
 * @param format the file input format; must not be null and must have exactly one file path
 * @param watchType the processing mode; the interval is only validated when this is not
 * 		{@code FileProcessingMode.PROCESS_ONCE}
 * @param readerParallelism the reader parallelism; values below 1 are raised to 1
 * @param interval the monitoring interval in milliseconds
 * @throws IllegalArgumentException if the interval is smaller than {@code MIN_MONITORING_INTERVAL}
 * 		(unless processing once) or if the format does not have exactly one path
 * @throws NullPointerException if {@code format} or its single path is null
 */
public ContinuousFileMonitoringFunction(
	FileInputFormat<OUT> format,
	FileProcessingMode watchType,
	int readerParallelism,
	long interval) {

	Preconditions.checkArgument(
		watchType == FileProcessingMode.PROCESS_ONCE || interval >= MIN_MONITORING_INTERVAL,
		"The specified monitoring interval (" + interval + " ms) is smaller than the minimum " +
			"allowed one (" + MIN_MONITORING_INTERVAL + " ms)."
	);

	// Null-check the format BEFORE dereferencing it; otherwise a null format fails with a
	// message-less NullPointerException and the check below it can never trigger.
	this.format = Preconditions.checkNotNull(format, "Unspecified File Input Format.");

	Preconditions.checkArgument(
		format.getFilePaths().length == 1,
		"FileInputFormats with multiple paths are not supported yet.");

	// Check the path object itself: calling toString() first would throw on a null path
	// before the check could report it.
	this.path = Preconditions.checkNotNull(format.getFilePaths()[0], "Unspecified Path.").toString();

	this.interval = interval;
	this.watchType = watchType;
	this.readerParallelism = Math.max(readerParallelism, 1);
	this.globalModificationTime = Long.MIN_VALUE;
}
 
Example 4
Source Project: flink   Source File: ExecutionEnvironment.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a {@link DataSource} that reads the file at {@code filePath} with the given input format.
 *
 * <p>The file path is set on the input format, and the produced type is derived from the format
 * via {@code TypeExtractor.getInputFormatTypes} before delegating to {@code createInput}.
 *
 * @param inputFormat the file input format used to read the file; must not be null
 * @param filePath the path of the file to read; must not be null
 * @param <X> the type of the records produced by the input format
 * @return the data source returned by {@code createInput}
 * @throws IllegalArgumentException if {@code inputFormat} or {@code filePath} is null
 * @throws InvalidProgramException if type extraction or the creation of the input fails; the
 * 		original exception is attached as the cause
 */
public <X> DataSource<X> readFile(FileInputFormat<X> inputFormat, String filePath) {
	if (inputFormat == null) {
		throw new IllegalArgumentException("InputFormat must not be null.");
	}
	if (filePath == null) {
		throw new IllegalArgumentException("The file path must not be null.");
	}

	inputFormat.setFilePath(new Path(filePath));
	try {
		return createInput(inputFormat, TypeExtractor.getInputFormatTypes(inputFormat));
	}
	catch (Exception e) {
		// Attach the original failure as the cause so the real reason is not lost to the caller.
		throw new InvalidProgramException("The type returned by the input format could not be automatically determined. " +
				"Please specify the TypeInformation of the produced type explicitly by using the " +
				"'createInput(InputFormat, TypeInformation)' method instead.", e);
	}
}
 
Example 5
Source Project: flink   Source File: ContinuousFileReaderOperator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a split reader around the given format, serializer, reader context and checkpoint lock.
 *
 * <p>All four collaborators are null-checked. The queue of pending splits starts out empty and is
 * pre-filled with {@code restoredState} when that list is not null.
 *
 * @param format the file input format; must not be null
 * @param serializer the serializer for the produced records; must not be null
 * @param readerContext the source context handed to the reader; must not be null
 * @param checkpointLock the checkpoint lock object; must not be null
 * @param restoredState splits to re-enqueue, or null when there is nothing to restore
 */
private SplitReader(FileInputFormat<OT> format,
			TypeSerializer<OT> serializer,
			SourceFunction.SourceContext<OT> readerContext,
			Object checkpointLock,
			List<TimestampedFileInputSplit> restoredState) {

	// Mandatory collaborators; the checks run in parameter order.
	this.format = checkNotNull(format, "Unspecified FileInputFormat.");
	this.serializer = checkNotNull(serializer, "Unspecified Serializer.");
	this.readerContext = checkNotNull(readerContext, "Unspecified Reader Context.");
	this.checkpointLock = checkNotNull(checkpointLock, "Unspecified checkpoint lock.");

	// Begin with an empty queue; a task recovering from a previous failed attempt
	// hands over the splits it still has to process.
	this.pendingSplits = new PriorityQueue<>();
	if (restoredState != null) {
		this.pendingSplits.addAll(restoredState);
	}

	// Initial lifecycle flags: running and not asked to close.
	this.isRunning = true;
	this.shouldClose = false;
}
 
Example 6
Source Project: flink   Source File: ContinuousFileMonitoringFunction.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a monitoring function for the single path configured on the given input format.
 *
 * @param format the file input format; must not be null and must have exactly one file path
 * @param watchType the processing mode; the interval is only validated when this is not
 * 		{@code FileProcessingMode.PROCESS_ONCE}
 * @param readerParallelism the reader parallelism; values below 1 are raised to 1
 * @param interval the monitoring interval in milliseconds
 * @throws IllegalArgumentException if the interval is smaller than {@code MIN_MONITORING_INTERVAL}
 * 		(unless processing once) or if the format does not have exactly one path
 * @throws NullPointerException if {@code format} or its single path is null
 */
public ContinuousFileMonitoringFunction(
	FileInputFormat<OUT> format,
	FileProcessingMode watchType,
	int readerParallelism,
	long interval) {

	Preconditions.checkArgument(
		watchType == FileProcessingMode.PROCESS_ONCE || interval >= MIN_MONITORING_INTERVAL,
		"The specified monitoring interval (" + interval + " ms) is smaller than the minimum " +
			"allowed one (" + MIN_MONITORING_INTERVAL + " ms)."
	);

	// Null-check the format BEFORE dereferencing it; otherwise a null format fails with a
	// message-less NullPointerException and the check below it can never trigger.
	this.format = Preconditions.checkNotNull(format, "Unspecified File Input Format.");

	Preconditions.checkArgument(
		format.getFilePaths().length == 1,
		"FileInputFormats with multiple paths are not supported yet.");

	// Check the path object itself: calling toString() first would throw on a null path
	// before the check could report it.
	this.path = Preconditions.checkNotNull(format.getFilePaths()[0], "Unspecified Path.").toString();

	this.interval = interval;
	this.watchType = watchType;
	this.readerParallelism = Math.max(readerParallelism, 1);
	this.globalModificationTime = Long.MIN_VALUE;
}
 
Example 7
Source Project: flink   Source File: ExecutionEnvironment.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a {@link DataSource} that reads the file at {@code filePath} with the given input format.
 *
 * <p>The file path is set on the input format, and the produced type is derived from the format
 * via {@code TypeExtractor.getInputFormatTypes} before delegating to {@code createInput}.
 *
 * @param inputFormat the file input format used to read the file; must not be null
 * @param filePath the path of the file to read; must not be null
 * @param <X> the type of the records produced by the input format
 * @return the data source returned by {@code createInput}
 * @throws IllegalArgumentException if {@code inputFormat} or {@code filePath} is null
 * @throws InvalidProgramException if type extraction or the creation of the input fails; the
 * 		original exception is attached as the cause
 */
public <X> DataSource<X> readFile(FileInputFormat<X> inputFormat, String filePath) {
	if (inputFormat == null) {
		throw new IllegalArgumentException("InputFormat must not be null.");
	}
	if (filePath == null) {
		throw new IllegalArgumentException("The file path must not be null.");
	}

	inputFormat.setFilePath(new Path(filePath));
	try {
		return createInput(inputFormat, TypeExtractor.getInputFormatTypes(inputFormat));
	}
	catch (Exception e) {
		// Attach the original failure as the cause so the real reason is not lost to the caller.
		throw new InvalidProgramException("The type returned by the input format could not be automatically determined. " +
				"Please specify the TypeInformation of the produced type explicitly by using the " +
				"'createInput(InputFormat, TypeInformation)' method instead.", e);
	}
}
 
Example 8
Source Project: flink   Source File: ContinuousFileMonitoringFunction.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a monitoring function for the single path configured on the given input format,
 * starting from the supplied global modification time.
 *
 * @param format the file input format; must not be null and must have exactly one file path
 * @param watchType the processing mode; the interval is only validated when this is not
 * 		{@code FileProcessingMode.PROCESS_ONCE}
 * @param readerParallelism the reader parallelism; values below 1 are raised to 1
 * @param interval the monitoring interval in milliseconds
 * @param globalModificationTime the initial value stored in the {@code globalModificationTime} field
 * @throws IllegalArgumentException if the interval is smaller than {@code MIN_MONITORING_INTERVAL}
 * 		(unless processing once) or if the format does not have exactly one path
 * @throws NullPointerException if {@code format} or its single path is null
 */
public ContinuousFileMonitoringFunction(
	FileInputFormat<OUT> format,
	FileProcessingMode watchType,
	int readerParallelism,
	long interval,
	long globalModificationTime) {

	Preconditions.checkArgument(
		watchType == FileProcessingMode.PROCESS_ONCE || interval >= MIN_MONITORING_INTERVAL,
		"The specified monitoring interval (" + interval + " ms) is smaller than the minimum " +
			"allowed one (" + MIN_MONITORING_INTERVAL + " ms)."
	);

	// Null-check the format BEFORE dereferencing it; otherwise a null format fails with a
	// message-less NullPointerException and the check below it can never trigger.
	this.format = Preconditions.checkNotNull(format, "Unspecified File Input Format.");

	Preconditions.checkArgument(
		format.getFilePaths().length == 1,
		"FileInputFormats with multiple paths are not supported yet.");

	// Check the path object itself: calling toString() first would throw on a null path
	// before the check could report it.
	this.path = Preconditions.checkNotNull(format.getFilePaths()[0], "Unspecified Path.").toString();

	this.interval = interval;
	this.watchType = watchType;
	this.readerParallelism = Math.max(readerParallelism, 1);
	this.globalModificationTime = globalModificationTime;
}
 
Example 9
/**
 * Builds a {@link ContinuousFileMonitoringFunction} for tests, using a reader parallelism of 1,
 * the monitoring interval {@link #INTERVAL}, and a Mockito mock as its runtime context.
 */
private <OUT> ContinuousFileMonitoringFunction<OUT> createTestContinuousFileMonitoringFunction(FileInputFormat<OUT> format, FileProcessingMode fileProcessingMode) {
	final int readerParallelism = 1;

	ContinuousFileMonitoringFunction<OUT> function =
		new ContinuousFileMonitoringFunction<>(format, fileProcessingMode, readerParallelism, INTERVAL);

	// The function only needs some runtime context to be present, so a mock is enough.
	RuntimeContext mockedContext = Mockito.mock(RuntimeContext.class);
	function.setRuntimeContext(mockedContext);

	return function;
}
 
Example 10
Source Project: Flink-CEPplus   Source File: StreamExecutionEnvironment.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Wires a file-based source: a {@link ContinuousFileMonitoringFunction} added as a source and
 * followed by a {@link ContinuousFileReaderOperator} transformation named
 * {@code "Split Reader: " + sourceName}.
 *
 * @param inputFormat the file input format; must not be null
 * @param typeInfo the type information of the produced records; must not be null
 * @param sourceName the name given to the source; must not be null
 * @param monitoringMode the file processing mode; must not be null
 * @param interval the monitoring interval in milliseconds; only validated when the mode is not
 * 		{@code PROCESS_ONCE}
 * @param <OUT> the type of the produced records
 * @return the data stream source wrapping the reader transformation
 */
private <OUT> DataStreamSource<OUT> createFileInput(
		FileInputFormat<OUT> inputFormat,
		TypeInformation<OUT> typeInfo,
		String sourceName,
		FileProcessingMode monitoringMode,
		long interval) {

	Preconditions.checkNotNull(inputFormat, "Unspecified file input format.");
	Preconditions.checkNotNull(typeInfo, "Unspecified output type information.");
	Preconditions.checkNotNull(sourceName, "Unspecified name for the source.");
	Preconditions.checkNotNull(monitoringMode, "Unspecified monitoring mode.");

	// The interval only matters when the path is monitored continuously.
	final boolean processOnce = monitoringMode.equals(FileProcessingMode.PROCESS_ONCE);
	Preconditions.checkArgument(
		processOnce || interval >= ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL,
		"The path monitoring interval cannot be less than " +
				ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL + " ms.");

	ContinuousFileMonitoringFunction<OUT> monitor =
		new ContinuousFileMonitoringFunction<>(inputFormat, monitoringMode, getParallelism(), interval);
	ContinuousFileReaderOperator<OUT> splitReader = new ContinuousFileReaderOperator<>(inputFormat);

	final String readerName = "Split Reader: " + sourceName;
	SingleOutputStreamOperator<OUT> readerStream =
		addSource(monitor, sourceName).transform(readerName, typeInfo, splitReader);

	return new DataStreamSource<>(readerStream);
}
 
Example 11
Source Project: flink   Source File: ContinuousFileProcessingTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link ContinuousFileMonitoringFunction} for tests, using a reader parallelism of 1,
 * the monitoring interval {@link #INTERVAL}, and a Mockito mock as its runtime context.
 */
private <OUT> ContinuousFileMonitoringFunction<OUT> createTestContinuousFileMonitoringFunction(FileInputFormat<OUT> format, FileProcessingMode fileProcessingMode) {
	final int readerParallelism = 1;

	ContinuousFileMonitoringFunction<OUT> function =
		new ContinuousFileMonitoringFunction<>(format, fileProcessingMode, readerParallelism, INTERVAL);

	// The function only needs some runtime context to be present, so a mock is enough.
	RuntimeContext mockedContext = Mockito.mock(RuntimeContext.class);
	function.setRuntimeContext(mockedContext);

	return function;
}
 
Example 12
Source Project: flink   Source File: StreamExecutionEnvironment.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Wires a file-based source: a {@link ContinuousFileMonitoringFunction} added as a source and
 * followed by a {@link ContinuousFileReaderOperator} transformation named
 * {@code "Split Reader: " + sourceName}.
 *
 * @param inputFormat the file input format; must not be null
 * @param typeInfo the type information of the produced records; must not be null
 * @param sourceName the name given to the source; must not be null
 * @param monitoringMode the file processing mode; must not be null
 * @param interval the monitoring interval in milliseconds; only validated when the mode is not
 * 		{@code PROCESS_ONCE}
 * @param <OUT> the type of the produced records
 * @return the data stream source wrapping the reader transformation
 */
private <OUT> DataStreamSource<OUT> createFileInput(
		FileInputFormat<OUT> inputFormat,
		TypeInformation<OUT> typeInfo,
		String sourceName,
		FileProcessingMode monitoringMode,
		long interval) {

	Preconditions.checkNotNull(inputFormat, "Unspecified file input format.");
	Preconditions.checkNotNull(typeInfo, "Unspecified output type information.");
	Preconditions.checkNotNull(sourceName, "Unspecified name for the source.");
	Preconditions.checkNotNull(monitoringMode, "Unspecified monitoring mode.");

	// The interval only matters when the path is monitored continuously.
	final boolean processOnce = monitoringMode.equals(FileProcessingMode.PROCESS_ONCE);
	Preconditions.checkArgument(
		processOnce || interval >= ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL,
		"The path monitoring interval cannot be less than " +
				ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL + " ms.");

	ContinuousFileMonitoringFunction<OUT> monitor =
		new ContinuousFileMonitoringFunction<>(inputFormat, monitoringMode, getParallelism(), interval);
	ContinuousFileReaderOperator<OUT> splitReader = new ContinuousFileReaderOperator<>(inputFormat);

	final String readerName = "Split Reader: " + sourceName;
	SingleOutputStreamOperator<OUT> readerStream =
		addSource(monitor, sourceName).transform(readerName, typeInfo, splitReader);

	return new DataStreamSource<>(readerStream);
}
 
Example 13
Source Project: flink   Source File: ContinuousFileProcessingTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link ContinuousFileMonitoringFunction} for tests, using a reader parallelism of 1,
 * the monitoring interval {@link #INTERVAL}, and a Mockito mock as its runtime context.
 */
private <OUT> ContinuousFileMonitoringFunction<OUT> createTestContinuousFileMonitoringFunction(FileInputFormat<OUT> format, FileProcessingMode fileProcessingMode) {
	final int readerParallelism = 1;

	ContinuousFileMonitoringFunction<OUT> function =
		new ContinuousFileMonitoringFunction<>(format, fileProcessingMode, readerParallelism, INTERVAL);

	// The function only needs some runtime context to be present, so a mock is enough.
	RuntimeContext mockedContext = Mockito.mock(RuntimeContext.class);
	function.setRuntimeContext(mockedContext);

	return function;
}
 
Example 14
Source Project: flink   Source File: Utils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a test harness around a {@code ContinuousFileReaderOperatorFactory} built from the
 * given input format and sets the output type on the harness' operator factory.
 *
 * @param inputFormat the file input format handed to the operator factory
 * @param outTypeInfo the output type information set on the operator factory
 * @param executionConfig the execution config to use, or null to use the harness' own config
 * @param <OUT> the type of the records produced by the input format
 * @return the configured test harness
 * @throws Exception declared by the method signature; raised by the harness setup calls
 */
public static <OUT> OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, OUT> createContinuousFileProcessingTestHarness(
	FileInputFormat<OUT> inputFormat,
	TypeInformation<OUT> outTypeInfo,
	ExecutionConfig executionConfig) throws Exception {

	OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, OUT> harness =
		new OneInputStreamOperatorTestHarness<>(new ContinuousFileReaderOperatorFactory<>(inputFormat));

	// Fall back to the harness' own execution config when the caller supplied none.
	ExecutionConfig effectiveConfig = executionConfig;
	if (effectiveConfig == null) {
		effectiveConfig = harness.getExecutionConfig();
	}
	harness.getOperatorFactory().setOutputType(outTypeInfo, effectiveConfig);

	return harness;
}
 
Example 15
Source Project: flink   Source File: StreamExecutionEnvironment.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Wires a file-based source: a {@link ContinuousFileMonitoringFunction} added as a source and
 * followed by a transformation, created from a {@code ContinuousFileReaderOperatorFactory},
 * named {@code "Split Reader: " + sourceName}.
 *
 * @param inputFormat the file input format; must not be null
 * @param typeInfo the type information of the produced records; must not be null
 * @param sourceName the name given to the source; must not be null
 * @param monitoringMode the file processing mode; must not be null
 * @param interval the monitoring interval in milliseconds; only validated when the mode is not
 * 		{@code PROCESS_ONCE}
 * @param <OUT> the type of the produced records
 * @return the data stream source wrapping the reader transformation
 */
private <OUT> DataStreamSource<OUT> createFileInput(
		FileInputFormat<OUT> inputFormat,
		TypeInformation<OUT> typeInfo,
		String sourceName,
		FileProcessingMode monitoringMode,
		long interval) {

	Preconditions.checkNotNull(inputFormat, "Unspecified file input format.");
	Preconditions.checkNotNull(typeInfo, "Unspecified output type information.");
	Preconditions.checkNotNull(sourceName, "Unspecified name for the source.");
	Preconditions.checkNotNull(monitoringMode, "Unspecified monitoring mode.");

	// The interval only matters when the path is monitored continuously.
	final boolean processOnce = monitoringMode.equals(FileProcessingMode.PROCESS_ONCE);
	Preconditions.checkArgument(
		processOnce || interval >= ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL,
		"The path monitoring interval cannot be less than " +
				ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL + " ms.");

	ContinuousFileMonitoringFunction<OUT> monitor =
		new ContinuousFileMonitoringFunction<>(inputFormat, monitoringMode, getParallelism(), interval);
	ContinuousFileReaderOperatorFactory<OUT, TimestampedFileInputSplit> readerFactory =
		new ContinuousFileReaderOperatorFactory<>(inputFormat);

	final String readerName = "Split Reader: " + sourceName;
	SingleOutputStreamOperator<OUT> readerStream =
		addSource(monitor, sourceName).transform(readerName, typeInfo, readerFactory);

	return new DataStreamSource<>(readerStream);
}
 
Example 16
Source Project: flink   Source File: ContinuousFileMonitoringFunction.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Convenience constructor that delegates to the five-argument constructor of this class,
 * passing {@code Long.MIN_VALUE} as the fifth argument.
 *
 * @param format forwarded unchanged to the delegate constructor
 * @param watchType forwarded unchanged to the delegate constructor
 * @param readerParallelism forwarded unchanged to the delegate constructor
 * @param interval forwarded unchanged to the delegate constructor
 */
public ContinuousFileMonitoringFunction(
		FileInputFormat<OUT> format,
		FileProcessingMode watchType,
		int readerParallelism,
		long interval) {
	// All validation and field initialization happens in the delegate constructor.
	this(format, watchType, readerParallelism, interval, Long.MIN_VALUE);
}
 
Example 17
Source Project: flink   Source File: ContinuousFileReaderOperatorTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns a {@link FileInputFormat} whose {@code open}, {@code nextRecord} and {@code close}
 * methods always throw {@code ExpectedTestException}. {@code reachedEnd} always reports
 * {@code false} and {@code configure} does nothing.
 */
private FileInputFormat<String> failingFormat() {
	final FileInputFormat<String> alwaysFailing = new FileInputFormat<String>() {
		@Override
		public void configure(Configuration parameters) {
			// Intentionally a no-op: configuring is the one call that succeeds.
		}

		@Override
		public void open(FileInputSplit fileSplit) {
			throw new ExpectedTestException();
		}

		@Override
		public boolean reachedEnd() {
			// Never signals the end of the input.
			return false;
		}

		@Override
		public String nextRecord(String reuse) {
			throw new ExpectedTestException();
		}

		@Override
		public void close() {
			throw new ExpectedTestException();
		}
	};
	return alwaysFailing;
}
 
Example 18
Source Project: Flink-CEPplus   Source File: StreamExecutionEnvironment.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Reads the contents of the user-specified {@code filePath} based on the given {@link FileInputFormat},
 * after setting the given {@link FilePathFilter} on the input format. How the path is processed
 * depends on the provided {@link FileProcessingMode}.
 *
 * <p>See {@link #readFile(FileInputFormat, String, FileProcessingMode, long)}
 *
 * @param inputFormat
 * 		The input format used to create the data stream
 * @param filePath
 * 		The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path")
 * @param watchType
 * 		The mode in which the source should operate, i.e. monitor path and react to new data, or process once and exit
 * @param interval
 * 		In the case of periodic path monitoring, this specifies the interval (in millis) between consecutive path scans
 * @param filter
 * 		The files to be excluded from the processing
 * @param <OUT>
 * 		The type of the returned data stream
 * @return The data stream that represents the data read from the given file
 * @throws InvalidProgramException
 * 		If the type produced by the input format cannot be determined; the original exception is attached as the cause
 *
 * @deprecated Use {@link FileInputFormat#setFilesFilter(FilePathFilter)} to set a filter and
 * 		{@link StreamExecutionEnvironment#readFile(FileInputFormat, String, FileProcessingMode, long)}
 *
 */
@PublicEvolving
@Deprecated
public <OUT> DataStreamSource<OUT> readFile(FileInputFormat<OUT> inputFormat,
											String filePath,
											FileProcessingMode watchType,
											long interval,
											FilePathFilter filter) {
	inputFormat.setFilesFilter(filter);

	TypeInformation<OUT> typeInformation;
	try {
		typeInformation = TypeExtractor.getInputFormatTypes(inputFormat);
	} catch (Exception e) {
		// Attach the extraction failure as the cause so the real reason is not lost to the caller.
		throw new InvalidProgramException("The type returned by the input format could not be " +
				"automatically determined. Please specify the TypeInformation of the produced type " +
				"explicitly by using the 'createInput(InputFormat, TypeInformation)' method instead.", e);
	}
	return readFile(inputFormat, filePath, watchType, interval, typeInformation);
}
 
Example 19
Source Project: Flink-CEPplus   Source File: StreamExecutionEnvironment.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Reads the contents of the user-specified {@code filePath} based on the given {@link FileInputFormat}. Depending
 * on the provided {@link FileProcessingMode}, the source may periodically monitor (every {@code interval} ms) the path
 * for new data ({@link FileProcessingMode#PROCESS_CONTINUOUSLY}), or process once the data currently in the path and
 * exit ({@link FileProcessingMode#PROCESS_ONCE}). In addition, if the path contains files not to be processed, the user
 * can specify a custom {@link FilePathFilter}. As a default implementation you can use
 * {@link FilePathFilter#createDefaultFilter()}.
 *
 * <p>Since all data streams need specific information about their types, this method needs to determine the
 * type of the data produced by the input format. It will attempt to determine the data type by reflection,
 * unless the input format implements the {@link org.apache.flink.api.java.typeutils.ResultTypeQueryable} interface.
 * In the latter case, this method will invoke the
 * {@link org.apache.flink.api.java.typeutils.ResultTypeQueryable#getProducedType()} method to determine data
 * type produced by the input format.
 *
 * <p><b>NOTES ON CHECKPOINTING: </b> If the {@code watchType} is set to {@link FileProcessingMode#PROCESS_ONCE},
 * the source monitors the path <b>once</b>, creates the {@link org.apache.flink.core.fs.FileInputSplit FileInputSplits}
 * to be processed, forwards them to the downstream {@link ContinuousFileReaderOperator readers} to read the actual data,
 * and exits, without waiting for the readers to finish reading. This implies that no more checkpoint barriers
 * are going to be forwarded after the source exits, thus having no checkpoints after that point.
 *
 * @param inputFormat
 * 		The input format used to create the data stream
 * @param filePath
 * 		The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path")
 * @param watchType
 * 		The mode in which the source should operate, i.e. monitor path and react to new data, or process once and exit
 * @param interval
 * 		In the case of periodic path monitoring, this specifies the interval (in millis) between consecutive path scans
 * @param <OUT>
 * 		The type of the returned data stream
 * @return The data stream that represents the data read from the given file
 * @throws InvalidProgramException
 * 		If the type produced by the input format cannot be determined; the original exception is attached as the cause
 */
@PublicEvolving
public <OUT> DataStreamSource<OUT> readFile(FileInputFormat<OUT> inputFormat,
											String filePath,
											FileProcessingMode watchType,
											long interval) {

	TypeInformation<OUT> typeInformation;
	try {
		typeInformation = TypeExtractor.getInputFormatTypes(inputFormat);
	} catch (Exception e) {
		// Attach the extraction failure as the cause so the real reason is not lost to the caller.
		throw new InvalidProgramException("The type returned by the input format could not be " +
				"automatically determined. Please specify the TypeInformation of the produced type " +
				"explicitly by using the 'createInput(InputFormat, TypeInformation)' method instead.", e);
	}
	return readFile(inputFormat, filePath, watchType, interval, typeInformation);
}
 
Example 20
Source Project: Flink-CEPplus   Source File: StreamExecutionEnvironment.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates an input data stream from an {@link org.apache.flink.api.common.io.InputFormat}, typed
 * to the explicitly supplied {@code typeInfo}.
 *
 * <p>Use this for input formats whose produced type cannot be found by reflection analysis and
 * that do not implement {@link org.apache.flink.api.java.typeutils.ResultTypeQueryable}.
 *
 * <p>A {@link FileInputFormat} is routed to {@code createFileInput} with the source name
 * {@code "Custom File source"}, {@link FileProcessingMode#PROCESS_ONCE} and an interval of
 * {@code -1}. Any other format goes to the three-argument {@code createInput} with the name
 * {@code "Custom Source"}.
 *
 * <p><b>NOTES ON CHECKPOINTING: </b> In the case of a {@link FileInputFormat}, the source
 * (which executes the {@link ContinuousFileMonitoringFunction}) monitors the path, creates the
 * {@link org.apache.flink.core.fs.FileInputSplit FileInputSplits} to be processed, forwards
 * them to the downstream {@link ContinuousFileReaderOperator} to read the actual data, and exits,
 * without waiting for the readers to finish reading. This implies that no more checkpoint
 * barriers are going to be forwarded after the source exits, thus having no checkpoints.
 *
 * @param inputFormat
 * 		The input format used to create the data stream
 * @param typeInfo
 * 		The type information of the produced records
 * @param <OUT>
 * 		The type of the returned data stream
 * @return The data stream that represents the data created by the input format
 */
@PublicEvolving
public <OUT> DataStreamSource<OUT> createInput(InputFormat<OUT, ?> inputFormat, TypeInformation<OUT> typeInfo) {
	// Anything that is not file-based goes straight to the generic source creation.
	if (!(inputFormat instanceof FileInputFormat)) {
		return createInput(inputFormat, typeInfo, "Custom Source");
	}

	@SuppressWarnings("unchecked")
	FileInputFormat<OUT> fileFormat = (FileInputFormat<OUT>) inputFormat;

	return createFileInput(fileFormat, typeInfo, "Custom File source",
			FileProcessingMode.PROCESS_ONCE, -1);
}
 
Example 21
/**
 * Creates the operator for the given input format.
 *
 * @param format the file input format stored in the {@code format} field after being passed
 * 		through {@code checkNotNull}
 */
public ContinuousFileReaderOperator(FileInputFormat<OUT> format) {
	// NOTE(review): checkNotNull is presumably a statically imported precondition helper
	// that rejects null; confirm against the file's imports.
	this.format = checkNotNull(format);
}
 
Example 22
Source Project: flink   Source File: StreamExecutionEnvironment.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Reads the contents of the user-specified {@code filePath} based on the given {@link FileInputFormat},
 * after setting the given {@link FilePathFilter} on the input format. How the path is processed
 * depends on the provided {@link FileProcessingMode}.
 *
 * <p>See {@link #readFile(FileInputFormat, String, FileProcessingMode, long)}
 *
 * @param inputFormat
 * 		The input format used to create the data stream
 * @param filePath
 * 		The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path")
 * @param watchType
 * 		The mode in which the source should operate, i.e. monitor path and react to new data, or process once and exit
 * @param interval
 * 		In the case of periodic path monitoring, this specifies the interval (in millis) between consecutive path scans
 * @param filter
 * 		The files to be excluded from the processing
 * @param <OUT>
 * 		The type of the returned data stream
 * @return The data stream that represents the data read from the given file
 * @throws InvalidProgramException
 * 		If the type produced by the input format cannot be determined; the original exception is attached as the cause
 *
 * @deprecated Use {@link FileInputFormat#setFilesFilter(FilePathFilter)} to set a filter and
 * 		{@link StreamExecutionEnvironment#readFile(FileInputFormat, String, FileProcessingMode, long)}
 *
 */
@PublicEvolving
@Deprecated
public <OUT> DataStreamSource<OUT> readFile(FileInputFormat<OUT> inputFormat,
											String filePath,
											FileProcessingMode watchType,
											long interval,
											FilePathFilter filter) {
	inputFormat.setFilesFilter(filter);

	TypeInformation<OUT> typeInformation;
	try {
		typeInformation = TypeExtractor.getInputFormatTypes(inputFormat);
	} catch (Exception e) {
		// Attach the extraction failure as the cause so the real reason is not lost to the caller.
		throw new InvalidProgramException("The type returned by the input format could not be " +
				"automatically determined. Please specify the TypeInformation of the produced type " +
				"explicitly by using the 'createInput(InputFormat, TypeInformation)' method instead.", e);
	}
	return readFile(inputFormat, filePath, watchType, interval, typeInformation);
}
 
Example 23
Source Project: flink   Source File: StreamExecutionEnvironment.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Reads the contents of the user-specified {@code filePath} based on the given {@link FileInputFormat}. Depending
 * on the provided {@link FileProcessingMode}, the source may periodically monitor (every {@code interval} ms) the path
 * for new data ({@link FileProcessingMode#PROCESS_CONTINUOUSLY}), or process once the data currently in the path and
 * exit ({@link FileProcessingMode#PROCESS_ONCE}). In addition, if the path contains files not to be processed, the user
 * can specify a custom {@link FilePathFilter}. As a default implementation you can use
 * {@link FilePathFilter#createDefaultFilter()}.
 *
 * <p>Since all data streams need specific information about their types, this method needs to determine the
 * type of the data produced by the input format. It will attempt to determine the data type by reflection,
 * unless the input format implements the {@link org.apache.flink.api.java.typeutils.ResultTypeQueryable} interface.
 * In the latter case, this method will invoke the
 * {@link org.apache.flink.api.java.typeutils.ResultTypeQueryable#getProducedType()} method to determine data
 * type produced by the input format.
 *
 * <p><b>NOTES ON CHECKPOINTING: </b> If the {@code watchType} is set to {@link FileProcessingMode#PROCESS_ONCE},
 * the source monitors the path <b>once</b>, creates the {@link org.apache.flink.core.fs.FileInputSplit FileInputSplits}
 * to be processed, forwards them to the downstream {@link ContinuousFileReaderOperator readers} to read the actual data,
 * and exits, without waiting for the readers to finish reading. This implies that no more checkpoint barriers
 * are going to be forwarded after the source exits, thus having no checkpoints after that point.
 *
 * @param inputFormat
 * 		The input format used to create the data stream
 * @param filePath
 * 		The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path")
 * @param watchType
 * 		The mode in which the source should operate, i.e. monitor path and react to new data, or process once and exit
 * @param interval
 * 		In the case of periodic path monitoring, this specifies the interval (in millis) between consecutive path scans
 * @param <OUT>
 * 		The type of the returned data stream
 * @return The data stream that represents the data read from the given file
 * @throws InvalidProgramException
 * 		If the type produced by the input format cannot be determined; the original exception is attached as the cause
 */
@PublicEvolving
public <OUT> DataStreamSource<OUT> readFile(FileInputFormat<OUT> inputFormat,
											String filePath,
											FileProcessingMode watchType,
											long interval) {

	TypeInformation<OUT> typeInformation;
	try {
		typeInformation = TypeExtractor.getInputFormatTypes(inputFormat);
	} catch (Exception e) {
		// Attach the extraction failure as the cause so the real reason is not lost to the caller.
		throw new InvalidProgramException("The type returned by the input format could not be " +
				"automatically determined. Please specify the TypeInformation of the produced type " +
				"explicitly by using the 'createInput(InputFormat, TypeInformation)' method instead.", e);
	}
	return readFile(inputFormat, filePath, watchType, interval, typeInformation);
}
 
Example 24
Source Project: flink   Source File: StreamExecutionEnvironment.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates an input data stream from an {@link org.apache.flink.api.common.io.InputFormat}, typed
 * to the explicitly supplied {@code typeInfo}.
 *
 * <p>Use this for input formats whose produced type cannot be found by reflection analysis and
 * that do not implement {@link org.apache.flink.api.java.typeutils.ResultTypeQueryable}.
 *
 * <p>A {@link FileInputFormat} is routed to {@code createFileInput} with the source name
 * {@code "Custom File source"}, {@link FileProcessingMode#PROCESS_ONCE} and an interval of
 * {@code -1}. Any other format goes to the three-argument {@code createInput} with the name
 * {@code "Custom Source"}.
 *
 * <p><b>NOTES ON CHECKPOINTING: </b> In the case of a {@link FileInputFormat}, the source
 * (which executes the {@link ContinuousFileMonitoringFunction}) monitors the path, creates the
 * {@link org.apache.flink.core.fs.FileInputSplit FileInputSplits} to be processed, forwards
 * them to the downstream {@link ContinuousFileReaderOperator} to read the actual data, and exits,
 * without waiting for the readers to finish reading. This implies that no more checkpoint
 * barriers are going to be forwarded after the source exits, thus having no checkpoints.
 *
 * @param inputFormat
 * 		The input format used to create the data stream
 * @param typeInfo
 * 		The type information of the produced records
 * @param <OUT>
 * 		The type of the returned data stream
 * @return The data stream that represents the data created by the input format
 */
@PublicEvolving
public <OUT> DataStreamSource<OUT> createInput(InputFormat<OUT, ?> inputFormat, TypeInformation<OUT> typeInfo) {
	// Anything that is not file-based goes straight to the generic source creation.
	if (!(inputFormat instanceof FileInputFormat)) {
		return createInput(inputFormat, typeInfo, "Custom Source");
	}

	@SuppressWarnings("unchecked")
	FileInputFormat<OUT> fileFormat = (FileInputFormat<OUT>) inputFormat;

	return createFileInput(fileFormat, typeInfo, "Custom File source",
			FileProcessingMode.PROCESS_ONCE, -1);
}
 
Example 25
Source Project: flink   Source File: ContinuousFileReaderOperator.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates the operator for the given input format.
 *
 * @param format the file input format stored in the {@code format} field after being passed
 * 		through {@code checkNotNull}
 */
public ContinuousFileReaderOperator(FileInputFormat<OUT> format) {
	// NOTE(review): checkNotNull is presumably a statically imported precondition helper
	// that rejects null; confirm against the file's imports.
	this.format = checkNotNull(format);
}
 
Example 26
/**
 * The Flink input format for this input.
 *
 * <p>This implementation ignores {@code path} and always returns {@code null}.
 *
 * @param path the input path (unused here)
 * @return always {@code null}
 */
public FileInputFormat<FData> inputFormat ( String path ) { return null; }
 
Example 27
/**
 * The Flink input format for this input: a new {@code FDataInputFormat} with its file path
 * set to {@code path}.
 *
 * @param path the file path set on the returned format
 * @return the newly created format
 */
public FileInputFormat<FData> inputFormat ( String path  ) {
    final FDataInputFormat format = new FDataInputFormat();
    // toString() on a String returns the same string; it is kept so that a null path
    // still fails at this line exactly as before.
    final String filePath = path.toString();
    format.setFilePath(filePath);
    return format;
}
 
Example 28
/**
 * The Flink input format for this input: a new {@code ParsedInputFormat} constructed from
 * {@code path}.
 *
 * @param path the path handed to the {@code ParsedInputFormat} constructor
 * @return the newly created format
 */
public FileInputFormat<FData> inputFormat ( String path ) {
    final FileInputFormat<FData> parsedFormat = new ParsedInputFormat(path);
    return parsedFormat;
}
 
Example 29
/**
 * The Flink input format for this input.
 *
 * <p>Abstract: each concrete subclass supplies the format.
 *
 * @param path the input path handed to the implementation
 * @return the input format chosen by the implementation
 */
abstract public FileInputFormat<FData> inputFormat ( String path );
 
Example 30
/**
 * The Flink input format for this input: a new {@code FDataInputFormat} with its file path
 * set to {@code path}.
 *
 * @param path the file path set on the returned format
 * @return the newly created format
 */
public FileInputFormat<FData> inputFormat ( String path  ) {
    final FDataInputFormat format = new FDataInputFormat();
    // toString() on a String returns the same string; it is kept so that a null path
    // still fails at this line exactly as before.
    final String filePath = path.toString();
    format.setFilePath(filePath);
    return format;
}