Java Code Examples for org.apache.flink.core.fs.FileStatus#getModificationTime()

The following examples show how to use org.apache.flink.core.fs.FileStatus#getModificationTime(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out the related API usage in the sidebar.
Example 1
Source File: ContinuousFileMonitoringFunction.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Groups the input splits of the eligible files by the modification time of the
 * file each split belongs to, so that they can be forwarded to the downstream
 * {@link ContinuousFileReaderOperator} tasks. The returned map is a sorted map
 * keyed by modification time, so iterating it yields the splits <b>in ascending
 * modification time</b>. Splits whose path is not a key of {@code eligibleFiles}
 * are left out.
 *
 * @param eligibleFiles The files to process, keyed by their path.
 * @return The splits to forward, keyed by file modification time; empty when
 *         {@code eligibleFiles} is empty (no splits are created in that case).
 * @throws IOException If creating the input splits fails.
 */
private Map<Long, List<TimestampedFileInputSplit>> getInputSplitsSortedByModTime(
			Map<Path, FileStatus> eligibleFiles) throws IOException {

	final Map<Long, List<TimestampedFileInputSplit>> grouped = new TreeMap<>();

	// nothing to forward: skip split creation altogether
	if (eligibleFiles.isEmpty()) {
		return grouped;
	}

	for (FileInputSplit candidate : format.createInputSplits(readerParallelism)) {
		final FileStatus owner = eligibleFiles.get(candidate.getPath());
		if (owner == null) {
			// the split belongs to a file that is not eligible
			continue;
		}

		final Long timestamp = owner.getModificationTime();

		List<TimestampedFileInputSplit> bucket = grouped.get(timestamp);
		if (bucket == null) {
			bucket = new ArrayList<>();
			grouped.put(timestamp, bucket);
		}

		bucket.add(new TimestampedFileInputSplit(
			timestamp,
			candidate.getSplitNumber(),
			candidate.getPath(),
			candidate.getStart(),
			candidate.getLength(),
			candidate.getHostnames()));
	}

	return grouped;
}
 
Example 2
Source File: FileMonitoringFunction.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Lists the entries of the monitored {@code path} for which {@code isFiltered}
 * returns {@code false}, and records the modification time of each such entry
 * in {@code modificationTimes} under its file name.
 *
 * @param fileSystem The file system used to list the monitored path.
 * @return The string form of the paths that passed the filter; empty if the
 *         listing returned {@code null}.
 * @throws IOException If listing the path fails.
 */
private List<String> listNewFiles(FileSystem fileSystem) throws IOException {
	final List<String> newFiles = new ArrayList<String>();
	final FileStatus[] entries = fileSystem.listStatus(new Path(path));

	// a null listing is reported and treated as "no new files"
	if (entries == null) {
		LOG.warn("Path does not exist: {}", path);
		return newFiles;
	}

	for (FileStatus entry : entries) {
		final Path entryPath = entry.getPath();
		final String name = entryPath.getName();
		final long lastModified = entry.getModificationTime();

		if (isFiltered(name, lastModified)) {
			continue;
		}

		newFiles.add(entryPath.toString());
		modificationTimes.put(name, lastModified);
	}

	return newFiles;
}
 
Example 3
Source File: ContinuousFileMonitoringFunction.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the timestamped input splits that are handed to the downstream tasks of
 * the {@link ContinuousFileReaderOperator}. Each split is stamped with the
 * modification time of its file and collected in a sorted map keyed by that
 * time, so the result is ordered <b>by modification time</b>. Only splits whose
 * path appears in {@code eligibleFiles} are kept.
 *
 * @param eligibleFiles The files to process, keyed by their path.
 * @return A sorted map from modification time to the splits of the files with
 *         that modification time; empty when there are no eligible files.
 * @throws IOException If creating the input splits fails.
 */
private Map<Long, List<TimestampedFileInputSplit>> getInputSplitsSortedByModTime(
			Map<Path, FileStatus> eligibleFiles) throws IOException {

	final Map<Long, List<TimestampedFileInputSplit>> sortedSplits = new TreeMap<>();
	if (eligibleFiles.isEmpty()) {
		return sortedSplits;
	}

	for (FileInputSplit rawSplit : format.createInputSplits(readerParallelism)) {
		final FileStatus status = eligibleFiles.get(rawSplit.getPath());
		if (status != null) {
			final Long modTime = status.getModificationTime();
			final TimestampedFileInputSplit stamped = new TimestampedFileInputSplit(
				modTime, rawSplit.getSplitNumber(), rawSplit.getPath(),
				rawSplit.getStart(), rawSplit.getLength(), rawSplit.getHostnames());

			if (sortedSplits.containsKey(modTime)) {
				// another split with the same modification time was seen before
				sortedSplits.get(modTime).add(stamped);
			} else {
				final List<TimestampedFileInputSplit> sameModTime = new ArrayList<>();
				sameModTime.add(stamped);
				sortedSplits.put(modTime, sameModTime);
			}
		}
	}
	return sortedSplits;
}
 
Example 4
Source File: FileMonitoringFunction.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Scans the monitored {@code path} and returns the entries that are not filtered
 * out by {@code isFiltered}. For every returned entry the modification time is
 * stored in {@code modificationTimes}, keyed by the file name.
 *
 * @param fileSystem The file system on which the monitored path is listed.
 * @return The paths (as strings) of the entries that passed the filter; empty if
 *         the listing returned {@code null}.
 * @throws IOException If listing the path fails.
 */
private List<String> listNewFiles(FileSystem fileSystem) throws IOException {
	final FileStatus[] listing = fileSystem.listStatus(new Path(path));
	final List<String> accepted = new ArrayList<String>();

	if (listing != null) {
		for (FileStatus current : listing) {
			final Path location = current.getPath();
			final String shortName = location.getName();
			final long modTime = current.getModificationTime();

			final boolean skip = isFiltered(shortName, modTime);
			if (!skip) {
				accepted.add(location.toString());
				modificationTimes.put(shortName, modTime);
			}
		}
	} else {
		LOG.warn("Path does not exist: {}", path);
	}

	return accepted;
}
 
Example 5
Source File: ContinuousFileMonitoringFunction.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the splits that will be sent to the downstream
 * {@link ContinuousFileReaderOperator} tasks and buckets them by the modification
 * time of their file. The buckets live in a sorted map, so they are ordered
 * <b>by modification time</b>. A split is only included if its path is a key of
 * {@code eligibleFiles}.
 *
 * @param eligibleFiles The files to process, keyed by their path.
 * @return The buckets of splits keyed by modification time; empty when
 *         {@code eligibleFiles} is empty, in which case no splits are created.
 * @throws IOException If creating the input splits fails.
 */
private Map<Long, List<TimestampedFileInputSplit>> getInputSplitsSortedByModTime(
			Map<Path, FileStatus> eligibleFiles) throws IOException {

	final Map<Long, List<TimestampedFileInputSplit>> buckets = new TreeMap<>();

	if (!eligibleFiles.isEmpty()) {
		for (FileInputSplit inputSplit : format.createInputSplits(readerParallelism)) {
			final FileStatus matchingStatus = eligibleFiles.get(inputSplit.getPath());
			if (matchingStatus == null) {
				continue;
			}

			final Long fileModTime = matchingStatus.getModificationTime();

			// lazily create the bucket for this modification time
			List<TimestampedFileInputSplit> target = buckets.get(fileModTime);
			if (target == null) {
				target = new ArrayList<>();
				buckets.put(fileModTime, target);
			}

			final TimestampedFileInputSplit forwarded = new TimestampedFileInputSplit(
				fileModTime, inputSplit.getSplitNumber(), inputSplit.getPath(),
				inputSplit.getStart(), inputSplit.getLength(), inputSplit.getHostnames());
			target.add(forwarded);
		}
	}

	return buckets;
}
 
Example 6
Source File: FileMonitoringFunction.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the entries found under the monitored {@code path} that
 * {@code isFiltered} does not reject. The modification time of each returned
 * entry is written to {@code modificationTimes} under the entry's file name.
 *
 * @param fileSystem The file system to list the monitored path on.
 * @return The string representation of the accepted paths; empty if the listing
 *         returned {@code null}.
 * @throws IOException If listing the path fails.
 */
private List<String> listNewFiles(FileSystem fileSystem) throws IOException {
	final List<String> result = new ArrayList<String>();

	final FileStatus[] found = fileSystem.listStatus(new Path(path));
	if (found == null) {
		LOG.warn("Path does not exist: {}", path);
		return result;
	}

	for (int i = 0; i < found.length; i++) {
		final FileStatus item = found[i];
		final Path itemPath = item.getPath();
		final String itemName = itemPath.getName();
		final long itemModTime = item.getModificationTime();

		if (!isFiltered(itemName, itemModTime)) {
			result.add(itemPath.toString());
			modificationTimes.put(itemName, itemModTime);
		}
	}

	return result;
}
 
Example 7
Source File: ContinuousFileMonitoringFunction.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the files under {@code path} that {@code shouldIgnore} does not
 * reject, keyed by their path. Directories are descended into recursively when
 * the format has nested file enumeration enabled and accepts the directory.
 *
 * @param fileSystem The filesystem where the monitored directory resides.
 * @param path The directory to list.
 * @return The eligible files with their status; an empty map if listing
 *         {@code path} throws an {@link IOException} or returns {@code null}.
 */
private Map<Path, FileStatus> listEligibleFiles(FileSystem fileSystem, Path path) throws IOException {

	final FileStatus[] listing;
	try {
		listing = fileSystem.listStatus(path);
	} catch (IOException e) {
		// files may be moved while their status is being listed, which can
		// surface as an IOException; report nothing now and retry on the next check
		return Collections.emptyMap();
	}

	if (listing == null) {
		LOG.warn("Path does not exist: {}", path);
		return Collections.emptyMap();
	}

	final Map<Path, FileStatus> eligible = new HashMap<>();
	for (FileStatus entry : listing) {
		if (entry.isDir()) {
			// descend only if nested enumeration is on and the format accepts the directory
			if (format.getNestedFileEnumeration() && format.acceptFile(entry)) {
				eligible.putAll(listEligibleFiles(fileSystem, entry.getPath()));
			}
			continue;
		}

		final Path entryPath = entry.getPath();
		final long lastModified = entry.getModificationTime();
		if (!shouldIgnore(entryPath, lastModified)) {
			eligible.put(entryPath, entry);
		}
	}
	return eligible;
}
 
Example 8
Source File: ContinuousFileMonitoringFunction.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Lists {@code path} and returns every non-directory entry for which
 * {@code shouldIgnore} is {@code false}, mapped from its path to its status.
 * Sub-directories are listed recursively if the format enables nested file
 * enumeration and accepts the sub-directory.
 *
 * @param fileSystem The filesystem where the monitored directory resides.
 * @param path The directory whose content is listed.
 * @return The files to process; an empty map if the listing fails with an
 *         {@link IOException} or yields {@code null}.
 */
private Map<Path, FileStatus> listEligibleFiles(FileSystem fileSystem, Path path) throws IOException {

	final FileStatus[] children;
	try {
		children = fileSystem.listStatus(path);
	} catch (IOException e) {
		// listing can fail with an IOException when files are moved concurrently;
		// postpone the eligibility check to a later run in that case
		return Collections.emptyMap();
	}

	if (children == null) {
		LOG.warn("Path does not exist: {}", path);
		return Collections.emptyMap();
	}

	final Map<Path, FileStatus> collected = new HashMap<>();
	for (int i = 0; i < children.length; i++) {
		final FileStatus child = children[i];

		if (!child.isDir()) {
			// plain file: keep it unless it should be ignored
			final Path childPath = child.getPath();
			final long childModTime = child.getModificationTime();
			final boolean ignored = shouldIgnore(childPath, childModTime);
			if (!ignored) {
				collected.put(childPath, child);
			}
		} else if (format.getNestedFileEnumeration() && format.acceptFile(child)) {
			// accepted directory: merge in the eligible files found below it
			final Map<Path, FileStatus> nested = listEligibleFiles(fileSystem, child.getPath());
			collected.putAll(nested);
		}
	}
	return collected;
}
 
Example 9
Source File: ContinuousFileMonitoringFunction.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Gathers the files below {@code path} that still need processing, i.e. the
 * non-directory entries that {@code shouldIgnore} does not reject. When the
 * format enables nested file enumeration and accepts a directory, that directory
 * is scanned recursively and its eligible files are included.
 *
 * @param fileSystem The filesystem where the monitored directory resides.
 * @param path The directory to scan.
 * @return A map from file path to file status; empty if listing {@code path}
 *         throws an {@link IOException} or returns {@code null}.
 */
private Map<Path, FileStatus> listEligibleFiles(FileSystem fileSystem, Path path) throws IOException {

	final FileStatus[] scanned;
	try {
		scanned = fileSystem.listStatus(path);
	} catch (IOException e) {
		// moving files during the listing may trigger an IOException;
		// return nothing so that the check is simply repeated later
		return Collections.emptyMap();
	}

	if (scanned != null) {
		final Map<Path, FileStatus> pending = new HashMap<>();
		for (FileStatus element : scanned) {
			if (element.isDir()) {
				// nested ifs keep the original short-circuit order of the two format checks
				if (format.getNestedFileEnumeration()) {
					if (format.acceptFile(element)) {
						pending.putAll(listEligibleFiles(fileSystem, element.getPath()));
					}
				}
			} else {
				final Path elementPath = element.getPath();
				final long elementModTime = element.getModificationTime();
				if (!shouldIgnore(elementPath, elementModTime)) {
					pending.put(elementPath, element);
				}
			}
		}
		return pending;
	}

	LOG.warn("Path does not exist: {}", path);
	return Collections.emptyMap();
}