Java Code Examples for org.apache.flink.core.fs.FileStatus#isDir()

The following examples show how to use org.apache.flink.core.fs.FileStatus#isDir(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BinaryInputFormat.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Collects the status of every file referenced by the configured input paths.
 *
 * <p>An input path that is a directory contributes the non-directory entries listed
 * for it; any other input path contributes its own status.
 *
 * @return the statuses of all files that are involved in the splits
 * @throws IOException if looking up or listing a path fails
 */
protected List<FileStatus> getFiles() throws IOException {
	final List<FileStatus> collected = new ArrayList<>();

	for (Path inputPath : getFilePaths()) {
		final FileSystem fileSystem = inputPath.getFileSystem();
		final FileStatus inputStatus = fileSystem.getFileStatus(inputPath);

		if (!inputStatus.isDir()) {
			// input is a single file: take it as is
			collected.add(inputStatus);
			continue;
		}

		// input is a directory: take the listed entries, skipping sub-directories
		for (FileStatus child : fileSystem.listStatus(inputPath)) {
			if (child.isDir()) {
				continue;
			}
			collected.add(child);
		}
	}
	return collected;
}
 
Example 2
Source File: BinaryInputFormat.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Collects the status of every file referenced by the configured input paths.
 *
 * <p>An input path that is a directory contributes the non-directory entries listed
 * for it; any other input path contributes its own status.
 *
 * @return the statuses of all files that are involved in the splits
 * @throws IOException if looking up or listing a path fails
 */
protected List<FileStatus> getFiles() throws IOException {
	final List<FileStatus> collected = new ArrayList<>();

	for (Path inputPath : getFilePaths()) {
		final FileSystem fileSystem = inputPath.getFileSystem();
		final FileStatus inputStatus = fileSystem.getFileStatus(inputPath);

		if (!inputStatus.isDir()) {
			// input is a single file: take it as is
			collected.add(inputStatus);
			continue;
		}

		// input is a directory: take the listed entries, skipping sub-directories
		for (FileStatus child : fileSystem.listStatus(inputPath)) {
			if (child.isDir()) {
				continue;
			}
			collected.add(child);
		}
	}
	return collected;
}
 
Example 3
Source File: BinaryInputFormat.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Collects the status of every file referenced by the configured input paths.
 *
 * <p>An input path that is a directory contributes the non-directory entries listed
 * for it; any other input path contributes its own status.
 *
 * @return the statuses of all files that are involved in the splits
 * @throws IOException if looking up or listing a path fails
 */
protected List<FileStatus> getFiles() throws IOException {
	final List<FileStatus> collected = new ArrayList<>();

	for (Path inputPath : getFilePaths()) {
		final FileSystem fileSystem = inputPath.getFileSystem();
		final FileStatus inputStatus = fileSystem.getFileStatus(inputPath);

		if (!inputStatus.isDir()) {
			// input is a single file: take it as is
			collected.add(inputStatus);
			continue;
		}

		// input is a directory: take the listed entries, skipping sub-directories
		for (FileStatus child : fileSystem.listStatus(inputPath)) {
			if (child.isDir()) {
				continue;
			}
			collected.add(child);
		}
	}
	return collected;
}
 
Example 4
Source File: PartitionPathUtils.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Descends from {@code fileStatus} and collects every entry reached at exactly
 * {@code expectLevel}.
 *
 * @param fs file system used to list directory contents
 * @param fileStatus entry currently being visited
 * @param level depth assigned to {@code fileStatus}
 * @param expectLevel depth at which entries are added to {@code results}
 * @param results receives the entries found at {@code expectLevel}
 * @throws IOException if listing a directory fails
 */
private static void listStatusRecursively(
		FileSystem fs,
		FileStatus fileStatus,
		int level,
		int expectLevel,
		List<FileStatus> results) throws IOException {
	if (level != expectLevel) {
		if (!fileStatus.isDir()) {
			// a non-directory that is not at the expected depth contributes nothing
			return;
		}
		final int childLevel = level + 1;
		for (FileStatus child : fs.listStatus(fileStatus.getPath())) {
			listStatusRecursively(fs, child, childLevel, expectLevel, results);
		}
		return;
	}

	// reached the requested depth: record the entry and stop descending
	results.add(fileStatus);
}
 
Example 5
Source File: FileUtils.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates {@code targetPath} and copies every entry listed under {@code sourcePath}
 * into it, keeping each entry's last path segment as its name.
 *
 * @param sourcePath directory whose entries are copied
 * @param targetPath directory that receives the copies
 * @param executable passed through unchanged to {@code copy}
 * @param sFS file system used to list {@code sourcePath}
 * @param tFS file system used to create {@code targetPath}
 * @throws IOException if creating the target, listing the source, or copying fails
 */
private static void internalCopyDirectory(Path sourcePath, Path targetPath, boolean executable, FileSystem sFS, FileSystem tFS) throws IOException {
	tFS.mkdirs(targetPath);
	for (FileStatus entry : sFS.listStatus(sourcePath)) {
		String sourceName = entry.getPath().toString();
		// strip a trailing slash on directories so the last segment is the directory name
		if (entry.isDir() && sourceName.endsWith("/")) {
			sourceName = sourceName.substring(0, sourceName.length() - 1);
		}
		// last path segment, including its leading '/'
		final String lastSegment = sourceName.substring(sourceName.lastIndexOf("/"));
		final String localPath = targetPath + lastSegment;
		copy(entry.getPath(), new Path(localPath), executable);
	}
}
 
Example 6
Source File: DistCp.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Recursively creates one copy task for every non-directory entry found under {@code p}.
 *
 * @param p path whose listing is traversed
 * @param rel prefix prepended to each file name; each nested directory appends its
 *            own name followed by {@code "/"}
 * @param tasks receives the created copy tasks
 * @throws IOException if obtaining the file system or listing a path fails
 */
private static void getCopyTasks(Path p, String rel, List<FileCopyTask> tasks) throws IOException {
	final FileStatus[] listing = p.getFileSystem().listStatus(p);
	if (listing == null) {
		// no listing available for this path, so there is nothing to add
		return;
	}
	for (FileStatus entry : listing) {
		if (!entry.isDir()) {
			final Path filePath = entry.getPath();
			tasks.add(new FileCopyTask(filePath, rel + filePath.getName()));
			continue;
		}
		// directory: descend with the relative prefix extended by the directory name
		final String nestedRel = rel + entry.getPath().getName() + "/";
		getCopyTasks(entry.getPath(), nestedRel, tasks);
	}
}
 
Example 7
Source File: ContinuousFileMonitoringFunction.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the files under {@code path} that are not yet processed, i.e. the files
 * that {@code shouldIgnore} does not reject.
 *
 * @param fileSystem The filesystem where the monitored directory resides.
 * @param path The directory whose entries are examined.
 * @return A map from file path to file status; empty if the listing throws an
 *         {@link IOException} or returns {@code null}.
 */
private Map<Path, FileStatus> listEligibleFiles(FileSystem fileSystem, Path path) throws IOException {

	final FileStatus[] listing;
	try {
		listing = fileSystem.listStatus(path);
	} catch (IOException e) {
		// we may run into an IOException if files are moved while listing their status
		// delay the check for eligible files in this case
		return Collections.emptyMap();
	}

	if (listing == null) {
		LOG.warn("Path does not exist: {}", path);
		return Collections.emptyMap();
	}

	final Map<Path, FileStatus> eligible = new HashMap<>();
	for (FileStatus entry : listing) {
		if (entry.isDir()) {
			// descend only when nested enumeration is enabled and the format accepts the directory
			if (format.getNestedFileEnumeration() && format.acceptFile(entry)) {
				eligible.putAll(listEligibleFiles(fileSystem, entry.getPath()));
			}
			continue;
		}

		// plain file: keep it unless it should be ignored
		final Path filePath = entry.getPath();
		final long modificationTime = entry.getModificationTime();
		if (!shouldIgnore(filePath, modificationTime)) {
			eligible.put(filePath, entry);
		}
	}
	return eligible;
}
 
Example 8
Source File: FileUtils.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates {@code targetPath} and copies every entry listed under {@code sourcePath}
 * into it, keeping each entry's last path segment as its name.
 *
 * @param sourcePath directory whose entries are copied
 * @param targetPath directory that receives the copies
 * @param executable passed through unchanged to {@code copy}
 * @param sFS file system used to list {@code sourcePath}
 * @param tFS file system used to create {@code targetPath}
 * @throws IOException if creating the target, listing the source, or copying fails
 */
private static void internalCopyDirectory(Path sourcePath, Path targetPath, boolean executable, FileSystem sFS, FileSystem tFS) throws IOException {
	tFS.mkdirs(targetPath);
	for (FileStatus entry : sFS.listStatus(sourcePath)) {
		String sourceName = entry.getPath().toString();
		// strip a trailing slash on directories so the last segment is the directory name
		if (entry.isDir() && sourceName.endsWith("/")) {
			sourceName = sourceName.substring(0, sourceName.length() - 1);
		}
		// last path segment, including its leading '/'
		final String lastSegment = sourceName.substring(sourceName.lastIndexOf("/"));
		final String localPath = targetPath + lastSegment;
		copy(entry.getPath(), new Path(localPath), executable);
	}
}
 
Example 9
Source File: FileInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Enumerate all files in the directory and recursive if enumerateNestedFiles is true.
 *
 * @param path the directory whose entries are enumerated
 * @param files receives every accepted file
 * @param logExcludedFiles whether excluded entries are reported at debug level
 * @return the total length of accepted files.
 * @throws IOException if obtaining the file system or listing a directory fails
 */
private long addFilesInDir(Path path, List<FileStatus> files, boolean logExcludedFiles)
		throws IOException {
	final FileSystem fs = path.getFileSystem();

	long length = 0;

	for (FileStatus entry : fs.listStatus(path)) {
		if (entry.isDir()) {
			if (acceptFile(entry) && enumerateNestedFiles) {
				length += addFilesInDir(entry.getPath(), files, logExcludedFiles);
			} else if (logExcludedFiles && LOG.isDebugEnabled()) {
				LOG.debug("Directory "+entry.getPath().toString()+" did not pass the file-filter and is excluded.");
			}
		} else if (acceptFile(entry)) {
			files.add(entry);
			length += entry.getLen();
			testForUnsplittable(entry);
		} else if (logExcludedFiles && LOG.isDebugEnabled()) {
			// this branch handles non-directories, so report the entry as a file
			LOG.debug("File "+entry.getPath().toString()+" did not pass the file-filter and is excluded.");
		}
	}
	return length;
}
 
Example 10
Source File: FileInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the statistics for {@code filePath}, returning {@code cachedStats} when
 * none of the enumerated files is newer than the cached statistics.
 *
 * @param cachedStats previously computed statistics, may be {@code null}
 * @param filePath file or directory to examine
 * @param fs file system used to look up {@code filePath}
 * @param files receives the status of every enumerated file
 * @return {@code cachedStats} if still valid, otherwise newly created statistics
 * @throws IOException if looking up or enumerating files fails
 */
protected FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, Path filePath, FileSystem fs, ArrayList<FileStatus> files) throws IOException {

	final FileStatus root = fs.getFileStatus(filePath);
	long totalLength = 0;

	// enumerate all files and accumulate their length
	if (!root.isDir()) {
		files.add(root);
		testForUnsplittable(root);
		totalLength += root.getLen();
	} else {
		totalLength += addFilesInDir(root.getPath(), files, false);
	}

	// newest modification time stamp over everything in the list
	long newestModTime = 0;
	for (FileStatus status : files) {
		newestModTime = Math.max(status.getModificationTime(), newestModTime);
	}

	// the cached statistics stay valid as long as nothing was modified after them
	final boolean cacheStillValid =
			cachedStats != null && newestModTime <= cachedStats.getLastModificationTime();
	if (cacheStillValid) {
		return cachedStats;
	}

	// sanity check: a non-positive length is reported as unknown
	final long reportedLength = totalLength <= 0 ? BaseStatistics.SIZE_UNKNOWN : totalLength;
	return new FileBaseStatistics(newestModTime, reportedLength, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
}
 
Example 11
Source File: DistCp.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Recursively creates one copy task for every non-directory entry found under {@code p}.
 *
 * @param p path whose listing is traversed
 * @param rel prefix prepended to each file name; each nested directory appends its
 *            own name followed by {@code "/"}
 * @param tasks receives the created copy tasks
 * @throws IOException if obtaining the file system or listing a path fails
 */
private static void getCopyTasks(Path p, String rel, List<FileCopyTask> tasks) throws IOException {
	final FileStatus[] listing = p.getFileSystem().listStatus(p);
	if (listing == null) {
		// no listing available for this path, so there is nothing to add
		return;
	}
	for (FileStatus entry : listing) {
		if (!entry.isDir()) {
			final Path filePath = entry.getPath();
			tasks.add(new FileCopyTask(filePath, rel + filePath.getName()));
			continue;
		}
		// directory: descend with the relative prefix extended by the directory name
		final String nestedRel = rel + entry.getPath().getName() + "/";
		getCopyTasks(entry.getPath(), nestedRel, tasks);
	}
}
 
Example 12
Source File: ContinuousFileMonitoringFunction.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the files under {@code path} that are not yet processed, i.e. the files
 * that {@code shouldIgnore} does not reject.
 *
 * @param fileSystem The filesystem where the monitored directory resides.
 * @param path The directory whose entries are examined.
 * @return A map from file path to file status; empty if the listing throws an
 *         {@link IOException} or returns {@code null}.
 */
private Map<Path, FileStatus> listEligibleFiles(FileSystem fileSystem, Path path) throws IOException {

	final FileStatus[] listing;
	try {
		listing = fileSystem.listStatus(path);
	} catch (IOException e) {
		// we may run into an IOException if files are moved while listing their status
		// delay the check for eligible files in this case
		return Collections.emptyMap();
	}

	if (listing == null) {
		LOG.warn("Path does not exist: {}", path);
		return Collections.emptyMap();
	}

	final Map<Path, FileStatus> eligible = new HashMap<>();
	for (FileStatus entry : listing) {
		if (entry.isDir()) {
			// descend only when nested enumeration is enabled and the format accepts the directory
			if (format.getNestedFileEnumeration() && format.acceptFile(entry)) {
				eligible.putAll(listEligibleFiles(fileSystem, entry.getPath()));
			}
			continue;
		}

		// plain file: keep it unless it should be ignored
		final Path filePath = entry.getPath();
		final long modificationTime = entry.getModificationTime();
		if (!shouldIgnore(filePath, modificationTime)) {
			eligible.put(filePath, entry);
		}
	}
	return eligible;
}
 
Example 13
Source File: FileInputFormat.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the statistics for {@code filePath}, returning {@code cachedStats} when
 * none of the enumerated files is newer than the cached statistics.
 *
 * @param cachedStats previously computed statistics, may be {@code null}
 * @param filePath file or directory to examine
 * @param fs file system used to look up {@code filePath}
 * @param files receives the status of every enumerated file
 * @return {@code cachedStats} if still valid, otherwise newly created statistics
 * @throws IOException if looking up or enumerating files fails
 */
protected FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, Path filePath, FileSystem fs, ArrayList<FileStatus> files) throws IOException {

	final FileStatus root = fs.getFileStatus(filePath);
	long totalLength = 0;

	// enumerate all files and accumulate their length
	if (!root.isDir()) {
		files.add(root);
		testForUnsplittable(root);
		totalLength += root.getLen();
	} else {
		totalLength += addFilesInDir(root.getPath(), files, false);
	}

	// newest modification time stamp over everything in the list
	long newestModTime = 0;
	for (FileStatus status : files) {
		newestModTime = Math.max(status.getModificationTime(), newestModTime);
	}

	// the cached statistics stay valid as long as nothing was modified after them
	final boolean cacheStillValid =
			cachedStats != null && newestModTime <= cachedStats.getLastModificationTime();
	if (cacheStillValid) {
		return cachedStats;
	}

	// sanity check: a non-positive length is reported as unknown
	final long reportedLength = totalLength <= 0 ? BaseStatistics.SIZE_UNKNOWN : totalLength;
	return new FileBaseStatistics(newestModTime, reportedLength, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
}
 
Example 14
Source File: FileInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Enumerate all files in the directory and recursive if enumerateNestedFiles is true.
 *
 * @param path the directory whose entries are enumerated
 * @param files receives every accepted file
 * @param logExcludedFiles whether excluded entries are reported at debug level
 * @return the total length of accepted files.
 * @throws IOException if obtaining the file system or listing a directory fails
 */
private long addFilesInDir(Path path, List<FileStatus> files, boolean logExcludedFiles)
		throws IOException {
	final FileSystem fs = path.getFileSystem();

	long length = 0;

	for (FileStatus entry : fs.listStatus(path)) {
		if (entry.isDir()) {
			if (acceptFile(entry) && enumerateNestedFiles) {
				length += addFilesInDir(entry.getPath(), files, logExcludedFiles);
			} else if (logExcludedFiles && LOG.isDebugEnabled()) {
				LOG.debug("Directory "+entry.getPath().toString()+" did not pass the file-filter and is excluded.");
			}
		} else if (acceptFile(entry)) {
			files.add(entry);
			length += entry.getLen();
			testForUnsplittable(entry);
		} else if (logExcludedFiles && LOG.isDebugEnabled()) {
			// this branch handles non-directories, so report the entry as a file
			LOG.debug("File "+entry.getPath().toString()+" did not pass the file-filter and is excluded.");
		}
	}
	return length;
}
 
Example 15
Source File: FileInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the statistics for {@code filePath}, returning {@code cachedStats} when
 * none of the enumerated files is newer than the cached statistics.
 *
 * @param cachedStats previously computed statistics, may be {@code null}
 * @param filePath file or directory to examine
 * @param fs file system used to look up {@code filePath}
 * @param files receives the status of every enumerated file
 * @return {@code cachedStats} if still valid, otherwise newly created statistics
 * @throws IOException if looking up or enumerating files fails
 */
protected FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, Path filePath, FileSystem fs, ArrayList<FileStatus> files) throws IOException {

	final FileStatus root = fs.getFileStatus(filePath);
	long totalLength = 0;

	// enumerate all files and accumulate their length
	if (!root.isDir()) {
		files.add(root);
		testForUnsplittable(root);
		totalLength += root.getLen();
	} else {
		totalLength += addFilesInDir(root.getPath(), files, false);
	}

	// newest modification time stamp over everything in the list
	long newestModTime = 0;
	for (FileStatus status : files) {
		newestModTime = Math.max(status.getModificationTime(), newestModTime);
	}

	// the cached statistics stay valid as long as nothing was modified after them
	final boolean cacheStillValid =
			cachedStats != null && newestModTime <= cachedStats.getLastModificationTime();
	if (cacheStillValid) {
		return cachedStats;
	}

	// sanity check: a non-positive length is reported as unknown
	final long reportedLength = totalLength <= 0 ? BaseStatistics.SIZE_UNKNOWN : totalLength;
	return new FileBaseStatistics(newestModTime, reportedLength, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
}
 
Example 16
Source File: DistCp.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Recursively creates one copy task for every non-directory entry found under {@code p}.
 *
 * @param p path whose listing is traversed
 * @param rel prefix prepended to each file name; each nested directory appends its
 *            own name followed by {@code "/"}
 * @param tasks receives the created copy tasks
 * @throws IOException if obtaining the file system or listing a path fails
 */
private static void getCopyTasks(Path p, String rel, List<FileCopyTask> tasks) throws IOException {
	final FileStatus[] listing = p.getFileSystem().listStatus(p);
	if (listing == null) {
		// no listing available for this path, so there is nothing to add
		return;
	}
	for (FileStatus entry : listing) {
		if (!entry.isDir()) {
			final Path filePath = entry.getPath();
			tasks.add(new FileCopyTask(filePath, rel + filePath.getName()));
			continue;
		}
		// directory: descend with the relative prefix extended by the directory name
		final String nestedRel = rel + entry.getPath().getName() + "/";
		getCopyTasks(entry.getPath(), nestedRel, tasks);
	}
}
 
Example 17
Source File: ContinuousFileMonitoringFunction.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the files under {@code path} that are not yet processed, i.e. the files
 * that {@code shouldIgnore} does not reject.
 *
 * @param fileSystem The filesystem where the monitored directory resides.
 * @param path The directory whose entries are examined.
 * @return A map from file path to file status; empty if the listing throws an
 *         {@link IOException} or returns {@code null}.
 */
private Map<Path, FileStatus> listEligibleFiles(FileSystem fileSystem, Path path) throws IOException {

	final FileStatus[] listing;
	try {
		listing = fileSystem.listStatus(path);
	} catch (IOException e) {
		// we may run into an IOException if files are moved while listing their status
		// delay the check for eligible files in this case
		return Collections.emptyMap();
	}

	if (listing == null) {
		LOG.warn("Path does not exist: {}", path);
		return Collections.emptyMap();
	}

	final Map<Path, FileStatus> eligible = new HashMap<>();
	for (FileStatus entry : listing) {
		if (entry.isDir()) {
			// descend only when nested enumeration is enabled and the format accepts the directory
			if (format.getNestedFileEnumeration() && format.acceptFile(entry)) {
				eligible.putAll(listEligibleFiles(fileSystem, entry.getPath()));
			}
			continue;
		}

		// plain file: keep it unless it should be ignored
		final Path filePath = entry.getPath();
		final long modificationTime = entry.getModificationTime();
		if (!shouldIgnore(filePath, modificationTime)) {
			eligible.put(filePath, entry);
		}
	}
	return eligible;
}
 
Example 18
Source File: FileUtils.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Creates {@code targetPath} and copies every entry listed under {@code sourcePath}
 * into it, keeping each entry's last path segment as its name.
 *
 * @param sourcePath directory whose entries are copied
 * @param targetPath directory that receives the copies
 * @param executable passed through unchanged to {@code copy}
 * @param sFS file system used to list {@code sourcePath}
 * @param tFS file system used to create {@code targetPath}
 * @throws IOException if creating the target, listing the source, or copying fails
 */
private static void internalCopyDirectory(Path sourcePath, Path targetPath, boolean executable, FileSystem sFS, FileSystem tFS) throws IOException {
	tFS.mkdirs(targetPath);
	for (FileStatus entry : sFS.listStatus(sourcePath)) {
		String sourceName = entry.getPath().toString();
		// strip a trailing slash on directories so the last segment is the directory name
		if (entry.isDir() && sourceName.endsWith("/")) {
			sourceName = sourceName.substring(0, sourceName.length() - 1);
		}
		// last path segment, including its leading '/'
		final String lastSegment = sourceName.substring(sourceName.lastIndexOf("/"));
		final String localPath = targetPath + lastSegment;
		copy(entry.getPath(), new Path(localPath), executable);
	}
}
 
Example 19
Source File: FileInputFormat.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Enumerate all files in the directory and recursive if enumerateNestedFiles is true.
 *
 * @param path the directory whose entries are enumerated
 * @param files receives every accepted file
 * @param logExcludedFiles whether excluded entries are reported at debug level
 * @return the total length of accepted files.
 * @throws IOException if obtaining the file system or listing a directory fails
 */
private long addFilesInDir(Path path, List<FileStatus> files, boolean logExcludedFiles)
		throws IOException {
	final FileSystem fs = path.getFileSystem();

	long length = 0;

	for (FileStatus entry : fs.listStatus(path)) {
		if (entry.isDir()) {
			if (acceptFile(entry) && enumerateNestedFiles) {
				length += addFilesInDir(entry.getPath(), files, logExcludedFiles);
			} else if (logExcludedFiles && LOG.isDebugEnabled()) {
				LOG.debug("Directory "+entry.getPath().toString()+" did not pass the file-filter and is excluded.");
			}
		} else if (acceptFile(entry)) {
			files.add(entry);
			length += entry.getLen();
			testForUnsplittable(entry);
		} else if (logExcludedFiles && LOG.isDebugEnabled()) {
			// this branch handles non-directories, so report the entry as a file
			LOG.debug("File "+entry.getPath().toString()+" did not pass the file-filter and is excluded.");
		}
	}
	return length;
}