Java Code Examples for org.apache.flink.core.fs.FileStatus#getLen()

The following examples show how to use org.apache.flink.core.fs.FileStatus#getLen(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: FileInputFormat.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the input files reachable from {@code filePath} into {@code files} and derives
 * basic statistics (latest modification time and total length) from them.
 *
 * @param cachedStats previously computed statistics, or {@code null} if there are none
 * @param filePath the file or directory to inspect
 * @param fs the file system used to look up the status of {@code filePath}
 * @param files list that the discovered files are appended to
 * @return {@code cachedStats} if it is non-null and no collected file is newer than it,
 *         otherwise freshly created statistics
 * @throws IOException propagated from the underlying file system calls
 */
protected FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, Path filePath, FileSystem fs, ArrayList<FileStatus> files) throws IOException {

		// look up the status of the root path; everything below is derived from it
		final FileStatus root = fs.getFileStatus(filePath);

		// gather the input files and sum up their lengths
		final long collectedBytes;
		if (!root.isDir()) {
			files.add(root);
			testForUnsplittable(root);
			collectedBytes = root.getLen();
		} else {
			collectedBytes = addFilesInDir(root.getPath(), files, false);
		}

		// determine the most recent modification time among the collected files
		long newestModification = 0;
		for (int i = 0; i < files.size(); i++) {
			final long modified = files.get(i).getModificationTime();
			if (modified > newestModification) {
				newestModification = modified;
			}
		}

		// the cached statistics remain usable as long as no file is newer than they are
		final boolean cacheStillValid = cachedStats != null
			&& newestModification <= cachedStats.getLastModificationTime();
		if (cacheStillValid) {
			return cachedStats;
		}

		// a non-positive total length is reported as "unknown"
		final long reportedSize = collectedBytes > 0 ? collectedBytes : BaseStatistics.SIZE_UNKNOWN;
		return new FileBaseStatistics(newestModification, reportedSize, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	}
 
Example 2
Source File: FileInputFormat.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Enumerates the entries of the given directory and appends every accepted file to
 * {@code files}. Accepted sub-directories are descended into recursively, but only if
 * {@code enumerateNestedFiles} is true.
 *
 * @param path the directory whose entries are listed
 * @param files the list that accepted files are appended to
 * @param logExcludedFiles whether excluded entries are reported on debug level
 * @return the total length of accepted files.
 * @throws IOException propagated from the underlying file system calls
 */
private long addFilesInDir(Path path, List<FileStatus> files, boolean logExcludedFiles)
		throws IOException {
	final FileSystem fs = path.getFileSystem();

	long length = 0;

	for (FileStatus entry : fs.listStatus(path)) {
		if (entry.isDir()) {
			if (acceptFile(entry) && enumerateNestedFiles) {
				length += addFilesInDir(entry.getPath(), files, logExcludedFiles);
			} else if (logExcludedFiles && LOG.isDebugEnabled()) {
				LOG.debug("Directory "+entry.getPath().toString()+" did not pass the file-filter and is excluded.");
			}
		} else if (acceptFile(entry)) {
			files.add(entry);
			length += entry.getLen();
			testForUnsplittable(entry);
		} else if (logExcludedFiles && LOG.isDebugEnabled()) {
			// this entry is a regular file, so the message must not call it a directory
			LOG.debug("File "+entry.getPath().toString()+" did not pass the file-filter and is excluded.");
		}
	}
	return length;
}
 
Example 3
Source File: BinaryInputFormat.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the statistics for the given files. The last modification time and the total
 * input size are taken over from the pre-filled statistics; the record count is summed
 * up from the block info read at the end of each file.
 *
 * @param files
 *        The files that are associated with this block input format.
 * @param stats
 *        The pre-filled statistics.
 * @return the statistics, or {@code null} if {@code files} is empty
 */
protected SequentialStatistics createStatistics(List<FileStatus> files, FileBaseStatistics stats)
		throws IOException {
	if (files.isEmpty()) {
		return null;
	}

	final BlockInfo trailer = new BlockInfo();
	long recordCount = 0;
	for (FileStatus candidate : files) {
		// files too short to hold a block info are treated as invalid and skipped
		if (candidate.getLen() >= trailer.getInfoSize()) {
			final FileSystem fileSystem = candidate.getPath().getFileSystem();
			try (FSDataInputStream in = fileSystem.open(candidate.getPath(), trailer.getInfoSize())) {
				// position the stream on the block info located at the very end of the file
				in.seek(candidate.getLen() - trailer.getInfoSize());

				trailer.read(new DataInputViewStreamWrapper(in));
				recordCount += trailer.getAccumulatedRecordCount();
			}
		}
	}

	// guard against a division by zero when no records were counted
	final float avgWidth;
	if (recordCount == 0) {
		avgWidth = 0;
	} else {
		avgWidth = (float) stats.getTotalInputSize() / recordCount;
	}
	return new SequentialStatistics(stats.getLastModificationTime(), stats.getTotalInputSize(), avgWidth,
		recordCount);
}
 
Example 4
Source File: FileInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Gathers the files found under {@code filePath} into {@code files} and computes basic
 * statistics for them, reusing {@code cachedStats} when they are still up to date.
 *
 * @param cachedStats statistics from an earlier call, or {@code null}
 * @param filePath the file or directory to examine
 * @param fs the file system that {@code filePath} is resolved against
 * @param files receives the status of every file that is found
 * @return {@code cachedStats} if present and not older than the newest file, else new statistics
 * @throws IOException propagated from the underlying file system calls
 */
protected FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, Path filePath, FileSystem fs, ArrayList<FileStatus> files) throws IOException {

		final FileStatus inputStatus = fs.getFileStatus(filePath);

		// a directory is enumerated, a plain file is taken as the only input
		final long byteCount;
		if (inputStatus.isDir()) {
			byteCount = addFilesInDir(inputStatus.getPath(), files, false);
		} else {
			files.add(inputStatus);
			testForUnsplittable(inputStatus);
			byteCount = inputStatus.getLen();
		}

		// find the newest modification time stamp over all gathered files
		long newestTimestamp = 0;
		for (FileStatus gathered : files) {
			final long timestamp = gathered.getModificationTime();
			newestTimestamp = timestamp > newestTimestamp ? timestamp : newestTimestamp;
		}

		// without cached statistics, or with outdated ones, new statistics are created
		if (cachedStats == null || newestTimestamp > cachedStats.getLastModificationTime()) {
			// sanity check: a non-positive length counts as unknown
			final long sizeToReport = byteCount <= 0 ? BaseStatistics.SIZE_UNKNOWN : byteCount;
			return new FileBaseStatistics(newestTimestamp, sizeToReport, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
		}
		return cachedStats;
	}
 
Example 5
Source File: FileInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Enumerates the entries of the given directory and appends every accepted file to
 * {@code files}. Accepted sub-directories are descended into recursively, but only if
 * {@code enumerateNestedFiles} is true.
 *
 * @param path the directory whose entries are listed
 * @param files the list that accepted files are appended to
 * @param logExcludedFiles whether excluded entries are reported on debug level
 * @return the total length of accepted files.
 * @throws IOException propagated from the underlying file system calls
 */
private long addFilesInDir(Path path, List<FileStatus> files, boolean logExcludedFiles)
		throws IOException {
	final FileSystem fs = path.getFileSystem();

	long length = 0;

	for (FileStatus entry : fs.listStatus(path)) {
		if (entry.isDir()) {
			if (acceptFile(entry) && enumerateNestedFiles) {
				length += addFilesInDir(entry.getPath(), files, logExcludedFiles);
			} else if (logExcludedFiles && LOG.isDebugEnabled()) {
				LOG.debug("Directory "+entry.getPath().toString()+" did not pass the file-filter and is excluded.");
			}
		} else if (acceptFile(entry)) {
			files.add(entry);
			length += entry.getLen();
			testForUnsplittable(entry);
		} else if (logExcludedFiles && LOG.isDebugEnabled()) {
			// this entry is a regular file, so the message must not call it a directory
			LOG.debug("File "+entry.getPath().toString()+" did not pass the file-filter and is excluded.");
		}
	}
	return length;
}
 
Example 6
Source File: BinaryInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Completes the statistics for the given files. Modification time and total input size
 * come from the pre-filled statistics; the number of records is accumulated from the
 * block info that is read from the tail of each file.
 *
 * @param files
 *        The files that are associated with this block input format.
 * @param stats
 *        The pre-filled statistics.
 * @return the completed statistics, or {@code null} if no files were given
 */
protected SequentialStatistics createStatistics(List<FileStatus> files, FileBaseStatistics stats)
		throws IOException {
	if (files.isEmpty()) {
		return null;
	}

	final BlockInfo info = new BlockInfo();
	long records = 0;
	for (FileStatus status : files) {
		// a file shorter than one block info cannot be valid and is ignored
		if (status.getLen() >= info.getInfoSize()) {
			final FileSystem owner = status.getPath().getFileSystem();
			try (FSDataInputStream stream = owner.open(status.getPath(), info.getInfoSize())) {
				// jump to the block info that occupies the last bytes of the file
				stream.seek(status.getLen() - info.getInfoSize());

				info.read(new DataInputViewStreamWrapper(stream));
				records += info.getAccumulatedRecordCount();
			}
		}
	}

	// zero records would mean a division by zero, so the width is reported as 0 then
	final float avgWidth;
	if (records != 0) {
		avgWidth = (float) stats.getTotalInputSize() / records;
	} else {
		avgWidth = 0;
	}
	return new SequentialStatistics(stats.getLastModificationTime(), stats.getTotalInputSize(), avgWidth,
		records);
}
 
Example 7
Source File: FileInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Fills {@code files} with the input files located at {@code filePath} and returns basic
 * statistics about them. Cached statistics are handed back unchanged if none of the
 * files was modified after they were created.
 *
 * @param cachedStats statistics computed earlier, may be {@code null}
 * @param filePath path of the input file or input directory
 * @param fs file system used to obtain the status of {@code filePath}
 * @param files output list for the status objects of the input files
 * @return the still valid {@code cachedStats}, or newly created statistics
 * @throws IOException propagated from the underlying file system calls
 */
protected FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, Path filePath, FileSystem fs, ArrayList<FileStatus> files) throws IOException {

		final FileStatus target = fs.getFileStatus(filePath);

		// enumerate the input: either the directory content or the single file itself
		long sizeInBytes;
		if (target.isDir()) {
			sizeInBytes = addFilesInDir(target.getPath(), files, false);
		} else {
			files.add(target);
			testForUnsplittable(target);
			sizeInBytes = target.getLen();
		}

		// the youngest file decides whether cached statistics can be reused
		long youngest = 0;
		for (FileStatus current : files) {
			if (current.getModificationTime() > youngest) {
				youngest = current.getModificationTime();
			}
		}

		if (cachedStats != null) {
			if (youngest <= cachedStats.getLastModificationTime()) {
				return cachedStats;
			}
		}

		// sanity check: report an unknown size instead of a non-positive one
		if (!(sizeInBytes > 0)) {
			sizeInBytes = BaseStatistics.SIZE_UNKNOWN;
		}
		return new FileBaseStatistics(youngest, sizeInBytes, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	}
 
Example 8
Source File: FileInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Enumerates the entries of the given directory and appends every accepted file to
 * {@code files}. Accepted sub-directories are descended into recursively, but only if
 * {@code enumerateNestedFiles} is true.
 *
 * @param path the directory whose entries are listed
 * @param files the list that accepted files are appended to
 * @param logExcludedFiles whether excluded entries are reported on debug level
 * @return the total length of accepted files.
 * @throws IOException propagated from the underlying file system calls
 */
private long addFilesInDir(Path path, List<FileStatus> files, boolean logExcludedFiles)
		throws IOException {
	final FileSystem fs = path.getFileSystem();

	long length = 0;

	for (FileStatus entry : fs.listStatus(path)) {
		if (entry.isDir()) {
			if (acceptFile(entry) && enumerateNestedFiles) {
				length += addFilesInDir(entry.getPath(), files, logExcludedFiles);
			} else if (logExcludedFiles && LOG.isDebugEnabled()) {
				LOG.debug("Directory "+entry.getPath().toString()+" did not pass the file-filter and is excluded.");
			}
		} else if (acceptFile(entry)) {
			files.add(entry);
			length += entry.getLen();
			testForUnsplittable(entry);
		} else if (logExcludedFiles && LOG.isDebugEnabled()) {
			// this entry is a regular file, so the message must not call it a directory
			LOG.debug("File "+entry.getPath().toString()+" did not pass the file-filter and is excluded.");
		}
	}
	return length;
}
 
Example 9
Source File: BinaryInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the sequential statistics for the given files. The last modification time and
 * the total input size are copied from the pre-filled statistics, while the record count
 * is the sum of the accumulated record counts read from the end of every file.
 *
 * @param files
 *        The files that are associated with this block input format.
 * @param stats
 *        The pre-filled statistics.
 * @return the sequential statistics, or {@code null} for an empty file list
 */
protected SequentialStatistics createStatistics(List<FileStatus> files, FileBaseStatistics stats)
		throws IOException {
	if (files.isEmpty()) {
		return null;
	}

	final BlockInfo lastBlock = new BlockInfo();
	long accumulatedRecords = 0;
	for (FileStatus input : files) {
		final boolean longEnough = input.getLen() >= lastBlock.getInfoSize();
		if (!longEnough) {
			// invalid file: it cannot even contain a single block info
			continue;
		}

		final FileSystem inputFs = input.getPath().getFileSystem();
		try (FSDataInputStream tail = inputFs.open(input.getPath(), lastBlock.getInfoSize())) {
			// the block info is read from the final bytes of the file
			tail.seek(input.getLen() - lastBlock.getInfoSize());

			lastBlock.read(new DataInputViewStreamWrapper(tail));
			accumulatedRecords += lastBlock.getAccumulatedRecordCount();
		}
	}

	// with no records at all the average width is defined as 0
	float avgWidth = 0;
	if (accumulatedRecords != 0) {
		avgWidth = (float) stats.getTotalInputSize() / accumulatedRecords;
	}
	return new SequentialStatistics(stats.getLastModificationTime(), stats.getTotalInputSize(), avgWidth,
		accumulatedRecords);
}
 
Example 10
Source File: LocalFileSystem.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
@Override
public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len) throws IOException {
	// The file is always reported as one block spanning its full length;
	// the requested range (start, len) is not consulted.
	final BlockLocation wholeFile = new LocalBlockLocation(hostName, file.getLen());
	return new BlockLocation[] { wholeFile };
}
 
Example 11
Source File: LocalFileSystem.java    From flink with Apache License 2.0 4 votes vote down vote up
@Override
public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len) throws IOException {
	// A single block location covering the entire file is returned,
	// independent of the start and len arguments.
	final BlockLocation[] locations = new BlockLocation[1];
	locations[0] = new LocalBlockLocation(hostName, file.getLen());
	return locations;
}
 
Example 12
Source File: DummyFSFileSystem.java    From flink with Apache License 2.0 4 votes vote down vote up
@Override
public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len) throws IOException {
	// The whole file is exposed as one block attributed to HOSTNAME;
	// start and len do not influence the result.
	final BlockLocation onlyBlock = new LocalBlockLocation(HOSTNAME, file.getLen());
	return new BlockLocation[] { onlyBlock };
}
 
Example 13
Source File: LocalFileSystem.java    From flink with Apache License 2.0 4 votes vote down vote up
@Override
public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len) throws IOException {
	// Exactly one block location is produced, sized to the full file length;
	// the start and len parameters are ignored.
	final long fileLength = file.getLen();
	return new BlockLocation[] { new LocalBlockLocation(hostName, fileLength) };
}
 
Example 14
Source File: DummyFSFileSystem.java    From flink with Apache License 2.0 4 votes vote down vote up
@Override
public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len) throws IOException {
	// One block location with the length of the complete file is returned;
	// the requested range (start, len) is not taken into account.
	final BlockLocation[] result = new BlockLocation[1];
	result[0] = new LocalBlockLocation(HOSTNAME, file.getLen());
	return result;
}
 
Example 15
Source File: AnotherDummyFSFileSystem.java    From flink with Apache License 2.0 4 votes vote down vote up
@Override
public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len) throws IOException {
	// The result always holds a single entry describing the file in its full length;
	// neither start nor len is evaluated.
	final long totalLength = file.getLen();
	final BlockLocation entireFile = new LocalBlockLocation(HOSTNAME, totalLength);
	return new BlockLocation[] { entireFile };
}