Java Code Examples for org.apache.flink.core.fs.FileSystem#getFileStatus()

The following examples show how to use org.apache.flink.core.fs.FileSystem#getFileStatus(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BinaryInputFormat.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Collects the status entries of all files referenced by {@code getFilePaths()}.
 *
 * <p>A path that denotes a directory contributes its direct, non-directory children (nested
 * directories are not descended into); any other path contributes its own status.
 *
 * @return the collected file statuses, in the order the paths are iterated
 * @throws IOException if one of the underlying file system calls fails
 */
protected List<FileStatus> getFiles() throws IOException {
	final List<FileStatus> collected = new ArrayList<>();

	for (Path inputPath : getFilePaths()) {
		final FileSystem fileSystem = inputPath.getFileSystem();
		final FileStatus inputStatus = fileSystem.getFileStatus(inputPath);

		if (!inputStatus.isDir()) {
			// plain file: take it as is
			collected.add(inputStatus);
			continue;
		}

		// directory: take only the files located directly inside it
		for (FileStatus child : fileSystem.listStatus(inputPath)) {
			if (child.isDir()) {
				continue;
			}
			collected.add(child);
		}
	}
	return collected;
}
 
Example 2
Source File: FileCacheDirectoriesTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a temp file through the file cache from an entry built around the serialized
 * {@code permanentBlobKey} and checks that the result is a directory containing
 * {@code cacheFile} with the expected content.
 */
@Test
public void testDirectoryDownloadedFromBlob() throws Exception {
	final JobID job = new JobID();
	final ExecutionAttemptID attempt = new ExecutionAttemptID();
	final String name = "test_file";

	// build the cache entry and trigger the copy / creation
	final DistributedCache.DistributedCacheEntry cacheEntry = new DistributedCache.DistributedCacheEntry(
		name,
		false,
		InstantiationUtil.serializeObject(permanentBlobKey),
		true);
	final Path targetDir = fileCache.createTmpFile(name, cacheEntry, job, attempt).get();

	// the materialized entry must be a directory
	final FileSystem targetFs = targetDir.getFileSystem();
	assertTrue(targetFs.getFileStatus(targetDir).isDir());

	// ... and must contain the cache file with the expected content
	final Path cachedFilePath = new Path(targetDir, "cacheFile");
	assertTrue(targetFs.exists(cachedFilePath));

	final File cachedFile = new File(cachedFilePath.getPath());
	assertEquals(testFileContent, FileUtils.readFileUtf8(cachedFile));
}
 
Example 3
Source File: BinaryInputFormat.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Collects the status entries of all files referenced by {@code getFilePaths()}.
 *
 * <p>A path that denotes a directory contributes its direct, non-directory children (nested
 * directories are not descended into); any other path contributes its own status.
 *
 * @return the collected file statuses, in the order the paths are iterated
 * @throws IOException if one of the underlying file system calls fails
 */
protected List<FileStatus> getFiles() throws IOException {
	final List<FileStatus> collected = new ArrayList<>();

	for (Path inputPath : getFilePaths()) {
		final FileSystem fileSystem = inputPath.getFileSystem();
		final FileStatus inputStatus = fileSystem.getFileStatus(inputPath);

		if (!inputStatus.isDir()) {
			// plain file: take it as is
			collected.add(inputStatus);
			continue;
		}

		// directory: take only the files located directly inside it
		for (FileStatus child : fileSystem.listStatus(inputPath)) {
			if (child.isDir()) {
				continue;
			}
			collected.add(child);
		}
	}
	return collected;
}
 
Example 4
Source File: FileCacheDirectoriesTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a temp file through the file cache from an entry built around the serialized
 * {@code permanentBlobKey} and checks that the result is a directory containing
 * {@code cacheFile} with the expected content.
 */
@Test
public void testDirectoryDownloadedFromBlob() throws Exception {
	final JobID job = new JobID();
	final ExecutionAttemptID attempt = new ExecutionAttemptID();
	final String name = "test_file";

	// build the cache entry and trigger the copy / creation
	final DistributedCache.DistributedCacheEntry cacheEntry = new DistributedCache.DistributedCacheEntry(
		name,
		false,
		InstantiationUtil.serializeObject(permanentBlobKey),
		true);
	final Path targetDir = fileCache.createTmpFile(name, cacheEntry, job, attempt).get();

	// the materialized entry must be a directory
	final FileSystem targetFs = targetDir.getFileSystem();
	assertTrue(targetFs.getFileStatus(targetDir).isDir());

	// ... and must contain the cache file with the expected content
	final Path cachedFilePath = new Path(targetDir, "cacheFile");
	assertTrue(targetFs.exists(cachedFilePath));

	final File cachedFile = new File(cachedFilePath.getPath());
	assertEquals(testFileContent, FileUtils.readFileUtf8(cachedFile));
}
 
Example 5
Source File: BinaryInputFormat.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Collects the status entries of all files referenced by {@code getFilePaths()}.
 *
 * <p>A path that denotes a directory contributes its direct, non-directory children (nested
 * directories are not descended into); any other path contributes its own status.
 *
 * @return the collected file statuses, in the order the paths are iterated
 * @throws IOException if one of the underlying file system calls fails
 */
protected List<FileStatus> getFiles() throws IOException {
	final List<FileStatus> collected = new ArrayList<>();

	for (Path inputPath : getFilePaths()) {
		final FileSystem fileSystem = inputPath.getFileSystem();
		final FileStatus inputStatus = fileSystem.getFileStatus(inputPath);

		if (!inputStatus.isDir()) {
			// plain file: take it as is
			collected.add(inputStatus);
			continue;
		}

		// directory: take only the files located directly inside it
		for (FileStatus child : fileSystem.listStatus(inputPath)) {
			if (child.isDir()) {
				continue;
			}
			collected.add(child);
		}
	}
	return collected;
}
 
Example 6
Source File: FileCacheDirectoriesTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a temp file through the file cache for the given entry and checks that the result is
 * a directory containing {@code cacheFile} with the expected content.
 *
 * @param entry the distributed cache entry handed to {@code fileCache.createTmpFile}
 * @throws Exception if creating, locating or reading the cached file fails
 */
private void testDirectoryDownloaded(DistributedCache.DistributedCacheEntry entry) throws Exception {
	final JobID job = new JobID();
	final ExecutionAttemptID attempt = new ExecutionAttemptID();

	// trigger the copy / creation and wait for it to finish
	final Path targetDir = fileCache.createTmpFile("test_file", entry, job, attempt).get();

	// the materialized entry must be a directory
	final FileSystem targetFs = targetDir.getFileSystem();
	assertTrue(targetFs.getFileStatus(targetDir).isDir());

	// ... and must contain the cache file with the expected content
	final Path cachedFilePath = new Path(targetDir, "cacheFile");
	assertTrue(targetFs.exists(cachedFilePath));

	final File cachedFile = new File(cachedFilePath.getPath());
	assertEquals(testFileContent, FileUtils.readFileUtf8(cachedFile));
}
 
Example 7
Source File: FileInputFormat.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Computes basic statistics for {@code filePath}, returning {@code cachedStats} when they are
 * still up to date.
 *
 * <p>If the path is a directory, {@code addFilesInDir} is used to add its files to
 * {@code files} and to obtain the total length; otherwise the single file is added to
 * {@code files}, passed to {@code testForUnsplittable}, and its length is used. The latest
 * modification time is taken over every entry in {@code files}, including any entries that
 * were already in the list when this method was called.
 *
 * @param cachedStats previously computed statistics, or {@code null} if there are none
 * @param filePath the file or directory to inspect
 * @param fs the file system used to look up {@code filePath}
 * @param files list that receives the discovered file statuses (modified by this method)
 * @return {@code cachedStats} if no file is newer than its last modification time, otherwise
 *     freshly built statistics; a total length of zero or less is reported as
 *     {@code BaseStatistics.SIZE_UNKNOWN}
 * @throws IOException if one of the underlying file system calls fails
 */
protected FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, Path filePath, FileSystem fs, ArrayList<FileStatus> files) throws IOException {
	final FileStatus root = fs.getFileStatus(filePath);

	// enumerate the files and measure their combined length
	final long measuredLength;
	if (root.isDir()) {
		measuredLength = addFilesInDir(root.getPath(), files, false);
	} else {
		files.add(root);
		testForUnsplittable(root);
		measuredLength = root.getLen();
	}

	// find the most recent modification time stamp, never going below zero
	long newestModification = 0;
	for (FileStatus status : files) {
		final long modified = status.getModificationTime();
		if (modified > newestModification) {
			newestModification = modified;
		}
	}

	// cached statistics stay valid as long as nothing was modified after they were taken
	final boolean cacheStillValid =
		cachedStats != null && newestModification <= cachedStats.getLastModificationTime();
	if (cacheStillValid) {
		return cachedStats;
	}

	// a non-positive length is reported as unknown
	final long reportedLength = measuredLength > 0 ? measuredLength : BaseStatistics.SIZE_UNKNOWN;
	return new FileBaseStatistics(newestModification, reportedLength, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
}
 
Example 8
Source File: FileCacheDirectoriesTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Registers the same cache entry for two execution attempts of one job and checks that the
 * cached directory survives both {@code releaseJob} calls and is gone only after the scheduled
 * delete process has been run.
 */
@Test
public void testDirectoryCleanUp() throws Exception {
	final JobID job = new JobID();
	final ExecutionAttemptID firstAttempt = new ExecutionAttemptID();
	final ExecutionAttemptID secondAttempt = new ExecutionAttemptID();
	final String name = "test_file";

	// build the cache entry and trigger the copy / creation for both attempts
	final DistributedCache.DistributedCacheEntry cacheEntry = new DistributedCache.DistributedCacheEntry(
		name,
		false,
		InstantiationUtil.serializeObject(permanentBlobKey),
		true);
	final Future<Path> pendingCopy = fileCache.createTmpFile(name, cacheEntry, job, firstAttempt);
	fileCache.createTmpFile(name, cacheEntry, job, secondAttempt);

	final Path targetDir = pendingCopy.get();
	final FileSystem targetFs = targetDir.getFileSystem();
	// NOTE(review): this status object is obtained once and reused by the later isDir() checks;
	// confirm that re-checking the same object (instead of re-querying) is intended.
	final FileStatus targetStatus = targetFs.getFileStatus(targetDir);
	final Path cachedFilePath = new Path(targetDir, "cacheFile");
	assertTrue(targetStatus.isDir());
	assertTrue(targetFs.exists(cachedFilePath));

	// releasing the first attempt must keep the directory
	fileCache.releaseJob(job, firstAttempt);
	assertTrue(targetStatus.isDir());
	assertTrue(targetFs.exists(cachedFilePath));

	// releasing the second attempt only schedules deletion after cleanupInterval
	fileCache.releaseJob(job, secondAttempt);
	assertTrue(targetStatus.isDir());
	assertTrue(targetFs.exists(cachedFilePath));

	// run the scheduled delete process by hand; afterwards everything must be gone
	assertEquals(CLEANUP_INTERVAL, executorService.lastDelayMillis);
	executorService.lastDeleteProcess.run();

	assertFalse(targetFs.exists(targetDir));
	assertFalse(targetFs.exists(cachedFilePath));
}
 
Example 9
Source File: FileInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Computes basic statistics for {@code filePath}, returning {@code cachedStats} when they are
 * still up to date.
 *
 * <p>If the path is a directory, {@code addFilesInDir} is used to add its files to
 * {@code files} and to obtain the total length; otherwise the single file is added to
 * {@code files}, passed to {@code testForUnsplittable}, and its length is used. The latest
 * modification time is taken over every entry in {@code files}, including any entries that
 * were already in the list when this method was called.
 *
 * @param cachedStats previously computed statistics, or {@code null} if there are none
 * @param filePath the file or directory to inspect
 * @param fs the file system used to look up {@code filePath}
 * @param files list that receives the discovered file statuses (modified by this method)
 * @return {@code cachedStats} if no file is newer than its last modification time, otherwise
 *     freshly built statistics; a total length of zero or less is reported as
 *     {@code BaseStatistics.SIZE_UNKNOWN}
 * @throws IOException if one of the underlying file system calls fails
 */
protected FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, Path filePath, FileSystem fs, ArrayList<FileStatus> files) throws IOException {
	final FileStatus root = fs.getFileStatus(filePath);

	// enumerate the files and measure their combined length
	final long measuredLength;
	if (root.isDir()) {
		measuredLength = addFilesInDir(root.getPath(), files, false);
	} else {
		files.add(root);
		testForUnsplittable(root);
		measuredLength = root.getLen();
	}

	// find the most recent modification time stamp, never going below zero
	long newestModification = 0;
	for (FileStatus status : files) {
		final long modified = status.getModificationTime();
		if (modified > newestModification) {
			newestModification = modified;
		}
	}

	// cached statistics stay valid as long as nothing was modified after they were taken
	final boolean cacheStillValid =
		cachedStats != null && newestModification <= cachedStats.getLastModificationTime();
	if (cacheStillValid) {
		return cachedStats;
	}

	// a non-positive length is reported as unknown
	final long reportedLength = measuredLength > 0 ? measuredLength : BaseStatistics.SIZE_UNKNOWN;
	return new FileBaseStatistics(newestModification, reportedLength, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
}
 
Example 10
Source File: FileCacheDirectoriesTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Registers the same cache entry for two execution attempts of one job and checks that the
 * cached directory survives both {@code releaseJob} calls and is gone only after the scheduled
 * delete process has been run.
 */
@Test
public void testDirectoryCleanUp() throws Exception {
	final JobID job = new JobID();
	final ExecutionAttemptID firstAttempt = new ExecutionAttemptID();
	final ExecutionAttemptID secondAttempt = new ExecutionAttemptID();
	final String name = "test_file";

	// build the cache entry and trigger the copy / creation for both attempts
	final DistributedCache.DistributedCacheEntry cacheEntry = new DistributedCache.DistributedCacheEntry(
		name,
		false,
		InstantiationUtil.serializeObject(permanentBlobKey),
		true);
	final Future<Path> pendingCopy = fileCache.createTmpFile(name, cacheEntry, job, firstAttempt);
	fileCache.createTmpFile(name, cacheEntry, job, secondAttempt);

	final Path targetDir = pendingCopy.get();
	final FileSystem targetFs = targetDir.getFileSystem();
	// NOTE(review): this status object is obtained once and reused by the later isDir() checks;
	// confirm that re-checking the same object (instead of re-querying) is intended.
	final FileStatus targetStatus = targetFs.getFileStatus(targetDir);
	final Path cachedFilePath = new Path(targetDir, "cacheFile");
	assertTrue(targetStatus.isDir());
	assertTrue(targetFs.exists(cachedFilePath));

	// releasing the first attempt must keep the directory
	fileCache.releaseJob(job, firstAttempt);
	assertTrue(targetStatus.isDir());
	assertTrue(targetFs.exists(cachedFilePath));

	// releasing the second attempt only schedules deletion after cleanupInterval
	fileCache.releaseJob(job, secondAttempt);
	assertTrue(targetStatus.isDir());
	assertTrue(targetFs.exists(cachedFilePath));

	// run the scheduled delete process by hand; afterwards everything must be gone
	assertEquals(CLEANUP_INTERVAL, executorService.lastDelayMillis);
	executorService.lastDeleteProcess.run();

	assertFalse(targetFs.exists(targetDir));
	assertFalse(targetFs.exists(cachedFilePath));
}
 
Example 11
Source File: PartitionPathUtils.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up the status of {@code path} and passes it, starting at level 0, to
 * {@code listStatusRecursively} to fill the result list.
 *
 * @param path the path whose status is the starting point
 * @param expectLevel the level value forwarded to {@code listStatusRecursively}
 * @param fs the file system used for the lookup
 * @return the collected statuses, or an empty array if an {@link IOException} occurs
 */
private static FileStatus[] getFileStatusRecurse(Path path, int expectLevel, FileSystem fs) {
	final ArrayList<FileStatus> collected = new ArrayList<>();

	try {
		listStatusRecursively(fs, fs.getFileStatus(path), 0, expectLevel, collected);
		return collected.toArray(new FileStatus[0]);
	} catch (IOException ignored) {
		// any I/O failure yields an empty result instead of an exception
		return new FileStatus[0];
	}
}
 
Example 12
Source File: FileInputFormat.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Computes basic statistics for {@code filePath}, returning {@code cachedStats} when they are
 * still up to date.
 *
 * <p>If the path is a directory, {@code addFilesInDir} is used to add its files to
 * {@code files} and to obtain the total length; otherwise the single file is added to
 * {@code files}, passed to {@code testForUnsplittable}, and its length is used. The latest
 * modification time is taken over every entry in {@code files}, including any entries that
 * were already in the list when this method was called.
 *
 * @param cachedStats previously computed statistics, or {@code null} if there are none
 * @param filePath the file or directory to inspect
 * @param fs the file system used to look up {@code filePath}
 * @param files list that receives the discovered file statuses (modified by this method)
 * @return {@code cachedStats} if no file is newer than its last modification time, otherwise
 *     freshly built statistics; a total length of zero or less is reported as
 *     {@code BaseStatistics.SIZE_UNKNOWN}
 * @throws IOException if one of the underlying file system calls fails
 */
protected FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, Path filePath, FileSystem fs, ArrayList<FileStatus> files) throws IOException {
	final FileStatus root = fs.getFileStatus(filePath);

	// enumerate the files and measure their combined length
	final long measuredLength;
	if (root.isDir()) {
		measuredLength = addFilesInDir(root.getPath(), files, false);
	} else {
		files.add(root);
		testForUnsplittable(root);
		measuredLength = root.getLen();
	}

	// find the most recent modification time stamp, never going below zero
	long newestModification = 0;
	for (FileStatus status : files) {
		final long modified = status.getModificationTime();
		if (modified > newestModification) {
			newestModification = modified;
		}
	}

	// cached statistics stay valid as long as nothing was modified after they were taken
	final boolean cacheStillValid =
		cachedStats != null && newestModification <= cachedStats.getLastModificationTime();
	if (cacheStillValid) {
		return cachedStats;
	}

	// a non-positive length is reported as unknown
	final long reportedLength = measuredLength > 0 ? measuredLength : BaseStatistics.SIZE_UNKNOWN;
	return new FileBaseStatistics(newestModification, reportedLength, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
}
 
Example 13
Source File: FileCacheDirectoriesTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Registers the same cache entry for two execution attempts of one job and checks that the
 * cached directory survives both {@code releaseJob} calls and is gone only after the scheduled
 * delete process has been run.
 */
@Test
public void testDirectoryCleanUp() throws Exception {
	final JobID job = new JobID();
	final ExecutionAttemptID firstAttempt = new ExecutionAttemptID();
	final ExecutionAttemptID secondAttempt = new ExecutionAttemptID();
	final String name = "test_file";

	// build the cache entry and trigger the copy / creation for both attempts
	final DistributedCache.DistributedCacheEntry cacheEntry = new DistributedCache.DistributedCacheEntry(
		name,
		false,
		InstantiationUtil.serializeObject(permanentBlobKey),
		true);
	final Future<Path> pendingCopy = fileCache.createTmpFile(name, cacheEntry, job, firstAttempt);
	fileCache.createTmpFile(name, cacheEntry, job, secondAttempt);

	final Path targetDir = pendingCopy.get();
	final FileSystem targetFs = targetDir.getFileSystem();
	// NOTE(review): this status object is obtained once and reused by the later isDir() checks;
	// confirm that re-checking the same object (instead of re-querying) is intended.
	final FileStatus targetStatus = targetFs.getFileStatus(targetDir);
	final Path cachedFilePath = new Path(targetDir, "cacheFile");
	assertTrue(targetStatus.isDir());
	assertTrue(targetFs.exists(cachedFilePath));

	// releasing the first attempt must keep the directory
	fileCache.releaseJob(job, firstAttempt);
	assertTrue(targetStatus.isDir());
	assertTrue(targetFs.exists(cachedFilePath));

	// releasing the second attempt only schedules deletion after cleanupInterval
	fileCache.releaseJob(job, secondAttempt);
	assertTrue(targetStatus.isDir());
	assertTrue(targetFs.exists(cachedFilePath));

	// run the scheduled delete process by hand; afterwards everything must be gone
	assertEquals(CLEANUP_INTERVAL, executorService.lastDelayMillis);
	executorService.lastDeleteProcess.run();

	assertFalse(targetFs.exists(targetDir));
	assertFalse(targetFs.exists(cachedFilePath));
}