Java Code Examples for org.apache.flink.core.fs.FileSystem#listStatus()

The following examples show how to use org.apache.flink.core.fs.FileSystem#listStatus(). You can go to the original project or source file by following the links above each example, and you may check out the related API usage on the sidebar.
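
Before the project examples, here is a minimal, self-contained sketch of the typical listStatus() pattern: obtain the FileSystem from a Path, guard against a null result (several examples below treat null as "path does not exist"), and filter out sub-directories. The directory path and class name are placeholders for illustration, not taken from any project below.

import java.io.IOException;

import org.apache.flink.core.fs.FileStatus;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;

public class ListStatusSketch {

	public static void main(String[] args) throws IOException {
		// Placeholder directory; point this at a real location.
		final Path dir = new Path("file:///tmp/liststatus-demo");
		final FileSystem fs = dir.getFileSystem();

		final FileStatus[] statuses = fs.listStatus(dir);
		if (statuses == null) {
			// Several examples below treat a null result as "path does not exist".
			System.out.println("Path does not exist: " + dir);
			return;
		}

		for (FileStatus status : statuses) {
			// Keep plain files; skip sub-directories (compare Example 1).
			if (!status.isDir()) {
				System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
			}
		}
	}
}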
Example 1
Source File: BinaryInputFormat.java    From flink with Apache License 2.0
protected List<FileStatus> getFiles() throws IOException {
	// get all the files that are involved in the splits
	List<FileStatus> files = new ArrayList<>();

	for (Path filePath: getFilePaths()) {
		final FileSystem fs = filePath.getFileSystem();
		final FileStatus pathFile = fs.getFileStatus(filePath);

		if (pathFile.isDir()) {
			// input is directory. list all contained files
			final FileStatus[] partials = fs.listStatus(filePath);
			for (FileStatus partial : partials) {
				if (!partial.isDir()) {
					files.add(partial);
				}
			}
		} else {
			files.add(pathFile);
		}
	}
	return files;
}
 
Example 2
Source File: StanfordTweetsDataSetInputFormat.java    From flink-examples with MIT License
@Override
public TweetFileInputSplit[] createInputSplits(int minNumSplits) throws IOException {
    FileSystem fileSystem = getFileSystem();
    FileStatus[] statuses = fileSystem.listStatus(new Path(inputPath));
    logger.info("Found {} files", statuses.length);

    List<TweetFileInputSplit> splits = new ArrayList<>();
    for (int i = 0; i < statuses.length; i++) {
        FileStatus status = statuses[i];
        String fileName = status.getPath().getName();
        if (fileName.endsWith("edges")) {
            splits.add(new TweetFileInputSplit(i, status.getPath()));
        }
    }

    logger.info("Result number of splits: {}", splits.size());
    return splits.toArray(new TweetFileInputSplit[splits.size()]);
}
 
Example 3
Source File: RocksDBIncrementalRestoreOperation.java    From flink with Apache License 2.0
/**
 * This recreates the new working directory of the recovered RocksDB instance and links/copies the contents from
 * a local state.
 */
private void restoreInstanceDirectoryFromPath(Path source, String instanceRocksDBPath) throws IOException {

	FileSystem fileSystem = source.getFileSystem();

	final FileStatus[] fileStatuses = fileSystem.listStatus(source);

	if (fileStatuses == null) {
		throw new IOException("Cannot list file statues. Directory " + source + " does not exist.");
	}

	for (FileStatus fileStatus : fileStatuses) {
		final Path filePath = fileStatus.getPath();
		final String fileName = filePath.getName();
		File restoreFile = new File(source.getPath(), fileName);
		File targetFile = new File(instanceRocksDBPath, fileName);
		if (fileName.endsWith(SST_FILE_SUFFIX)) {
			// hard-link the immutable SST files.
			Files.createLink(targetFile.toPath(), restoreFile.toPath());
		} else {
			// true copy for all other files.
			Files.copy(restoreFile.toPath(), targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
		}
	}
}
 
Example 4
Source File: FileUtils.java    From flink with Apache License 2.0
private static void addToZip(Path fileOrDirectory, FileSystem fs, Path rootDir, ZipOutputStream out) throws IOException {
	String relativePath = fileOrDirectory.getPath().replace(rootDir.getPath() + '/', "");
	if (fs.getFileStatus(fileOrDirectory).isDir()) {
		out.putNextEntry(new ZipEntry(relativePath + '/'));
		for (FileStatus containedFile : fs.listStatus(fileOrDirectory)) {
			addToZip(containedFile.getPath(), fs, rootDir, out);
		}
	} else {
		ZipEntry entry = new ZipEntry(relativePath);
		out.putNextEntry(entry);

		try (FSDataInputStream in = fs.open(fileOrDirectory)) {
			IOUtils.copyBytes(in, out, false);
		}
		out.closeEntry();
	}
}
 
Example 5
Source File: FileMonitoringFunction.java    From flink with Apache License 2.0
private List<String> listNewFiles(FileSystem fileSystem) throws IOException {
	List<String> files = new ArrayList<String>();

	FileStatus[] statuses = fileSystem.listStatus(new Path(path));

	if (statuses == null) {
		LOG.warn("Path does not exist: {}", path);
	} else {
		for (FileStatus status : statuses) {
			Path filePath = status.getPath();
			String fileName = filePath.getName();
			long modificationTime = status.getModificationTime();

			if (!isFiltered(fileName, modificationTime)) {
				files.add(filePath.toString());
				modificationTimes.put(fileName, modificationTime);
			}
		}
	}

	return files;
}
 
Example 6
Source File: PartitionTempFileManager.java    From flink with Apache License 2.0
/**
 * Returns checkpoints whose ids are less than or equal to {@code toCpId}
 * in the temporary base path.
 */
public static long[] headCheckpoints(FileSystem fs, Path basePath, long toCpId) throws IOException {
	List<Long> cps = new ArrayList<>();

	for (FileStatus taskStatus : fs.listStatus(basePath)) {
		String name = taskStatus.getPath().getName();
		if (isCheckpointDir(name)) {
			long currentCp = getCheckpointId(name);
			// collect checkpoint ids that are less than or equal to toCpId.
			if (currentCp <= toCpId) {
				cps.add(currentCp);
			}
		}
	}
	return cps.stream().mapToLong(v -> v).toArray();
}
 
Example 7
Source File: RocksDBIncrementalRestoreOperation.java    From Flink-CEPplus with Apache License 2.0
/**
 * This recreates the new working directory of the recovered RocksDB instance and links/copies the contents from
 * a local state.
 */
private void restoreInstanceDirectoryFromPath(Path source, String instanceRocksDBPath) throws IOException {

	FileSystem fileSystem = source.getFileSystem();

	final FileStatus[] fileStatuses = fileSystem.listStatus(source);

	if (fileStatuses == null) {
		throw new IOException("Cannot list file statues. Directory " + source + " does not exist.");
	}

	for (FileStatus fileStatus : fileStatuses) {
		final Path filePath = fileStatus.getPath();
		final String fileName = filePath.getName();
		File restoreFile = new File(source.getPath(), fileName);
		File targetFile = new File(instanceRocksDBPath, fileName);
		if (fileName.endsWith(SST_FILE_SUFFIX)) {
			// hard-link the immutable SST files.
			Files.createLink(targetFile.toPath(), restoreFile.toPath());
		} else {
			// true copy for all other files.
			Files.copy(restoreFile.toPath(), targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
		}
	}
}
 
Example 8
Source File: PartitionTempFileManager.java    From flink with Apache License 2.0
/**
 * Returns task temporary paths in this checkpoint.
 */
public static List<Path> listTaskTemporaryPaths(
		FileSystem fs, Path basePath, long checkpointId) throws Exception {
	List<Path> taskTmpPaths = new ArrayList<>();

	for (FileStatus taskStatus : fs.listStatus(new Path(basePath, checkpointName(checkpointId)))) {
		if (isTaskDir(taskStatus.getPath().getName())) {
			taskTmpPaths.add(taskStatus.getPath());
		}
	}
	return taskTmpPaths;
}
 
Example 9
Source File: ContinuousFileMonitoringFunction.java    From Flink-CEPplus with Apache License 2.0
/**
 * Returns the paths of the files not yet processed.
 * @param fileSystem The filesystem where the monitored directory resides.
 */
private Map<Path, FileStatus> listEligibleFiles(FileSystem fileSystem, Path path) throws IOException {

	final FileStatus[] statuses;
	try {
		statuses = fileSystem.listStatus(path);
	} catch (IOException e) {
		// we may run into an IOException if files are moved while listing their status
		// delay the check for eligible files in this case
		return Collections.emptyMap();
	}

	if (statuses == null) {
		LOG.warn("Path does not exist: {}", path);
		return Collections.emptyMap();
	} else {
		Map<Path, FileStatus> files = new HashMap<>();
		// handle the new files
		for (FileStatus status : statuses) {
			if (!status.isDir()) {
				Path filePath = status.getPath();
				long modificationTime = status.getModificationTime();
				if (!shouldIgnore(filePath, modificationTime)) {
					files.put(filePath, status);
				}
			} else if (format.getNestedFileEnumeration() && format.acceptFile(status)) {
				files.putAll(listEligibleFiles(fileSystem, status.getPath()));
			}
		}
		return files;
	}
}
 
Example 10
Source File: ContinuousFileMonitoringFunction.java    From flink with Apache License 2.0
/**
 * Returns the paths of the files not yet processed.
 * @param fileSystem The filesystem where the monitored directory resides.
 */
private Map<Path, FileStatus> listEligibleFiles(FileSystem fileSystem, Path path) throws IOException {

	final FileStatus[] statuses;
	try {
		statuses = fileSystem.listStatus(path);
	} catch (IOException e) {
		// we may run into an IOException if files are moved while listing their status
		// delay the check for eligible files in this case
		return Collections.emptyMap();
	}

	if (statuses == null) {
		LOG.warn("Path does not exist: {}", path);
		return Collections.emptyMap();
	} else {
		Map<Path, FileStatus> files = new HashMap<>();
		// handle the new files
		for (FileStatus status : statuses) {
			if (!status.isDir()) {
				Path filePath = status.getPath();
				long modificationTime = status.getModificationTime();
				if (!shouldIgnore(filePath, modificationTime)) {
					files.put(filePath, status);
				}
			} else if (format.getNestedFileEnumeration() && format.acceptFile(status)) {
				files.putAll(listEligibleFiles(fileSystem, status.getPath()));
			}
		}
		return files;
	}
}
 
Example 11
Source File: PartitionPathUtils.java    From flink with Apache License 2.0
/**
 * List file status without hidden files.
 */
public static FileStatus[] listStatusWithoutHidden(FileSystem fs, Path dir) throws IOException {
	FileStatus[] statuses = fs.listStatus(dir);
	if (statuses == null) {
		return null;
	}
	return Arrays.stream(statuses).filter(fileStatus -> !isHiddenFile(fileStatus)).toArray(FileStatus[]::new);
}
 
Example 12
Source File: FileUtils.java    From flink with Apache License 2.0
private static void internalCopyDirectory(Path sourcePath, Path targetPath, boolean executable, FileSystem sFS, FileSystem tFS) throws IOException {
	tFS.mkdirs(targetPath);
	FileStatus[] contents = sFS.listStatus(sourcePath);
	for (FileStatus content : contents) {
		String distPath = content.getPath().toString();
		if (content.isDir()) {
			if (distPath.endsWith("/")) {
				distPath = distPath.substring(0, distPath.length() - 1);
			}
		}
		String localPath = targetPath + distPath.substring(distPath.lastIndexOf("/"));
		copy(content.getPath(), new Path(localPath), executable);
	}
}
 
Example 13
Source File: HadoopSwiftFileSystemITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testDirectoryListing() throws Exception {
	final Configuration conf = createConfiguration();

	FileSystem.initialize(conf);

	final Path directory = new Path("swift://" + CONTAINER + '.' + SERVICENAME + '/' + TEST_DATA_DIR + "/testdir/");
	final FileSystem fs = directory.getFileSystem();

	// directory must not yet exist
	assertFalse(fs.exists(directory));

	try {
		// create directory
		assertTrue(fs.mkdirs(directory));

		// the file system does not seem to assume the existence of empty directories
		assertTrue(fs.exists(directory));

		// directory empty
		assertEquals(0, fs.listStatus(directory).length);

		// create some files
		final int numFiles = 3;
		for (int i = 0; i < numFiles; i++) {
			Path file = new Path(directory, "/file-" + i);
			try (FSDataOutputStream out = fs.create(file, FileSystem.WriteMode.NO_OVERWRITE);
				OutputStreamWriter writer = new OutputStreamWriter(out, StandardCharsets.UTF_8)) {
				writer.write("hello-" + i + "\n");
			}
		}

		FileStatus[] files = fs.listStatus(directory);
		assertNotNull(files);
		assertEquals(3, files.length);

		for (FileStatus status : files) {
			assertFalse(status.isDir());
		}

		// now that there are files, the directory must exist
		assertTrue(fs.exists(directory));
	}
	finally {
		// clean up
		fs.delete(directory, true);
	}

	// now directory must be gone
	assertFalse(fs.exists(directory));
}
 
Example 14
Source File: StanfordTweetsDataSetInputFormat.java    From flink-examples with MIT License
@Override
public BaseStatistics getStatistics(BaseStatistics cachedStatistics) throws IOException {
    FileSystem fileSystem = getFileSystem();
    FileStatus[] statuses = fileSystem.listStatus(new Path(inputPath));
    return new GraphStatistics(statuses.length);
}
 
Example 15
Source File: AbstractFileCheckpointStorageTestBase.java    From flink with Apache License 2.0
/**
 * Validates that multiple checkpoints from different jobs with the same checkpoint ID do not
 * interfere with each other.
 */
@Test
public void testPersistMultipleMetadataOnlyCheckpoints() throws Exception {
	final FileSystem fs = FileSystem.getLocalFileSystem();
	final Path checkpointDir = new Path(tmp.newFolder().toURI());

	final long checkpointId = 177;

	final CheckpointStorage storage1 = createCheckpointStorage(checkpointDir);
	storage1.initializeBaseLocations();
	final CheckpointStorage storage2 = createCheckpointStorage(checkpointDir);
	storage2.initializeBaseLocations();

	final CheckpointStorageLocation loc1 = storage1.initializeLocationForCheckpoint(checkpointId);
	final CheckpointStorageLocation loc2 = storage2.initializeLocationForCheckpoint(checkpointId);

	final byte[] data1 = {77, 66, 55, 99, 88};
	final byte[] data2 = {1, 3, 2, 5, 4};

	final CompletedCheckpointStorageLocation completedLocation1;
	try (CheckpointMetadataOutputStream out = loc1.createMetadataOutputStream()) {
		out.write(data1);
		completedLocation1 = out.closeAndFinalizeCheckpoint();
	}
	final String result1 = completedLocation1.getExternalPointer();

	final CompletedCheckpointStorageLocation completedLocation2;
	try (CheckpointMetadataOutputStream out = loc2.createMetadataOutputStream()) {
		out.write(data2);
		completedLocation2 = out.closeAndFinalizeCheckpoint();
	}
	final String result2 = completedLocation2.getExternalPointer();

	// check that this went to a file, but in a nested directory structure

	// one directory per storage
	FileStatus[] files = fs.listStatus(checkpointDir);
	assertEquals(2, files.length);

	// in each per-storage directory, one for the checkpoint
	FileStatus[] job1Files = fs.listStatus(files[0].getPath());
	FileStatus[] job2Files = fs.listStatus(files[1].getPath());
	assertTrue(job1Files.length >= 1);
	assertTrue(job2Files.length >= 1);

	assertTrue(fs.exists(new Path(result1, AbstractFsCheckpointStorage.METADATA_FILE_NAME)));
	assertTrue(fs.exists(new Path(result2, AbstractFsCheckpointStorage.METADATA_FILE_NAME)));

	// check that both storages can resolve each other's contents
	validateContents(storage1.resolveCheckpoint(result1).getMetadataHandle(), data1);
	validateContents(storage1.resolveCheckpoint(result2).getMetadataHandle(), data2);
	validateContents(storage2.resolveCheckpoint(result1).getMetadataHandle(), data1);
	validateContents(storage2.resolveCheckpoint(result2).getMetadataHandle(), data2);
}
 
Example 16
Source File: BlobServerRecoveryTest.java    From flink with Apache License 2.0
/**
 * Helper to test that the {@link BlobServer} recovery from its HA store works.
 *
 * <p>Uploads two BLOBs to one {@link BlobServer} and expects a second one to be able to retrieve
 * them via a shared HA store upon request of a {@link BlobCacheService}.
 *
 * @param config
 * 		blob server configuration (including HA settings like {@link HighAvailabilityOptions#HA_STORAGE_PATH}
 * 		and {@link HighAvailabilityOptions#HA_CLUSTER_ID}) used to set up <tt>blobStore</tt>
 * @param blobStore
 * 		shared HA blob store to use
 *
 * @throws IOException
 * 		in case of failures
 */
public static void testBlobServerRecovery(final Configuration config, final BlobStore blobStore) throws IOException {
	final String clusterId = config.getString(HighAvailabilityOptions.HA_CLUSTER_ID);
	String storagePath = config.getString(HighAvailabilityOptions.HA_STORAGE_PATH) + "/" + clusterId;
	Random rand = new Random();

	try (
		BlobServer server0 = new BlobServer(config, blobStore);
		BlobServer server1 = new BlobServer(config, blobStore);
		// use VoidBlobStore as the HA store to force download from server[1]'s HA store
		BlobCacheService cache1 = new BlobCacheService(
			config, new VoidBlobStore(), new InetSocketAddress("localhost", server1.getPort())
		)) {

		server0.start();
		server1.start();

		// Random data
		byte[] expected = new byte[1024];
		rand.nextBytes(expected);
		byte[] expected2 = Arrays.copyOfRange(expected, 32, 288);

		BlobKey[] keys = new BlobKey[2];
		BlobKey nonHAKey;

		// Put job-related HA data
		JobID[] jobId = new JobID[] { new JobID(), new JobID() };
		keys[0] = put(server0, jobId[0], expected, PERMANENT_BLOB); // Request 1
		keys[1] = put(server0, jobId[1], expected2, PERMANENT_BLOB); // Request 2

		// put non-HA data
		nonHAKey = put(server0, jobId[0], expected2, TRANSIENT_BLOB);
		verifyKeyDifferentHashEquals(keys[1], nonHAKey);

		// check that the storage directory exists
		final Path blobServerPath = new Path(storagePath, "blob");
		FileSystem fs = blobServerPath.getFileSystem();
		assertTrue("Unknown storage dir: " + blobServerPath, fs.exists(blobServerPath));

		// Verify HA requests from cache1 (connected to server1) with no immediate access to the file
		verifyContents(cache1, jobId[0], keys[0], expected);
		verifyContents(cache1, jobId[1], keys[1], expected2);

		// Verify non-HA file is not accessible from server1
		verifyDeleted(cache1, jobId[0], nonHAKey);

		// Remove again
		server1.cleanupJob(jobId[0], true);
		server1.cleanupJob(jobId[1], true);

		// Verify everything is clean
		assertTrue("HA storage directory does not exist", fs.exists(new Path(storagePath)));
		if (fs.exists(blobServerPath)) {
			final org.apache.flink.core.fs.FileStatus[] recoveryFiles =
				fs.listStatus(blobServerPath);
			ArrayList<String> filenames = new ArrayList<>(recoveryFiles.length);
			for (org.apache.flink.core.fs.FileStatus file: recoveryFiles) {
				filenames.add(file.toString());
			}
			fail("Unclean state backend: " + filenames);
		}
	}
}