Java Code Examples for org.apache.flink.core.fs.Path#getFileSystem()

The following examples show how to use org.apache.flink.core.fs.Path#getFileSystem(). Each snippet is taken from an open-source project; its source file and license are noted above it.
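Before the examples, here is a minimal, self-contained sketch of the basic pattern (the file path below is hypothetical): getFileSystem() inspects the path's URI scheme (file://, hdfs://, s3://, ...) and returns the matching FileSystem implementation, so the same code runs unchanged against any supported file system.

import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;

public class PathGetFileSystemSketch {

	public static void main(String[] args) throws Exception {
		// A local file URI; an hdfs:// or s3:// URI would resolve to a different FileSystem.
		final Path path = new Path("file:///tmp/example.txt");

		// Resolve the FileSystem responsible for this path's scheme.
		final FileSystem fs = path.getFileSystem();

		System.out.println("scheme = " + path.toUri().getScheme()
				+ ", distributed = " + fs.isDistributedFS()
				+ ", exists = " + fs.exists(path));
	}
}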
Example 1
Source File: BinaryInputFormat.java    From flink with Apache License 2.0
protected List<FileStatus> getFiles() throws IOException {
	// get all the files that are involved in the splits
	List<FileStatus> files = new ArrayList<>();

	for (Path filePath: getFilePaths()) {
		final FileSystem fs = filePath.getFileSystem();
		final FileStatus pathFile = fs.getFileStatus(filePath);

		if (pathFile.isDir()) {
			// input is directory. list all contained files
			final FileStatus[] partials = fs.listStatus(filePath);
			for (FileStatus partial : partials) {
				if (!partial.isDir()) {
					files.add(partial);
				}
			}
		} else {
			files.add(pathFile);
		}
	}
	return files;
}
 
Example 2
Source File: YarnFileStageTestS3ITCase.java    From Flink-CEPplus with Apache License 2.0
/**
 * Verifies that nested directories are properly copied to the given S3 path (using the
 * appropriate file system) during resource uploads for YARN.
 *
 * @param scheme
 * 		file system scheme
 * @param pathSuffix
 * 		test path suffix which will be the test's target path
 */
private void testRecursiveUploadForYarn(String scheme, String pathSuffix) throws Exception {
	++numRecursiveUploadTests;

	final Path basePath = new Path(S3TestCredentials.getTestBucketUriWithScheme(scheme) + TEST_DATA_DIR);
	final HadoopFileSystem fs = (HadoopFileSystem) basePath.getFileSystem();

	assumeFalse(fs.exists(basePath));

	try {
		final Path directory = new Path(basePath, pathSuffix);

		YarnFileStageTest.testCopyFromLocalRecursive(fs.getHadoopFileSystem(),
			new org.apache.hadoop.fs.Path(directory.toUri()), tempFolder, true);
	} finally {
		// clean up
		fs.delete(basePath, true);
	}
}
 
Example 3
Source File: RocksDBIncrementalRestoreOperation.java    From flink with Apache License 2.0
/**
 * Recreates the working directory of the recovered RocksDB instance and links/copies the contents from
 * a local state directory.
 */
private void restoreInstanceDirectoryFromPath(Path source, String instanceRocksDBPath) throws IOException {

	FileSystem fileSystem = source.getFileSystem();

	final FileStatus[] fileStatuses = fileSystem.listStatus(source);

	if (fileStatuses == null) {
		throw new IOException("Cannot list file statues. Directory " + source + " does not exist.");
	}

	for (FileStatus fileStatus : fileStatuses) {
		final Path filePath = fileStatus.getPath();
		final String fileName = filePath.getName();
		File restoreFile = new File(source.getPath(), fileName);
		File targetFile = new File(instanceRocksDBPath, fileName);
		if (fileName.endsWith(SST_FILE_SUFFIX)) {
			// hard-link the immutable SST files.
			Files.createLink(targetFile.toPath(), restoreFile.toPath());
		} else {
			// true copy for all other files.
			Files.copy(restoreFile.toPath(), targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
		}
	}
}
 
Example 4
Source File: MemoryBackendCheckpointStorage.java    From Flink-CEPplus with Apache License 2.0
/**
 * Creates a new MemoryBackendCheckpointStorage.
 *
 * @param jobId The ID of the job writing the checkpoints.
 * @param checkpointsBaseDirectory The directory to write checkpoints to. May be null,
 *                                 in which case this storage does not support durable persistence.
 * @param defaultSavepointLocation The default savepoint directory, or null, if none is set.
 * @param maxStateSize The maximum size of each individual piece of state.
 *
 * @throws IOException Thrown if a checkpoint base directory is configured and the
 *                     checkpoint directory cannot be created within it.
 */
public MemoryBackendCheckpointStorage(
		JobID jobId,
		@Nullable Path checkpointsBaseDirectory,
		@Nullable Path defaultSavepointLocation,
		int maxStateSize) throws IOException {

	super(jobId, defaultSavepointLocation);

	checkArgument(maxStateSize > 0);
	this.maxStateSize = maxStateSize;

	if (checkpointsBaseDirectory == null) {
		checkpointsDirectory = null;
		fileSystem = null;
	}
	else {
		this.fileSystem = checkpointsBaseDirectory.getFileSystem();
		this.checkpointsDirectory = getCheckpointDirectoryForJob(checkpointsBaseDirectory, jobId);

		fileSystem.mkdirs(checkpointsDirectory);
	}
}
 
Example 5
Source File: CheckpointStreamWithResultProvider.java    From flink with Apache License 2.0
@Nonnull
static CheckpointStreamWithResultProvider createDuplicatingStream(
	@Nonnegative long checkpointId,
	@Nonnull CheckpointedStateScope checkpointedStateScope,
	@Nonnull CheckpointStreamFactory primaryStreamFactory,
	@Nonnull LocalRecoveryDirectoryProvider secondaryStreamDirProvider) throws IOException {

	CheckpointStreamFactory.CheckpointStateOutputStream primaryOut =
		primaryStreamFactory.createCheckpointStateOutputStream(checkpointedStateScope);

	try {
		File outFile = new File(
			secondaryStreamDirProvider.subtaskSpecificCheckpointDirectory(checkpointId),
			String.valueOf(UUID.randomUUID()));
		Path outPath = new Path(outFile.toURI());

		CheckpointStreamFactory.CheckpointStateOutputStream secondaryOut =
			new FileBasedStateOutputStream(outPath.getFileSystem(), outPath);

		return new CheckpointStreamWithResultProvider.PrimaryAndSecondaryStream(primaryOut, secondaryOut);
	} catch (IOException secondaryEx) {
		LOG.warn("Exception when opening secondary/local checkpoint output stream. " +
			"Continue only with the primary stream.", secondaryEx);
	}

	return new CheckpointStreamWithResultProvider.PrimaryStreamOnly(primaryOut);
}
 
Example 6
Source File: RocksDBIncrementalRestoreOperation.java    From Flink-CEPplus with Apache License 2.0
private void cleanUpPathQuietly(@Nonnull Path path) {
	try {
		FileSystem fileSystem = path.getFileSystem();
		if (fileSystem.exists(path)) {
			fileSystem.delete(path, true);
		}
	} catch (IOException ex) {
		LOG.warn("Failed to clean up path " + path, ex);
	}
}
 
Example 7
Source File: FileUtils.java    From flink with Apache License 2.0
public static Path compressDirectory(Path directory, Path target) throws IOException {
	FileSystem sourceFs = directory.getFileSystem();
	FileSystem targetFs = target.getFileSystem();

	try (ZipOutputStream out = new ZipOutputStream(targetFs.create(target, FileSystem.WriteMode.NO_OVERWRITE))) {
		addToZip(directory, sourceFs, directory.getParent(), out);
	}
	return target;
}
 
Example 8
Source File: RocksDBCheckpointIterator.java    From bravo with Apache License 2.0
private void copyStateDataHandleData(
		Path restoreFilePath,
		StreamStateHandle remoteFileHandle) throws IOException {

	FileSystem restoreFileSystem = restoreFilePath.getFileSystem();

	FSDataInputStream inputStream = null;
	FSDataOutputStream outputStream = null;

	try {
		inputStream = remoteFileHandle.openInputStream();
		cancelStreamRegistry.registerCloseable(inputStream);

		outputStream = restoreFileSystem.create(restoreFilePath, FileSystem.WriteMode.OVERWRITE);
		cancelStreamRegistry.registerCloseable(outputStream);

		byte[] buffer = new byte[8 * 1024];
		while (true) {
			int numBytes = inputStream.read(buffer);
			if (numBytes == -1) {
				break;
			}

			outputStream.write(buffer, 0, numBytes);
		}
	} finally {
		if (cancelStreamRegistry.unregisterCloseable(inputStream)) {
			inputStream.close();
		}

		if (cancelStreamRegistry.unregisterCloseable(outputStream)) {
			outputStream.close();
		}
	}
}
 
Example 9
Source File: PythonPlanBinder.java    From Flink-CEPplus with Apache License 2.0
private static void unzipPythonLibrary(Path targetDir) throws IOException {
	FileSystem targetFs = targetDir.getFileSystem();
	ClassLoader classLoader = PythonPlanBinder.class.getClassLoader();
	try (ZipInputStream zis = new ZipInputStream(classLoader.getResourceAsStream("python-source.zip"))) {
		ZipEntry entry = zis.getNextEntry();
		while (entry != null) {
			String fileName = entry.getName();
			Path newFile = new Path(targetDir, fileName);
			if (entry.isDirectory()) {
				targetFs.mkdirs(newFile);
			} else {
				try {
					LOG.debug("Unzipping to {}.", newFile);
					FSDataOutputStream fsDataOutputStream = targetFs.create(newFile, FileSystem.WriteMode.NO_OVERWRITE);
					IOUtils.copyBytes(zis, fsDataOutputStream, false);
				} catch (Exception e) {
					zis.closeEntry();
					throw new IOException("Failed to unzip flink python library.", e);
				}
			}

			zis.closeEntry();
			entry = zis.getNextEntry();
		}
		zis.closeEntry();
	}
}
 
Example 10
Source File: HDFSTest.java    From flink with Apache License 2.0
/**
 * Test that {@link FileUtils#deletePathIfEmpty(FileSystem, Path)} deletes the path if it is
 * empty. A path can only be empty if it is a directory which does not contain any
 * files/directories.
 */
@Test
public void testDeletePathIfEmpty() throws IOException {
	final Path basePath = new Path(hdfsURI);
	final Path directory = new Path(basePath, UUID.randomUUID().toString());
	final Path directoryFile = new Path(directory, UUID.randomUUID().toString());
	final Path singleFile = new Path(basePath, UUID.randomUUID().toString());

	FileSystem fs = basePath.getFileSystem();

	fs.mkdirs(directory);

	byte[] data = "HDFSTest#testDeletePathIfEmpty".getBytes(ConfigConstants.DEFAULT_CHARSET);

	for (Path file: Arrays.asList(singleFile, directoryFile)) {
		org.apache.flink.core.fs.FSDataOutputStream outputStream = fs.create(file, FileSystem.WriteMode.OVERWRITE);
		outputStream.write(data);
		outputStream.close();
	}

	// verify that the files have been created
	assertTrue(fs.exists(singleFile));
	assertTrue(fs.exists(directoryFile));

	// delete the single file
	assertFalse(FileUtils.deletePathIfEmpty(fs, singleFile));
	assertTrue(fs.exists(singleFile));

	// try to delete the non-empty directory
	assertFalse(FileUtils.deletePathIfEmpty(fs, directory));
	assertTrue(fs.exists(directory));

	// delete the file contained in the directory
	assertTrue(fs.delete(directoryFile, false));

	// now the deletion should work
	assertTrue(FileUtils.deletePathIfEmpty(fs, directory));
	assertFalse(fs.exists(directory));
}
 
Example 11
Source File: HadoopSwiftFileSystemITCase.java    From flink with Apache License 2.0
@Test
public void testSimpleFileWriteAndRead() throws Exception {
	final Configuration conf = createConfiguration();

	final String testLine = "Hello Upload!";

	FileSystem.initialize(conf);

	final Path path = new Path("swift://" + CONTAINER + '.' + SERVICENAME + '/' + TEST_DATA_DIR + "/test.txt");
	final FileSystem fs = path.getFileSystem();

	try {
		try (FSDataOutputStream out = fs.create(path, WriteMode.OVERWRITE);
			OutputStreamWriter writer = new OutputStreamWriter(out, StandardCharsets.UTF_8)) {
			writer.write(testLine);
		}

		try (FSDataInputStream in = fs.open(path);
			InputStreamReader ir = new InputStreamReader(in, StandardCharsets.UTF_8);
			BufferedReader reader = new BufferedReader(ir)) {
			String line = reader.readLine();
			assertEquals(testLine, line);
		}
	}
	finally {
		fs.delete(path, false);
	}
}
 
Example 12
Source File: HadoopOSSFileSystemITCase.java    From Flink-CEPplus with Apache License 2.0
@BeforeClass
public static void setup() throws IOException {
	OSSTestCredentials.assumeCredentialsAvailable();

	final Configuration conf = new Configuration();
	conf.setString("fs.oss.endpoint", OSSTestCredentials.getOSSEndpoint());
	conf.setString("fs.oss.accessKeyId", OSSTestCredentials.getOSSAccessKey());
	conf.setString("fs.oss.accessKeySecret", OSSTestCredentials.getOSSSecretKey());
	FileSystem.initialize(conf);
	basePath = new Path(OSSTestCredentials.getTestBucketUri() + TEST_DATA_DIR);
	fs = basePath.getFileSystem();
	deadline = 0;
}
 
Example 13
Source File: FileCacheDirectoriesTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testDirectoryCleanUp() throws Exception {
	JobID jobID = new JobID();
	ExecutionAttemptID attemptID1 = new ExecutionAttemptID();
	ExecutionAttemptID attemptID2 = new ExecutionAttemptID();

	final String fileName = "test_file";
	// copy / create the file
	final DistributedCache.DistributedCacheEntry entry = new DistributedCache.DistributedCacheEntry(
		fileName,
		false,
		InstantiationUtil.serializeObject(permanentBlobKey),
		true);
	Future<Path> copyResult = fileCache.createTmpFile(fileName, entry, jobID, attemptID1);
	fileCache.createTmpFile(fileName, entry, jobID, attemptID2);

	final Path dstPath = copyResult.get();
	final FileSystem fs = dstPath.getFileSystem();
	final FileStatus fileStatus = fs.getFileStatus(dstPath);
	final Path cacheFile = new Path(dstPath, "cacheFile");
	assertTrue(fileStatus.isDir());
	assertTrue(fs.exists(cacheFile));

	fileCache.releaseJob(jobID, attemptID1);
	// still should be available
	assertTrue(fileStatus.isDir());
	assertTrue(fs.exists(cacheFile));

	fileCache.releaseJob(jobID, attemptID2);
	// still should be available, file will be deleted after cleanupInterval
	assertTrue(fileStatus.isDir());
	assertTrue(fs.exists(cacheFile));

	// after a while, the file should disappear
	assertEquals(CLEANUP_INTERVAL, executorService.lastDelayMillis);
	executorService.lastDeleteProcess.run();

	assertFalse(fs.exists(dstPath));
	assertFalse(fs.exists(cacheFile));
}
 
Example 14
Source File: MapRFsFactoryTest.java    From flink with Apache License 2.0
@Test
public void testMapRFsKind() throws Exception {
	final Path path = new Path("maprfs:///my/path");

	final FileSystem fs = path.getFileSystem();

	assertEquals(FileSystemKind.FILE_SYSTEM, fs.getKind());
}
 
Example 15
Source File: PythonPlanBinder.java    From Flink-CEPplus with Apache License 2.0
private static void deleteIfExists(Path path) throws IOException {
	FileSystem fs = path.getFileSystem();
	if (fs.exists(path)) {
		fs.delete(path, true);
	}
}
 
Example 16
Source File: FileOutputFormat.java    From flink with Apache License 2.0
@Override
public void open(int taskNumber, int numTasks) throws IOException {
	if (taskNumber < 0 || numTasks < 1) {
		throw new IllegalArgumentException("TaskNumber: " + taskNumber + ", numTasks: " + numTasks);
	}
	
	if (LOG.isDebugEnabled()) {
		LOG.debug("Opening stream for output (" + (taskNumber+1) + "/" + numTasks + "). WriteMode=" + writeMode +
				", OutputDirectoryMode=" + outputDirectoryMode);
	}
	
	Path p = this.outputFilePath;
	if (p == null) {
		throw new IOException("The file path is null.");
	}
	
	final FileSystem fs = p.getFileSystem();

	// if this is a local file system, we need to initialize the local output directory here
	if (!fs.isDistributedFS()) {
		
		if (numTasks == 1 && outputDirectoryMode == OutputDirectoryMode.PARONLY) {
			// output should go to a single file
			
			// prepare local output path. checks for write mode and removes existing files in case of OVERWRITE mode
			if (!fs.initOutPathLocalFS(p, writeMode, false)) {
				// output preparation failed! Cancel task.
				throw new IOException("Output path '" + p.toString() + "' could not be initialized. Canceling task...");
			}
		}
		else {
			// numTasks > 1 || outDirMode == OutputDirectoryMode.ALWAYS
			
			if (!fs.initOutPathLocalFS(p, writeMode, true)) {
				// output preparation failed! Cancel task.
				throw new IOException("Output directory '" + p.toString() + "' could not be created. Canceling task...");
			}
		}
	}

	// Suffix the path with the parallel instance index, if needed
	this.actualFilePath = (numTasks > 1 || outputDirectoryMode == OutputDirectoryMode.ALWAYS) ? p.suffix("/" + getDirectoryFileName(taskNumber)) : p;

	// create output file
	this.stream = fs.create(this.actualFilePath, writeMode);
	
	// at this point, the file creation must have succeeded, or an exception has been thrown
	this.fileCreated = true;
}
 
Example 17
Source File: SnapshotDirectory.java    From Flink-CEPplus with Apache License 2.0
private SnapshotDirectory(@Nonnull Path directory) throws IOException {
	this(directory, directory.getFileSystem());
}
 
Example 18
Source File: RocksDBStateUploader.java    From Flink-CEPplus with Apache License 2.0
private StreamStateHandle uploadLocalFileToCheckpointFs(
	Path filePath,
	CheckpointStreamFactory checkpointStreamFactory,
	CloseableRegistry closeableRegistry) throws IOException {
	FSDataInputStream inputStream = null;
	CheckpointStreamFactory.CheckpointStateOutputStream outputStream = null;

	try {
		final byte[] buffer = new byte[READ_BUFFER_SIZE];

		FileSystem backupFileSystem = filePath.getFileSystem();
		inputStream = backupFileSystem.open(filePath);
		closeableRegistry.registerCloseable(inputStream);

		outputStream = checkpointStreamFactory
			.createCheckpointStateOutputStream(CheckpointedStateScope.SHARED);
		closeableRegistry.registerCloseable(outputStream);

		while (true) {
			int numBytes = inputStream.read(buffer);

			if (numBytes == -1) {
				break;
			}

			outputStream.write(buffer, 0, numBytes);
		}

		StreamStateHandle result = null;
		if (closeableRegistry.unregisterCloseable(outputStream)) {
			result = outputStream.closeAndGetHandle();
			outputStream = null;
		}
		return result;

	} finally {

		if (closeableRegistry.unregisterCloseable(inputStream)) {
			IOUtils.closeQuietly(inputStream);
		}

		if (closeableRegistry.unregisterCloseable(outputStream)) {
			IOUtils.closeQuietly(outputStream);
		}
	}
}
 
Example 19
Source File: BlobServerRecoveryTest.java    From flink with Apache License 2.0
/**
 * Helper to test that the {@link BlobServer} recovery from its HA store works.
 *
 * <p>Uploads two BLOBs to one {@link BlobServer} and expects a second one to be able to retrieve
 * them via a shared HA store upon request of a {@link BlobCacheService}.
 *
 * @param config
 * 		blob server configuration (including HA settings like {@link HighAvailabilityOptions#HA_STORAGE_PATH}
 * 		and {@link HighAvailabilityOptions#HA_CLUSTER_ID}) used to set up <tt>blobStore</tt>
 * @param blobStore
 * 		shared HA blob store to use
 *
 * @throws IOException
 * 		in case of failures
 */
public static void testBlobServerRecovery(final Configuration config, final BlobStore blobStore) throws IOException {
	final String clusterId = config.getString(HighAvailabilityOptions.HA_CLUSTER_ID);
	String storagePath = config.getString(HighAvailabilityOptions.HA_STORAGE_PATH) + "/" + clusterId;
	Random rand = new Random();

	try (
		BlobServer server0 = new BlobServer(config, blobStore);
		BlobServer server1 = new BlobServer(config, blobStore);
		// use VoidBlobStore as the HA store to force download from server[1]'s HA store
		BlobCacheService cache1 = new BlobCacheService(
			config, new VoidBlobStore(), new InetSocketAddress("localhost", server1.getPort())
		)) {

		server0.start();
		server1.start();

		// Random data
		byte[] expected = new byte[1024];
		rand.nextBytes(expected);
		byte[] expected2 = Arrays.copyOfRange(expected, 32, 288);

		BlobKey[] keys = new BlobKey[2];
		BlobKey nonHAKey;

		// Put job-related HA data
		JobID[] jobId = new JobID[] { new JobID(), new JobID() };
		keys[0] = put(server0, jobId[0], expected, PERMANENT_BLOB); // Request 1
		keys[1] = put(server0, jobId[1], expected2, PERMANENT_BLOB); // Request 2

		// put non-HA data
		nonHAKey = put(server0, jobId[0], expected2, TRANSIENT_BLOB);
		verifyKeyDifferentHashEquals(keys[1], nonHAKey);

		// check that the storage directory exists
		final Path blobServerPath = new Path(storagePath, "blob");
		FileSystem fs = blobServerPath.getFileSystem();
		assertTrue("Unknown storage dir: " + blobServerPath, fs.exists(blobServerPath));

		// Verify HA requests from cache1 (connected to server1) with no immediate access to the file
		verifyContents(cache1, jobId[0], keys[0], expected);
		verifyContents(cache1, jobId[1], keys[1], expected2);

		// Verify non-HA file is not accessible from server1
		verifyDeleted(cache1, jobId[0], nonHAKey);

		// Remove again
		server1.cleanupJob(jobId[0], true);
		server1.cleanupJob(jobId[1], true);

		// Verify everything is clean
		assertTrue("HA storage directory does not exist", fs.exists(new Path(storagePath)));
		if (fs.exists(blobServerPath)) {
			final org.apache.flink.core.fs.FileStatus[] recoveryFiles =
				fs.listStatus(blobServerPath);
			ArrayList<String> filenames = new ArrayList<>(recoveryFiles.length);
			for (org.apache.flink.core.fs.FileStatus file: recoveryFiles) {
				filenames.add(file.toString());
			}
			fail("Unclean state backend: " + filenames);
		}
	}
}