org.apache.flink.core.fs.FileStatus Java Examples

The following examples show how to use org.apache.flink.core.fs.FileStatus. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DistributedCacheDfsTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the distributed-cache entries from inside the task: the {@code test_data} file must live
 * on a non-distributed file system and contain {@code testFileContent}, and {@code test_dir} must
 * be a directory whose listing has exactly two entries.
 *
 * @param value the incoming record; not used by the checks
 * @return the string read from the cached {@code test_data} file
 * @throws Exception if a cached file cannot be resolved or read
 */
@Override
public String map(Integer value) throws Exception {
	final Path actualFile = new Path(getRuntimeContext().getDistributedCache().getFile("test_data").toURI());

	Path path = new Path(actualFile.toUri());
	assertFalse(path.getFileSystem().isDistributedFS());

	// Close the stream deterministically; leaving it open leaks a file handle on every call.
	final String contents;
	try (DataInputStream in = new DataInputStream(actualFile.getFileSystem().open(actualFile))) {
		contents = in.readUTF();
	}

	assertEquals(testFileContent, contents);

	final Path actualDir = new Path(getRuntimeContext().getDistributedCache().getFile("test_dir").toURI());
	FileStatus fileStatus = actualDir.getFileSystem().getFileStatus(actualDir);
	assertTrue(fileStatus.isDir());
	FileStatus[] fileStatuses = actualDir.getFileSystem().listStatus(actualDir);
	assertEquals(2, fileStatuses.length);

	return contents;
}
 
Example #2
Source File: FileCacheDirectoriesTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Registers a cache entry built from {@code permanentBlobKey}, asks the file cache to create the
 * temporary copy, and checks that the result is a directory containing {@code cacheFile} with
 * the expected content.
 */
@Test
public void testDirectoryDownloadedFromBlob() throws Exception {
	final JobID job = new JobID();
	final ExecutionAttemptID attempt = new ExecutionAttemptID();
	final String entryName = "test_file";

	// copy / create the file
	final DistributedCache.DistributedCacheEntry cacheEntry =
		new DistributedCache.DistributedCacheEntry(
			entryName, false, InstantiationUtil.serializeObject(permanentBlobKey), true);
	final Path downloaded = fileCache.createTmpFile(entryName, cacheEntry, job, attempt).get();

	// the materialised entry must be a directory
	final FileSystem targetFs = downloaded.getFileSystem();
	assertTrue(targetFs.getFileStatus(downloaded).isDir());

	// ... that holds the expected file with the expected content
	final Path expectedFile = new Path(downloaded, "cacheFile");
	assertTrue(targetFs.exists(expectedFile));

	final File localCopy = new File(expectedFile.getPath());
	assertEquals(testFileContent, FileUtils.readFileUtf8(localCopy));
}
 
Example #3
Source File: FileInputFormat.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Aggregates the statistics of several input paths into a single {@link FileBaseStatistics}.
 *
 * <p>The total size becomes {@code BaseStatistics.SIZE_UNKNOWN} as soon as one path reports an
 * unknown size; otherwise the sizes are summed. The modification time is the maximum over all
 * paths. If cached statistics exist and are at least as recent as that maximum, they are
 * returned unchanged.
 *
 * @param cachedStats previously computed statistics, or {@code null} if there are none
 * @param filePaths the paths to inspect
 * @param files handed on to the per-path {@code getFileStats} overload
 * @return the cached statistics if still valid, otherwise newly aggregated statistics
 * @throws IOException propagated from the file system lookup or the per-path statistics call
 */
protected FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, Path[] filePaths, ArrayList<FileStatus> files) throws IOException {

		long newestModTime = 0;
		long sizeSum = 0;

		for (Path inputPath : filePaths) {
			final FileSystem inputFs = FileSystem.get(inputPath.toUri());
			final FileBaseStatistics pathStats = getFileStats(cachedStats, inputPath, inputFs, files);
			final long pathSize = pathStats.getTotalInputSize();

			// an unknown size is sticky: once seen, the sum stays unknown
			if (pathSize == BaseStatistics.SIZE_UNKNOWN) {
				sizeSum = BaseStatistics.SIZE_UNKNOWN;
			} else if (sizeSum != BaseStatistics.SIZE_UNKNOWN) {
				sizeSum += pathSize;
			}
			newestModTime = Math.max(newestModTime, pathStats.getLastModificationTime());
		}

		// check whether the cached statistics are still valid, if we have any
		final boolean cacheStillValid =
			cachedStats != null && newestModTime <= cachedStats.getLastModificationTime();

		return cacheStillValid
			? cachedStats
			: new FileBaseStatistics(newestModTime, sizeSum, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	}
 
Example #4
Source File: LocalFileSystem.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Lists the status of {@code f}: a single-element array when it is a regular file, otherwise one
 * entry per name returned by {@link File#list()}.
 *
 * @param f the path to list
 * @return the statuses, or {@code null} if the path does not exist or its children cannot be listed
 * @throws IOException propagated from resolving the status of a child
 */
@Override
public FileStatus[] listStatus(final Path f) throws IOException {

	final File target = pathToFile(f);

	if (!target.exists()) {
		return null;
	}
	if (target.isFile()) {
		return new FileStatus[] { new LocalFileStatus(target, this) };
	}

	final String[] childNames = target.list();
	if (childNames == null) {
		return null;
	}

	// resolve every child name against the listed path
	final FileStatus[] statuses = new FileStatus[childNames.length];
	int idx = 0;
	for (String childName : childNames) {
		statuses[idx++] = getFileStatus(new Path(f, childName));
	}
	return statuses;
}
 
Example #5
Source File: PartitionTempFileManager.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Returns checkpoints whose keys are less than or equal to {@code toCpId}
 * in temporary base path.
 *
 * @param fs file system used to list {@code basePath}
 * @param basePath temporary base path whose children are scanned for checkpoint directories
 * @param toCpId inclusive upper bound for the returned checkpoint ids
 * @return the matching checkpoint ids in listing order; empty if the listing yields nothing
 * @throws IOException if listing {@code basePath} fails
 */
public static long[] headCheckpoints(FileSystem fs, Path basePath, long toCpId) throws IOException {
	FileStatus[] statuses = fs.listStatus(basePath);
	if (statuses == null) {
		// Some file systems report a missing path as null rather than throwing;
		// treat that as "no checkpoints" instead of failing with a NullPointerException.
		return new long[0];
	}

	List<Long> cps = new ArrayList<>();
	for (FileStatus taskStatus : statuses) {
		String name = taskStatus.getPath().getName();
		if (isCheckpointDir(name)) {
			long currentCp = getCheckpointId(name);
			// keep only checkpoint ids that do not exceed the requested bound
			if (currentCp <= toCpId) {
				cps.add(currentCp);
			}
		}
	}
	return cps.stream().mapToLong(v -> v).toArray();
}
 
Example #6
Source File: FileMonitoringFunction.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Lists {@code path} and returns the entries that {@code isFiltered} does not reject, recording
 * the modification time of every accepted file in {@code modificationTimes}.
 *
 * @param fileSystem the file system used for the listing
 * @return the string form of each accepted path; empty if the listing is {@code null}
 * @throws IOException if the listing fails
 */
private List<String> listNewFiles(FileSystem fileSystem) throws IOException {
	final List<String> newFiles = new ArrayList<>();

	final FileStatus[] listing = fileSystem.listStatus(new Path(path));
	if (listing == null) {
		LOG.warn("Path does not exist: {}", path);
		return newFiles;
	}

	for (FileStatus entry : listing) {
		final Path entryPath = entry.getPath();
		final String entryName = entryPath.getName();
		final long modifiedAt = entry.getModificationTime();

		if (isFiltered(entryName, modifiedAt)) {
			continue;
		}
		newFiles.add(entryPath.toString());
		modificationTimes.put(entryName, modifiedAt);
	}

	return newFiles;
}
 
Example #7
Source File: StanfordTweetsDataSetInputFormat.java    From flink-examples with MIT License 6 votes vote down vote up
/**
 * Creates one input split for every entry under {@code inputPath} whose name ends with
 * {@code "edges"}.
 *
 * @param minNumSplits requested minimum number of splits; not used by this implementation
 * @return the splits; each split number is the entry's index in the directory listing
 * @throws IOException if {@code inputPath} cannot be listed
 */
@Override
public TweetFileInputSplit[] createInputSplits(int minNumSplits) throws IOException {
    FileSystem fileSystem = getFileSystem();
    FileStatus[] statuses = fileSystem.listStatus(new Path(inputPath));
    if (statuses == null) {
        // listStatus may signal a missing path with null; fail with a descriptive error
        // instead of a NullPointerException on statuses.length.
        throw new IOException("Cannot list input path (does it exist?): " + inputPath);
    }
    logger.info("Found {} files", statuses.length);

    List<TweetFileInputSplit> splits = new ArrayList<>();
    for (int i = 0; i < statuses.length; i++) {
        FileStatus status = statuses[i];
        String fileName = status.getPath().getName();
        if (fileName.endsWith("edges")) {
            splits.add(new TweetFileInputSplit(i, status.getPath()));
        }
    }

    logger.info("Result number of splits: {}", splits.size());
    return splits.toArray(new TweetFileInputSplit[0]);
}
 
Example #8
Source File: ContinuousFileMonitoringFunction.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Lists the eligible files under {@code path}, groups their splits by modification time,
 * forwards every split to {@code context}, and raises {@code globalModificationTime} to the
 * newest modification time seen. Asserts that the caller holds {@code checkpointLock}.
 *
 * @param fs the file system to scan
 * @param context the source context that receives the splits
 * @throws IOException if listing files or creating splits fails
 */
private void monitorDirAndForwardSplits(FileSystem fs,
										SourceContext<TimestampedFileInputSplit> context) throws IOException {
	assert (Thread.holdsLock(checkpointLock));

	final Map<Path, FileStatus> candidates = listEligibleFiles(fs, new Path(path));
	final Map<Long, List<TimestampedFileInputSplit>> grouped = getInputSplitsSortedByModTime(candidates);

	for (Map.Entry<Long, List<TimestampedFileInputSplit>> group : grouped.entrySet()) {
		final long groupModTime = group.getKey();
		final List<TimestampedFileInputSplit> pendingSplits = group.getValue();

		for (TimestampedFileInputSplit pending : pendingSplits) {
			LOG.info("Forwarding split: " + pending);
			context.collect(pending);
		}

		// update the global modification time
		if (groupModTime > globalModificationTime) {
			globalModificationTime = groupModTime;
		}
	}
}
 
Example #9
Source File: BinaryInputFormat.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Collects the {@link FileStatus} of every input file: a path that is not a directory is added
 * directly, a directory contributes its immediate non-directory children.
 *
 * @return the statuses of all files involved in the splits
 * @throws IOException if a file system cannot be resolved or queried
 */
protected List<FileStatus> getFiles() throws IOException {
	final List<FileStatus> collected = new ArrayList<>();

	for (Path inputPath : getFilePaths()) {
		final FileSystem inputFs = inputPath.getFileSystem();
		final FileStatus inputStatus = inputFs.getFileStatus(inputPath);

		if (!inputStatus.isDir()) {
			collected.add(inputStatus);
			continue;
		}

		// input is directory. list all contained files
		for (FileStatus child : inputFs.listStatus(inputPath)) {
			if (child.isDir()) {
				continue;
			}
			collected.add(child);
		}
	}
	return collected;
}
 
Example #10
Source File: DistributedCacheDfsTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the distributed-cache entries from inside the task: the {@code test_data} file must live
 * on a non-distributed file system and contain {@code testFileContent}, and {@code test_dir} must
 * be a directory whose listing has exactly two entries.
 *
 * @param value the incoming record; not used by the checks
 * @return the string read from the cached {@code test_data} file
 * @throws Exception if a cached file cannot be resolved or read
 */
@Override
public String map(Integer value) throws Exception {
	final Path actualFile = new Path(getRuntimeContext().getDistributedCache().getFile("test_data").toURI());

	Path path = new Path(actualFile.toUri());
	assertFalse(path.getFileSystem().isDistributedFS());

	// Close the stream deterministically; leaving it open leaks a file handle on every call.
	final String contents;
	try (DataInputStream in = new DataInputStream(actualFile.getFileSystem().open(actualFile))) {
		contents = in.readUTF();
	}

	assertEquals(testFileContent, contents);

	final Path actualDir = new Path(getRuntimeContext().getDistributedCache().getFile("test_dir").toURI());
	FileStatus fileStatus = actualDir.getFileSystem().getFileStatus(actualDir);
	assertTrue(fileStatus.isDir());
	FileStatus[] fileStatuses = actualDir.getFileSystem().listStatus(actualDir);
	assertEquals(2, fileStatuses.length);

	return contents;
}
 
Example #11
Source File: ContinuousFileMonitoringFunction.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the input splits that are forwarded to the downstream tasks of the
 * {@link ContinuousFileReaderOperator}. The splits are grouped in a map that is ordered
 * <b>by modification time</b>; splits whose path is not a key of {@code eligibleFiles}
 * are dropped.
 *
 * @param eligibleFiles The files to process, keyed by path.
 * @return The splits grouped by the modification time of their file; empty if there are no
 *         eligible files.
 * @throws IOException if the input format fails to create its splits
 */
private Map<Long, List<TimestampedFileInputSplit>> getInputSplitsSortedByModTime(
			Map<Path, FileStatus> eligibleFiles) throws IOException {

	final Map<Long, List<TimestampedFileInputSplit>> grouped = new TreeMap<>();
	if (eligibleFiles.isEmpty()) {
		return grouped;
	}

	for (FileInputSplit rawSplit : format.createInputSplits(readerParallelism)) {
		final FileStatus owner = eligibleFiles.get(rawSplit.getPath());
		if (owner == null) {
			// split belongs to a file that is not eligible
			continue;
		}

		final Long modTime = owner.getModificationTime();

		// fetch the bucket for this modification time, creating it on first use
		List<TimestampedFileInputSplit> bucket = grouped.get(modTime);
		if (bucket == null) {
			bucket = new ArrayList<>();
			grouped.put(modTime, bucket);
		}

		bucket.add(
			new TimestampedFileInputSplit(
				modTime,
				rawSplit.getSplitNumber(),
				rawSplit.getPath(),
				rawSplit.getStart(),
				rawSplit.getLength(),
				rawSplit.getHostnames()));
	}
	return grouped;
}
 
Example #12
Source File: ContinuousFileMonitoringFunction.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Lists the eligible files under {@code path}, groups their splits by modification time,
 * forwards every split to {@code context}, and raises {@code globalModificationTime} to the
 * newest modification time seen. Asserts that the caller holds {@code checkpointLock}.
 *
 * @param fs the file system to scan
 * @param context the source context that receives the splits
 * @throws IOException if listing files or creating splits fails
 */
private void monitorDirAndForwardSplits(FileSystem fs,
										SourceContext<TimestampedFileInputSplit> context) throws IOException {
	assert (Thread.holdsLock(checkpointLock));

	final Map<Path, FileStatus> candidates = listEligibleFiles(fs, new Path(path));
	final Map<Long, List<TimestampedFileInputSplit>> grouped = getInputSplitsSortedByModTime(candidates);

	for (Map.Entry<Long, List<TimestampedFileInputSplit>> group : grouped.entrySet()) {
		final long groupModTime = group.getKey();
		final List<TimestampedFileInputSplit> pendingSplits = group.getValue();

		for (TimestampedFileInputSplit pending : pendingSplits) {
			LOG.info("Forwarding split: " + pending);
			context.collect(pending);
		}

		// update the global modification time
		if (groupModTime > globalModificationTime) {
			globalModificationTime = groupModTime;
		}
	}
}
 
Example #13
Source File: FileInputFormat.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Aggregates the statistics of several input paths into a single {@link FileBaseStatistics}.
 *
 * <p>The total size becomes {@code BaseStatistics.SIZE_UNKNOWN} as soon as one path reports an
 * unknown size; otherwise the sizes are summed. The modification time is the maximum over all
 * paths. If cached statistics exist and are at least as recent as that maximum, they are
 * returned unchanged.
 *
 * @param cachedStats previously computed statistics, or {@code null} if there are none
 * @param filePaths the paths to inspect
 * @param files handed on to the per-path {@code getFileStats} overload
 * @return the cached statistics if still valid, otherwise newly aggregated statistics
 * @throws IOException propagated from the file system lookup or the per-path statistics call
 */
protected FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, Path[] filePaths, ArrayList<FileStatus> files) throws IOException {

		long newestModTime = 0;
		long sizeSum = 0;

		for (Path inputPath : filePaths) {
			final FileSystem inputFs = FileSystem.get(inputPath.toUri());
			final FileBaseStatistics pathStats = getFileStats(cachedStats, inputPath, inputFs, files);
			final long pathSize = pathStats.getTotalInputSize();

			// an unknown size is sticky: once seen, the sum stays unknown
			if (pathSize == BaseStatistics.SIZE_UNKNOWN) {
				sizeSum = BaseStatistics.SIZE_UNKNOWN;
			} else if (sizeSum != BaseStatistics.SIZE_UNKNOWN) {
				sizeSum += pathSize;
			}
			newestModTime = Math.max(newestModTime, pathStats.getLastModificationTime());
		}

		// check whether the cached statistics are still valid, if we have any
		final boolean cacheStillValid =
			cachedStats != null && newestModTime <= cachedStats.getLastModificationTime();

		return cacheStillValid
			? cachedStats
			: new FileBaseStatistics(newestModTime, sizeSum, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	}
 
Example #14
Source File: HadoopFileSystem.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Looks up the block locations of {@code file} through the wrapped Hadoop file system and wraps
 * each result in a {@link HadoopBlockLocation}.
 *
 * @param file the file to query; must be a {@link HadoopFileStatus}
 * @param start passed through to the Hadoop file system as the range start
 * @param len passed through to the Hadoop file system as the range length
 * @return one wrapped block location per location reported by Hadoop
 * @throws IOException if {@code file} is not a {@link HadoopFileStatus} or the lookup fails
 */
@Override
public BlockLocation[] getFileBlockLocations(final FileStatus file, final long start, final long len)
		throws IOException {
	if (!(file instanceof HadoopFileStatus)) {
		// The message names the type that is actually checked (it previously referred to
		// a "DistributedFileStatus" that this method never tests for).
		throw new IOException("file is not an instance of HadoopFileStatus");
	}

	final HadoopFileStatus f = (HadoopFileStatus) file;

	final org.apache.hadoop.fs.BlockLocation[] blkLocations = fs.getFileBlockLocations(f.getInternalFileStatus(),
		start, len);

	// Wrap up HDFS specific block location objects
	final HadoopBlockLocation[] distBlkLocations = new HadoopBlockLocation[blkLocations.length];
	for (int i = 0; i < distBlkLocations.length; i++) {
		distBlkLocations[i] = new HadoopBlockLocation(blkLocations[i]);
	}

	return distBlkLocations;
}
 
Example #15
Source File: FileCacheDirectoriesTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Asks the file cache to create the temporary copy for {@code entry} and checks that the result
 * is a directory containing {@code cacheFile} with the expected content.
 *
 * @param entry the distributed cache entry to materialise
 * @throws Exception if the copy fails or the content cannot be read
 */
private void testDirectoryDownloaded(DistributedCache.DistributedCacheEntry entry) throws Exception {
	final JobID job = new JobID();
	final ExecutionAttemptID attempt = new ExecutionAttemptID();

	// copy / create the file
	final String entryName = "test_file";
	final Path downloaded = fileCache.createTmpFile(entryName, entry, job, attempt).get();

	// the materialised entry must be a directory
	final FileSystem targetFs = downloaded.getFileSystem();
	assertTrue(targetFs.getFileStatus(downloaded).isDir());

	// ... that holds the expected file with the expected content
	final Path expectedFile = new Path(downloaded, "cacheFile");
	assertTrue(targetFs.exists(expectedFile));

	final File localCopy = new File(expectedFile.getPath());
	assertEquals(testFileContent, FileUtils.readFileUtf8(localCopy));
}
 
Example #16
Source File: RocksIncrementalSnapshotStrategy.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Sorts the given files into the output maps, keyed by a {@link StateHandleID} built from the
 * file name: files ending in {@code SST_FILE_SUFFIX} that are already in {@code baseSstFiles} get
 * a placeholder handle in {@code sstFiles}, other such files go to {@code sstFilePaths}, and
 * everything else goes to {@code miscFilePaths}.
 *
 * @param fileStatuses the files to classify
 * @param sstFiles receives placeholder handles for already known sst files
 * @param sstFilePaths receives the paths of sst files that are not yet known
 * @param miscFilePaths receives the paths of all non-sst files
 */
private void createUploadFilePaths(
	FileStatus[] fileStatuses,
	Map<StateHandleID, StreamStateHandle> sstFiles,
	Map<StateHandleID, Path> sstFilePaths,
	Map<StateHandleID, Path> miscFilePaths) {
	for (FileStatus status : fileStatuses) {
		final Path localPath = status.getPath();
		final String localName = localPath.getName();
		final StateHandleID handleId = new StateHandleID(localName);

		if (!localName.endsWith(SST_FILE_SUFFIX)) {
			miscFilePaths.put(handleId, localPath);
			continue;
		}

		if (baseSstFiles != null && baseSstFiles.contains(handleId)) {
			// we introduce a placeholder state handle, that is replaced with the
			// original from the shared state registry (created from a previous checkpoint)
			sstFiles.put(handleId, new PlaceholderStreamStateHandle());
		} else {
			sstFilePaths.put(handleId, localPath);
		}
	}
}
 
Example #17
Source File: RocksDBIncrementalRestoreOperation.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Populates the working directory of the recovered RocksDB instance from a local state
 * directory: files ending in {@code SST_FILE_SUFFIX} are hard-linked, all other files are copied
 * (replacing an existing target).
 *
 * @param source the directory holding the local state
 * @param instanceRocksDBPath the directory that receives the links and copies
 * @throws IOException if {@code source} cannot be listed or a link/copy operation fails
 */
private void restoreInstanceDirectoryFromPath(Path source, String instanceRocksDBPath) throws IOException {

	final FileStatus[] entries = source.getFileSystem().listStatus(source);
	if (entries == null) {
		throw new IOException("Cannot list file statues. Directory " + source + " does not exist.");
	}

	for (FileStatus entry : entries) {
		final String name = entry.getPath().getName();
		final File origin = new File(source.getPath(), name);
		final File destination = new File(instanceRocksDBPath, name);

		if (!name.endsWith(SST_FILE_SUFFIX)) {
			// true copy for all other files.
			Files.copy(origin.toPath(), destination.toPath(), StandardCopyOption.REPLACE_EXISTING);
			continue;
		}

		// hardlink'ing the immutable sst-files.
		Files.createLink(destination.toPath(), origin.toPath());
	}
}
 
Example #18
Source File: RocksIncrementalSnapshotStrategy.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Classifies the files in {@code localBackupDirectory} and uploads them through
 * {@code stateUploader}, adding the resulting handles to the given maps. Does nothing beyond the
 * existence check when the directory listing is {@code null}.
 *
 * @param sstFiles receives the handles of the sst files
 * @param miscFiles receives the handles of all other files
 * @throws Exception if listing or uploading fails
 */
private void uploadSstFiles(
	@Nonnull Map<StateHandleID, StreamStateHandle> sstFiles,
	@Nonnull Map<StateHandleID, StreamStateHandle> miscFiles) throws Exception {

	// write state data
	Preconditions.checkState(localBackupDirectory.exists());

	final Map<StateHandleID, Path> pendingSstUploads = new HashMap<>();
	final Map<StateHandleID, Path> pendingMiscUploads = new HashMap<>();

	final FileStatus[] backupListing = localBackupDirectory.listStatus();
	if (backupListing == null) {
		return;
	}

	createUploadFilePaths(backupListing, sstFiles, pendingSstUploads, pendingMiscUploads);

	// sst files first, then everything else
	sstFiles.putAll(
		stateUploader.uploadFilesToCheckpointFs(
			pendingSstUploads, checkpointStreamFactory, snapshotCloseableRegistry));
	miscFiles.putAll(
		stateUploader.uploadFilesToCheckpointFs(
			pendingMiscUploads, checkpointStreamFactory, snapshotCloseableRegistry));
}
 
Example #19
Source File: RocksIncrementalSnapshotStrategy.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Sorts the given files into the output maps, keyed by a {@link StateHandleID} built from the
 * file name: files ending in {@code SST_FILE_SUFFIX} that are already in {@code baseSstFiles} get
 * a placeholder handle in {@code sstFiles}, other such files go to {@code sstFilePaths}, and
 * everything else goes to {@code miscFilePaths}.
 *
 * @param fileStatuses the files to classify
 * @param sstFiles receives placeholder handles for already known sst files
 * @param sstFilePaths receives the paths of sst files that are not yet known
 * @param miscFilePaths receives the paths of all non-sst files
 */
private void createUploadFilePaths(
	FileStatus[] fileStatuses,
	Map<StateHandleID, StreamStateHandle> sstFiles,
	Map<StateHandleID, Path> sstFilePaths,
	Map<StateHandleID, Path> miscFilePaths) {
	for (FileStatus status : fileStatuses) {
		final Path localPath = status.getPath();
		final String localName = localPath.getName();
		final StateHandleID handleId = new StateHandleID(localName);

		if (!localName.endsWith(SST_FILE_SUFFIX)) {
			miscFilePaths.put(handleId, localPath);
			continue;
		}

		if (baseSstFiles != null && baseSstFiles.contains(handleId)) {
			// we introduce a placeholder state handle, that is replaced with the
			// original from the shared state registry (created from a previous checkpoint)
			sstFiles.put(handleId, new PlaceholderStreamStateHandle());
		} else {
			sstFilePaths.put(handleId, localPath);
		}
	}
}
 
Example #20
Source File: RocksDBIncrementalRestoreOperation.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Populates the working directory of the recovered RocksDB instance from a local state
 * directory: files ending in {@code SST_FILE_SUFFIX} are hard-linked, all other files are copied
 * (replacing an existing target).
 *
 * @param source the directory holding the local state
 * @param instanceRocksDBPath the directory that receives the links and copies
 * @throws IOException if {@code source} cannot be listed or a link/copy operation fails
 */
private void restoreInstanceDirectoryFromPath(Path source, String instanceRocksDBPath) throws IOException {

	final FileStatus[] entries = source.getFileSystem().listStatus(source);
	if (entries == null) {
		throw new IOException("Cannot list file statues. Directory " + source + " does not exist.");
	}

	for (FileStatus entry : entries) {
		final String name = entry.getPath().getName();
		final File origin = new File(source.getPath(), name);
		final File destination = new File(instanceRocksDBPath, name);

		if (!name.endsWith(SST_FILE_SUFFIX)) {
			// true copy for all other files.
			Files.copy(origin.toPath(), destination.toPath(), StandardCopyOption.REPLACE_EXISTING);
			continue;
		}

		// hardlink'ing the immutable sst-files.
		Files.createLink(destination.toPath(), origin.toPath());
	}
}
 
Example #21
Source File: DistributedCacheDfsTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the distributed-cache entries from inside the task: the {@code test_data} file must live
 * on a non-distributed file system and contain {@code testFileContent}, and {@code test_dir} must
 * be a directory whose listing has exactly two entries.
 *
 * @param value the incoming record; not used by the checks
 * @return the string read from the cached {@code test_data} file
 * @throws Exception if a cached file cannot be resolved or read
 */
@Override
public String map(Integer value) throws Exception {
	final Path actualFile = new Path(getRuntimeContext().getDistributedCache().getFile("test_data").toURI());

	Path path = new Path(actualFile.toUri());
	assertFalse(path.getFileSystem().isDistributedFS());

	// Close the stream deterministically; leaving it open leaks a file handle on every call.
	final String contents;
	try (DataInputStream in = new DataInputStream(actualFile.getFileSystem().open(actualFile))) {
		contents = in.readUTF();
	}

	assertEquals(testFileContent, contents);

	final Path actualDir = new Path(getRuntimeContext().getDistributedCache().getFile("test_dir").toURI());
	FileStatus fileStatus = actualDir.getFileSystem().getFileStatus(actualDir);
	assertTrue(fileStatus.isDir());
	FileStatus[] fileStatuses = actualDir.getFileSystem().listStatus(actualDir);
	assertEquals(2, fileStatuses.length);

	return contents;
}
 
Example #22
Source File: FileUtils.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Recursively writes {@code fileOrDirectory} into the zip stream. Entry names are the path with
 * the leading {@code rootDir} prefix removed; directories get an entry with a trailing
 * {@code '/'} followed by the entries of their children.
 *
 * @param fileOrDirectory the file or directory to add
 * @param fs the file system used to inspect and read {@code fileOrDirectory}
 * @param rootDir the directory whose path is stripped from the front of entry names
 * @param out the zip stream to write to; it is not closed by this method
 * @throws IOException if reading from the file system or writing to the zip stream fails
 */
private static void addToZip(Path fileOrDirectory, FileSystem fs, Path rootDir, ZipOutputStream out) throws IOException {
	// Strip the root only as a leading prefix. String.replace would also delete any later
	// occurrence of the root path inside the entry name and thereby corrupt it.
	final String rootPrefix = rootDir.getPath() + '/';
	final String fullPath = fileOrDirectory.getPath();
	final String relativePath = fullPath.startsWith(rootPrefix)
		? fullPath.substring(rootPrefix.length())
		: fullPath;

	if (fs.getFileStatus(fileOrDirectory).isDir()) {
		out.putNextEntry(new ZipEntry(relativePath + '/'));
		for (FileStatus containedFile : fs.listStatus(fileOrDirectory)) {
			addToZip(containedFile.getPath(), fs, rootDir, out);
		}
	} else {
		ZipEntry entry = new ZipEntry(relativePath);
		out.putNextEntry(entry);

		// 'false' keeps the zip stream open so that further entries can follow
		try (FSDataInputStream in = fs.open(fileOrDirectory)) {
			IOUtils.copyBytes(in, out, false);
		}
		out.closeEntry();
	}
}
 
Example #23
Source File: FileMonitoringFunction.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Lists {@code path} and returns the entries that {@code isFiltered} does not reject, recording
 * the modification time of every accepted file in {@code modificationTimes}.
 *
 * @param fileSystem the file system used for the listing
 * @return the string form of each accepted path; empty if the listing is {@code null}
 * @throws IOException if the listing fails
 */
private List<String> listNewFiles(FileSystem fileSystem) throws IOException {
	final List<String> newFiles = new ArrayList<>();

	final FileStatus[] listing = fileSystem.listStatus(new Path(path));
	if (listing == null) {
		LOG.warn("Path does not exist: {}", path);
		return newFiles;
	}

	for (FileStatus entry : listing) {
		final Path entryPath = entry.getPath();
		final String entryName = entryPath.getName();
		final long modifiedAt = entry.getModificationTime();

		if (isFiltered(entryName, modifiedAt)) {
			continue;
		}
		newFiles.add(entryPath.toString());
		modificationTimes.put(entryName, modifiedAt);
	}

	return newFiles;
}
 
Example #24
Source File: LocalFileSystem.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Lists the status of {@code f}: a single-element array when it is a regular file, otherwise one
 * entry per name returned by {@link File#list()}.
 *
 * @param f the path to list
 * @return the statuses, or {@code null} if the path does not exist or its children cannot be listed
 * @throws IOException propagated from resolving the status of a child
 */
@Override
public FileStatus[] listStatus(final Path f) throws IOException {

	final File target = pathToFile(f);

	if (!target.exists()) {
		return null;
	}
	if (target.isFile()) {
		return new FileStatus[] { new LocalFileStatus(target, this) };
	}

	final String[] childNames = target.list();
	if (childNames == null) {
		return null;
	}

	// resolve every child name against the listed path
	final FileStatus[] statuses = new FileStatus[childNames.length];
	int idx = 0;
	for (String childName : childNames) {
		statuses[idx++] = getFileStatus(new Path(f, childName));
	}
	return statuses;
}
 
Example #25
Source File: HadoopFileSystem.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Looks up the block locations of {@code file} through the wrapped Hadoop file system and wraps
 * each result in a {@link HadoopBlockLocation}.
 *
 * @param file the file to query; must be a {@link HadoopFileStatus}
 * @param start passed through to the Hadoop file system as the range start
 * @param len passed through to the Hadoop file system as the range length
 * @return one wrapped block location per location reported by Hadoop
 * @throws IOException if {@code file} is not a {@link HadoopFileStatus} or the lookup fails
 */
@Override
public BlockLocation[] getFileBlockLocations(final FileStatus file, final long start, final long len)
		throws IOException {
	if (!(file instanceof HadoopFileStatus)) {
		// The message names the type that is actually checked (it previously referred to
		// a "DistributedFileStatus" that this method never tests for).
		throw new IOException("file is not an instance of HadoopFileStatus");
	}

	final HadoopFileStatus f = (HadoopFileStatus) file;

	final org.apache.hadoop.fs.BlockLocation[] blkLocations = fs.getFileBlockLocations(f.getInternalFileStatus(),
		start, len);

	// Wrap up HDFS specific block location objects
	final HadoopBlockLocation[] distBlkLocations = new HadoopBlockLocation[blkLocations.length];
	for (int i = 0; i < distBlkLocations.length; i++) {
		distBlkLocations[i] = new HadoopBlockLocation(blkLocations[i]);
	}

	return distBlkLocations;
}
 
Example #26
Source File: FileUtils.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Recursively writes {@code fileOrDirectory} into the zip stream. Entry names are the path with
 * the leading {@code rootDir} prefix removed; directories get an entry with a trailing
 * {@code '/'} followed by the entries of their children.
 *
 * @param fileOrDirectory the file or directory to add
 * @param fs the file system used to inspect and read {@code fileOrDirectory}
 * @param rootDir the directory whose path is stripped from the front of entry names
 * @param out the zip stream to write to; it is not closed by this method
 * @throws IOException if reading from the file system or writing to the zip stream fails
 */
private static void addToZip(Path fileOrDirectory, FileSystem fs, Path rootDir, ZipOutputStream out) throws IOException {
	// Strip the root only as a leading prefix. String.replace would also delete any later
	// occurrence of the root path inside the entry name and thereby corrupt it.
	final String rootPrefix = rootDir.getPath() + '/';
	final String fullPath = fileOrDirectory.getPath();
	final String relativePath = fullPath.startsWith(rootPrefix)
		? fullPath.substring(rootPrefix.length())
		: fullPath;

	if (fs.getFileStatus(fileOrDirectory).isDir()) {
		out.putNextEntry(new ZipEntry(relativePath + '/'));
		for (FileStatus containedFile : fs.listStatus(fileOrDirectory)) {
			addToZip(containedFile.getPath(), fs, rootDir, out);
		}
	} else {
		ZipEntry entry = new ZipEntry(relativePath);
		out.putNextEntry(entry);

		// 'false' keeps the zip stream open so that further entries can follow
		try (FSDataInputStream in = fs.open(fileOrDirectory)) {
			IOUtils.copyBytes(in, out, false);
		}
		out.closeEntry();
	}
}
 
Example #27
Source File: LocalFileSystem.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Lists the status of {@code f}: a single-element array when it is a regular file, otherwise one
 * entry per name returned by {@link File#list()}.
 *
 * @param f the path to list
 * @return the statuses, or {@code null} if the path does not exist or its children cannot be listed
 * @throws IOException propagated from resolving the status of a child
 */
@Override
public FileStatus[] listStatus(final Path f) throws IOException {

	final File target = pathToFile(f);

	if (!target.exists()) {
		return null;
	}
	if (target.isFile()) {
		return new FileStatus[] { new LocalFileStatus(target, this) };
	}

	final String[] childNames = target.list();
	if (childNames == null) {
		return null;
	}

	// resolve every child name against the listed path
	final FileStatus[] statuses = new FileStatus[childNames.length];
	int idx = 0;
	for (String childName : childNames) {
		statuses[idx++] = getFileStatus(new Path(f, childName));
	}
	return statuses;
}
 
Example #28
Source File: ContinuousFileMonitoringFunction.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Lists the eligible files under {@code path}, groups their splits by modification time,
 * forwards every split to {@code context}, and raises {@code globalModificationTime} to the
 * newest modification time seen. Asserts that the caller holds {@code checkpointLock}.
 *
 * @param fs the file system to scan
 * @param context the source context that receives the splits
 * @throws IOException if listing files or creating splits fails
 */
private void monitorDirAndForwardSplits(FileSystem fs,
										SourceContext<TimestampedFileInputSplit> context) throws IOException {
	assert (Thread.holdsLock(checkpointLock));

	final Map<Path, FileStatus> candidates = listEligibleFiles(fs, new Path(path));
	final Map<Long, List<TimestampedFileInputSplit>> grouped = getInputSplitsSortedByModTime(candidates);

	for (Map.Entry<Long, List<TimestampedFileInputSplit>> group : grouped.entrySet()) {
		final long groupModTime = group.getKey();
		final List<TimestampedFileInputSplit> pendingSplits = group.getValue();

		for (TimestampedFileInputSplit pending : pendingSplits) {
			LOG.info("Forwarding split: " + pending);
			context.collect(pending);
		}

		// update the global modification time
		if (groupModTime > globalModificationTime) {
			globalModificationTime = groupModTime;
		}
	}
}
 
Example #29
Source File: FileCacheDirectoriesTest.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Registers a cache entry built from {@code permanentBlobKey}, asks the file cache to create the
 * temporary copy, and checks that the result is a directory containing {@code cacheFile} with
 * the expected content.
 */
@Test
public void testDirectoryDownloadedFromBlob() throws Exception {
	final JobID job = new JobID();
	final ExecutionAttemptID attempt = new ExecutionAttemptID();
	final String entryName = "test_file";

	// copy / create the file
	final DistributedCache.DistributedCacheEntry cacheEntry =
		new DistributedCache.DistributedCacheEntry(
			entryName, false, InstantiationUtil.serializeObject(permanentBlobKey), true);
	final Path downloaded = fileCache.createTmpFile(entryName, cacheEntry, job, attempt).get();

	// the materialised entry must be a directory
	final FileSystem targetFs = downloaded.getFileSystem();
	assertTrue(targetFs.getFileStatus(downloaded).isDir());

	// ... that holds the expected file with the expected content
	final Path expectedFile = new Path(downloaded, "cacheFile");
	assertTrue(targetFs.exists(expectedFile));

	final File localCopy = new File(expectedFile.getPath());
	assertEquals(testFileContent, FileUtils.readFileUtf8(localCopy));
}
 
Example #30
Source File: PartitionPathUtils.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Walks the directory tree below {@code fileStatus} and collects every entry found exactly at
 * depth {@code expectLevel}. Non-directories above that depth are ignored.
 *
 * @param fs the file system used to list directories
 * @param fileStatus the entry currently being visited
 * @param level the depth of {@code fileStatus}
 * @param expectLevel the depth at which entries are collected
 * @param results receives the collected entries
 * @throws IOException if listing a directory fails
 */
private static void listStatusRecursively(
		FileSystem fs,
		FileStatus fileStatus,
		int level,
		int expectLevel,
		List<FileStatus> results) throws IOException {
	if (level == expectLevel) {
		results.add(fileStatus);
		return;
	}

	if (!fileStatus.isDir()) {
		return;
	}

	// descend one level into every child of this directory
	final int childLevel = level + 1;
	for (FileStatus child : fs.listStatus(fileStatus.getPath())) {
		listStatusRecursively(fs, child, childLevel, expectLevel, results);
	}
}