Java Code Examples for org.apache.flink.core.fs.FileStatus

The following examples show how to use org.apache.flink.core.fs.FileStatus. They are extracted from open source projects; the source project, source file, and license are noted above each example.
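Before diving into the examples, here is a minimal, self-contained sketch (the directory path is a hypothetical placeholder) of the FileStatus accessors the snippets below rely on: getPath(), isDir(), getLen(), and getModificationTime().

import org.apache.flink.core.fs.FileStatus;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;

public class FileStatusTour {
	public static void main(String[] args) throws Exception {
		// hypothetical directory; substitute any URI your FileSystem supports
		Path dir = new Path("file:///tmp/flink-input");
		FileSystem fs = dir.getFileSystem();

		// listStatus may return null for a non-existent path (see the
		// LocalFileSystem example below), so guard before iterating
		FileStatus[] statuses = fs.listStatus(dir);
		if (statuses != null) {
			for (FileStatus status : statuses) {
				System.out.printf("%s dir=%b len=%d modTime=%d%n",
					status.getPath(), status.isDir(), status.getLen(),
					status.getModificationTime());
			}
		}
	}
}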
Example 1
Source Project: Flink-CEPplus   Source File: FileInputFormat.java   License: Apache License 2.0
protected FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, Path[] filePaths, ArrayList<FileStatus> files) throws IOException {

		long totalLength = 0;
		long latestModTime = 0;

		for (Path path : filePaths) {
			final FileSystem fs = FileSystem.get(path.toUri());
			final FileBaseStatistics stats = getFileStats(cachedStats, path, fs, files);

			if (stats.getTotalInputSize() == BaseStatistics.SIZE_UNKNOWN) {
				totalLength = BaseStatistics.SIZE_UNKNOWN;
			} else if (totalLength != BaseStatistics.SIZE_UNKNOWN) {
				totalLength += stats.getTotalInputSize();
			}
			latestModTime = Math.max(latestModTime, stats.getLastModificationTime());
		}

		// check whether the cached statistics are still valid, if we have any
		if (cachedStats != null && latestModTime <= cachedStats.getLastModificationTime()) {
			return cachedStats;
		}

		return new FileBaseStatistics(latestModTime, totalLength, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	}
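Note the poisoning pattern above: once any file reports BaseStatistics.SIZE_UNKNOWN, totalLength stays SIZE_UNKNOWN for the remaining files, while latestModTime keeps aggregating, so the modification time alone decides whether the cached statistics are still usable.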
 
Example 2
Source Project: flink   Source File: ContinuousFileMonitoringFunction.java   License: Apache License 2.0
private void monitorDirAndForwardSplits(FileSystem fs,
										SourceContext<TimestampedFileInputSplit> context) throws IOException {
	assert (Thread.holdsLock(checkpointLock));

	Map<Path, FileStatus> eligibleFiles = listEligibleFiles(fs, new Path(path));
	Map<Long, List<TimestampedFileInputSplit>> splitsSortedByModTime = getInputSplitsSortedByModTime(eligibleFiles);

	for (Map.Entry<Long, List<TimestampedFileInputSplit>> splits: splitsSortedByModTime.entrySet()) {
		long modificationTime = splits.getKey();
		for (TimestampedFileInputSplit split: splits.getValue()) {
			LOG.info("Forwarding split: " + split);
			context.collect(split);
		}
		// update the global modification time
		globalModificationTime = Math.max(globalModificationTime, modificationTime);
	}
}
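The assert documents the locking contract: the method must run while holding the checkpoint lock, so that emitting splits and advancing globalModificationTime are atomic with respect to checkpointing.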
 
Example 3
Source Project: Flink-CEPplus   Source File: BinaryInputFormat.java   License: Apache License 2.0
protected List<FileStatus> getFiles() throws IOException {
	// get all the files that are involved in the splits
	List<FileStatus> files = new ArrayList<>();

	for (Path filePath: getFilePaths()) {
		final FileSystem fs = filePath.getFileSystem();
		final FileStatus pathFile = fs.getFileStatus(filePath);

		if (pathFile.isDir()) {
			// input is directory. list all contained files
			final FileStatus[] partials = fs.listStatus(filePath);
			for (FileStatus partial : partials) {
				if (!partial.isDir()) {
					files.add(partial);
				}
			}
		} else {
			files.add(pathFile);
		}
	}
	return files;
}
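Note that directory inputs are expanded only one level deep here: entries of a directory that are themselves directories are skipped, so files in nested sub-directories never make it into the result list.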
 
Example 4
Source Project: Flink-CEPplus   Source File: FileUtils.java   License: Apache License 2.0
private static void addToZip(Path fileOrDirectory, FileSystem fs, Path rootDir, ZipOutputStream out) throws IOException {
	String relativePath = fileOrDirectory.getPath().replace(rootDir.getPath() + '/', "");
	if (fs.getFileStatus(fileOrDirectory).isDir()) {
		out.putNextEntry(new ZipEntry(relativePath + '/'));
		for (FileStatus containedFile : fs.listStatus(fileOrDirectory)) {
			addToZip(containedFile.getPath(), fs, rootDir, out);
		}
	} else {
		ZipEntry entry = new ZipEntry(relativePath);
		out.putNextEntry(entry);

		try (FSDataInputStream in = fs.open(fileOrDirectory)) {
			IOUtils.copyBytes(in, out, false);
		}
		out.closeEntry();
	}
}
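A hedged sketch of how this recursive helper might be driven, assuming addToZip is accessible from the calling code; the archive and source paths are hypothetical placeholders:

import java.io.FileOutputStream;
import java.util.zip.ZipOutputStream;
import org.apache.flink.core.fs.FileStatus;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;

Path rootDir = new Path("file:///tmp/to-archive");  // hypothetical source tree
FileSystem fs = rootDir.getFileSystem();
try (ZipOutputStream out = new ZipOutputStream(new FileOutputStream("/tmp/archive.zip"))) {
	// zip every child of rootDir; entry names are made relative to rootDir
	for (FileStatus contained : fs.listStatus(rootDir)) {
		addToZip(contained.getPath(), fs, rootDir, out);
	}
}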
 
Example 5
Source Project: Flink-CEPplus   Source File: LocalFileSystem.java   License: Apache License 2.0
@Override
public FileStatus[] listStatus(final Path f) throws IOException {

	final File localf = pathToFile(f);
	FileStatus[] results;

	if (!localf.exists()) {
		return null;
	}
	if (localf.isFile()) {
		return new FileStatus[] { new LocalFileStatus(localf, this) };
	}

	final String[] names = localf.list();
	if (names == null) {
		return null;
	}
	results = new FileStatus[names.length];
	for (int i = 0; i < names.length; i++) {
		results[i] = getFileStatus(new Path(f, names[i]));
	}

	return results;
}
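Mind the contract here: listStatus returns null both when the path does not exist and when the underlying File.list() call fails, rather than throwing an exception, so callers must null-check the result (as the FileMonitoringFunction example further down does).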
 
Example 6
Source Project: Flink-CEPplus   Source File: DistributedCacheDfsTest.java   License: Apache License 2.0
@Override
public String map(Integer value) throws Exception {
	final Path actualFile = new Path(getRuntimeContext().getDistributedCache().getFile("test_data").toURI());

	Path path = new Path(actualFile.toUri());
	assertFalse(path.getFileSystem().isDistributedFS());

	DataInputStream in = new DataInputStream(actualFile.getFileSystem().open(actualFile));
	String contents = in.readUTF();

	assertEquals(testFileContent, contents);

	final Path actualDir = new Path(getRuntimeContext().getDistributedCache().getFile("test_dir").toURI());
	FileStatus fileStatus = actualDir.getFileSystem().getFileStatus(actualDir);
	assertTrue(fileStatus.isDir());
	FileStatus[] fileStatuses = actualDir.getFileSystem().listStatus(actualDir);
	assertEquals(2, fileStatuses.length);

	return contents;
}
 
Example 7
private void uploadSstFiles(
	@Nonnull Map<StateHandleID, StreamStateHandle> sstFiles,
	@Nonnull Map<StateHandleID, StreamStateHandle> miscFiles) throws Exception {

	// write state data
	Preconditions.checkState(localBackupDirectory.exists());

	Map<StateHandleID, Path> sstFilePaths = new HashMap<>();
	Map<StateHandleID, Path> miscFilePaths = new HashMap<>();

	FileStatus[] fileStatuses = localBackupDirectory.listStatus();
	if (fileStatuses != null) {
		createUploadFilePaths(fileStatuses, sstFiles, sstFilePaths, miscFilePaths);

		sstFiles.putAll(stateUploader.uploadFilesToCheckpointFs(
			sstFilePaths,
			checkpointStreamFactory,
			snapshotCloseableRegistry));
		miscFiles.putAll(stateUploader.uploadFilesToCheckpointFs(
			miscFilePaths,
			checkpointStreamFactory,
			snapshotCloseableRegistry));
	}
}
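The files are partitioned into SST and miscellaneous groups by createUploadFilePaths (shown in a later example) and uploaded in two batches; if the backup directory yields no listable files, the method simply uploads nothing.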
 
Example 8
Source Project: flink   Source File: FileUtils.java   License: Apache License 2.0
private static void addToZip(Path fileOrDirectory, FileSystem fs, Path rootDir, ZipOutputStream out) throws IOException {
	String relativePath = fileOrDirectory.getPath().replace(rootDir.getPath() + '/', "");
	if (fs.getFileStatus(fileOrDirectory).isDir()) {
		out.putNextEntry(new ZipEntry(relativePath + '/'));
		for (FileStatus containedFile : fs.listStatus(fileOrDirectory)) {
			addToZip(containedFile.getPath(), fs, rootDir, out);
		}
	} else {
		ZipEntry entry = new ZipEntry(relativePath);
		out.putNextEntry(entry);

		try (FSDataInputStream in = fs.open(fileOrDirectory)) {
			IOUtils.copyBytes(in, out, false);
		}
		out.closeEntry();
	}
}
 
Example 9
Source Project: Flink-CEPplus   Source File: FileCacheDirectoriesTest.java   License: Apache License 2.0
@Test
public void testDirectoryDownloadedFromBlob() throws Exception {
	JobID jobID = new JobID();
	ExecutionAttemptID attemptID = new ExecutionAttemptID();

	final String fileName = "test_file";
	// copy / create the file
	final DistributedCache.DistributedCacheEntry entry = new DistributedCache.DistributedCacheEntry(
		fileName,
		false,
		InstantiationUtil.serializeObject(permanentBlobKey),
		true);
	Future<Path> copyResult = fileCache.createTmpFile(fileName, entry, jobID, attemptID);

	final Path dstPath = copyResult.get();
	final FileSystem fs = dstPath.getFileSystem();
	final FileStatus fileStatus = fs.getFileStatus(dstPath);
	assertTrue(fileStatus.isDir());

	final Path cacheFile = new Path(dstPath, "cacheFile");
	assertTrue(fs.exists(cacheFile));
	final String actualContent = FileUtils.readFileUtf8(new File(cacheFile.getPath()));
	assertEquals(testFileContent, actualContent);
}
 
Example 10
Source Project: flink   Source File: PartitionPathUtils.java   License: Apache License 2.0
private static void listStatusRecursively(
		FileSystem fs,
		FileStatus fileStatus,
		int level,
		int expectLevel,
		List<FileStatus> results) throws IOException {
	if (expectLevel == level) {
		results.add(fileStatus);
		return;
	}

	if (fileStatus.isDir()) {
		for (FileStatus stat : fs.listStatus(fileStatus.getPath())) {
			listStatusRecursively(fs, stat, level + 1, expectLevel, results);
		}
	}
}
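For context, a hedged sketch of how such a helper is typically invoked, assuming a hypothetical table root whose partitions sit exactly two directory levels below it (e.g. dt=.../hr=...):

import java.util.ArrayList;
import java.util.List;
import org.apache.flink.core.fs.FileStatus;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;

Path tableRoot = new Path("hdfs:///warehouse/my_table");  // hypothetical
FileSystem fs = tableRoot.getFileSystem();
List<FileStatus> partitions = new ArrayList<>();
// level 0 is the root itself; collect everything exactly at level 2
listStatusRecursively(fs, fs.getFileStatus(tableRoot), 0, 2, partitions);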
 
Example 11
Source Project: flink   Source File: HadoopFileSystem.java   License: Apache License 2.0
@Override
public BlockLocation[] getFileBlockLocations(final FileStatus file, final long start, final long len)
		throws IOException {
	if (!(file instanceof HadoopFileStatus)) {
		throw new IOException("file is not an instance of DistributedFileStatus");
	}

	final HadoopFileStatus f = (HadoopFileStatus) file;

	final org.apache.hadoop.fs.BlockLocation[] blkLocations = fs.getFileBlockLocations(f.getInternalFileStatus(),
		start, len);

	// Wrap up HDFS specific block location objects
	final HadoopBlockLocation[] distBlkLocations = new HadoopBlockLocation[blkLocations.length];
	for (int i = 0; i < distBlkLocations.length; i++) {
		distBlkLocations[i] = new HadoopBlockLocation(blkLocations[i]);
	}

	return distBlkLocations;
}
 
Example 12
Source Project: Flink-CEPplus   Source File: FileMonitoringFunction.java   License: Apache License 2.0
private List<String> listNewFiles(FileSystem fileSystem) throws IOException {
	List<String> files = new ArrayList<String>();

	FileStatus[] statuses = fileSystem.listStatus(new Path(path));

	if (statuses == null) {
		LOG.warn("Path does not exist: {}", path);
	} else {
		for (FileStatus status : statuses) {
			Path filePath = status.getPath();
			String fileName = filePath.getName();
			long modificationTime = status.getModificationTime();

			if (!isFiltered(fileName, modificationTime)) {
				files.add(filePath.toString());
				modificationTimes.put(fileName, modificationTime);
			}
		}
	}

	return files;
}
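The null check on statuses is exactly the defensive handling that the LocalFileSystem.listStatus contract shown earlier requires: a missing path yields null, not an exception.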
 
Example 13
Source Project: flink   Source File: PartitionTempFileManager.java   License: Apache License 2.0
/**
 * Returns the checkpoint ids found under the temporary base path that are
 * less than or equal to {@code toCpId}.
 */
public static long[] headCheckpoints(FileSystem fs, Path basePath, long toCpId) throws IOException {
	List<Long> cps = new ArrayList<>();

	for (FileStatus taskStatus : fs.listStatus(basePath)) {
		String name = taskStatus.getPath().getName();
		if (isCheckpointDir(name)) {
			long currentCp = getCheckpointId(name);
			// commit checkpoints whose id is at most toCpId
			if (currentCp <= toCpId) {
				cps.add(currentCp);
			}
		}
	}
	return cps.stream().mapToLong(v -> v).toArray();
}
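A hypothetical call, assuming fs and basePath are in scope, that collects every checkpoint id up to 7:

long[] committable = headCheckpoints(fs, basePath, 7L);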
 
Example 14
Source Project: flink   Source File: FileInputFormat.java   License: Apache License 2.0
protected FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, Path[] filePaths, ArrayList<FileStatus> files) throws IOException {

		long totalLength = 0;
		long latestModTime = 0;

		for (Path path : filePaths) {
			final FileSystem fs = FileSystem.get(path.toUri());
			final FileBaseStatistics stats = getFileStats(cachedStats, path, fs, files);

			if (stats.getTotalInputSize() == BaseStatistics.SIZE_UNKNOWN) {
				totalLength = BaseStatistics.SIZE_UNKNOWN;
			} else if (totalLength != BaseStatistics.SIZE_UNKNOWN) {
				totalLength += stats.getTotalInputSize();
			}
			latestModTime = Math.max(latestModTime, stats.getLastModificationTime());
		}

		// check whether the cached statistics are still valid, if we have any
		if (cachedStats != null && latestModTime <= cachedStats.getLastModificationTime()) {
			return cachedStats;
		}

		return new FileBaseStatistics(latestModTime, totalLength, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
	}
 
Example 15
Source Project: flink   Source File: ContinuousFileMonitoringFunction.java   License: Apache License 2.0
/**
 * Creates the input splits to be forwarded to the downstream tasks of the
 * {@link ContinuousFileReaderOperator}. Splits are sorted <b>by modification time</b> before
 * being forwarded, and only splits belonging to files in the {@code eligibleFiles}
 * map will be processed.
 * @param eligibleFiles The files to process.
 */
private Map<Long, List<TimestampedFileInputSplit>> getInputSplitsSortedByModTime(
			Map<Path, FileStatus> eligibleFiles) throws IOException {

	Map<Long, List<TimestampedFileInputSplit>> splitsByModTime = new TreeMap<>();
	if (eligibleFiles.isEmpty()) {
		return splitsByModTime;
	}

	for (FileInputSplit split: format.createInputSplits(readerParallelism)) {
		FileStatus fileStatus = eligibleFiles.get(split.getPath());
		if (fileStatus != null) {
			Long modTime = fileStatus.getModificationTime();
			List<TimestampedFileInputSplit> splitsToForward = splitsByModTime.get(modTime);
			if (splitsToForward == null) {
				splitsToForward = new ArrayList<>();
				splitsByModTime.put(modTime, splitsToForward);
			}
			splitsToForward.add(new TimestampedFileInputSplit(
				modTime, split.getSplitNumber(), split.getPath(),
				split.getStart(), split.getLength(), split.getHostnames()));
		}
	}
	return splitsByModTime;
}
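Because splitsByModTime is a TreeMap keyed by modification time, iteration order is ascending mod time; that is what allows monitorDirAndForwardSplits (shown earlier) to advance globalModificationTime monotonically while forwarding.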
 
Example 16
Source Project: flink   Source File: LocalFileSystem.java   License: Apache License 2.0
@Override
public FileStatus[] listStatus(final Path f) throws IOException {

	final File localf = pathToFile(f);
	FileStatus[] results;

	if (!localf.exists()) {
		return null;
	}
	if (localf.isFile()) {
		return new FileStatus[] { new LocalFileStatus(localf, this) };
	}

	final String[] names = localf.list();
	if (names == null) {
		return null;
	}
	results = new FileStatus[names.length];
	for (int i = 0; i < names.length; i++) {
		results[i] = getFileStatus(new Path(f, names[i]));
	}

	return results;
}
 
Example 17
Source Project: flink   Source File: DistributedCacheDfsTest.java   License: Apache License 2.0
@Override
public String map(Integer value) throws Exception {
	final Path actualFile = new Path(getRuntimeContext().getDistributedCache().getFile("test_data").toURI());

	Path path = new Path(actualFile.toUri());
	assertFalse(path.getFileSystem().isDistributedFS());

	DataInputStream in = new DataInputStream(actualFile.getFileSystem().open(actualFile));
	String contents = in.readUTF();

	assertEquals(testFileContent, contents);

	final Path actualDir = new Path(getRuntimeContext().getDistributedCache().getFile("test_dir").toURI());
	FileStatus fileStatus = actualDir.getFileSystem().getFileStatus(actualDir);
	assertTrue(fileStatus.isDir());
	FileStatus[] fileStatuses = actualDir.getFileSystem().listStatus(actualDir);
	assertEquals(2, fileStatuses.length);

	return contents;
}
 
Example 18
Source Project: flink   Source File: RocksDBIncrementalRestoreOperation.java   License: Apache License 2.0
/**
 * Recreates the working directory of the recovered RocksDB instance and
 * links/copies the contents from a local state.
 */
private void restoreInstanceDirectoryFromPath(Path source, String instanceRocksDBPath) throws IOException {

	FileSystem fileSystem = source.getFileSystem();

	final FileStatus[] fileStatuses = fileSystem.listStatus(source);

	if (fileStatuses == null) {
		throw new IOException("Cannot list file statues. Directory " + source + " does not exist.");
	}

	for (FileStatus fileStatus : fileStatuses) {
		final Path filePath = fileStatus.getPath();
		final String fileName = filePath.getName();
		File restoreFile = new File(source.getPath(), fileName);
		File targetFile = new File(instanceRocksDBPath, fileName);
		if (fileName.endsWith(SST_FILE_SUFFIX)) {
			// hard-link the immutable SST files.
			Files.createLink(targetFile.toPath(), restoreFile.toPath());
		} else {
			// true copy for all other files.
			Files.copy(restoreFile.toPath(), targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
		}
	}
}
 
Example 19
Source Project: flink   Source File: FileCacheDirectoriesTest.java   License: Apache License 2.0
private void testDirectoryDownloaded(DistributedCache.DistributedCacheEntry entry) throws Exception {
	JobID jobID = new JobID();
	ExecutionAttemptID attemptID = new ExecutionAttemptID();

	// copy / create the file
	final String fileName = "test_file";
	Future<Path> copyResult = fileCache.createTmpFile(fileName, entry, jobID, attemptID);

	final Path dstPath = copyResult.get();
	final FileSystem fs = dstPath.getFileSystem();
	final FileStatus fileStatus = fs.getFileStatus(dstPath);
	assertTrue(fileStatus.isDir());

	final Path cacheFile = new Path(dstPath, "cacheFile");
	assertTrue(fs.exists(cacheFile));
	final String actualContent = FileUtils.readFileUtf8(new File(cacheFile.getPath()));
	assertEquals(testFileContent, actualContent);
}
 
Example 20
Source Project: flink   Source File: RocksIncrementalSnapshotStrategy.java   License: Apache License 2.0
private void createUploadFilePaths(
	FileStatus[] fileStatuses,
	Map<StateHandleID, StreamStateHandle> sstFiles,
	Map<StateHandleID, Path> sstFilePaths,
	Map<StateHandleID, Path> miscFilePaths) {
	for (FileStatus fileStatus : fileStatuses) {
		final Path filePath = fileStatus.getPath();
		final String fileName = filePath.getName();
		final StateHandleID stateHandleID = new StateHandleID(fileName);

		if (fileName.endsWith(SST_FILE_SUFFIX)) {
			final boolean existsAlready = baseSstFiles != null && baseSstFiles.contains(stateHandleID);

			if (existsAlready) {
				// we introduce a placeholder state handle, that is replaced with the
				// original from the shared state registry (created from a previous checkpoint)
				sstFiles.put(stateHandleID, new PlaceholderStreamStateHandle());
			} else {
				sstFilePaths.put(stateHandleID, filePath);
			}
		} else {
			miscFilePaths.put(stateHandleID, filePath);
		}
	}
}
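SST files that already belong to a previous checkpoint are registered with a PlaceholderStreamStateHandle instead of being re-uploaded; the shared state registry later replaces the placeholder with the original handle, which is what makes the snapshot incremental.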
 
Example 21
Source Project: flink   Source File: FileMonitoringFunction.java   License: Apache License 2.0
private List<String> listNewFiles(FileSystem fileSystem) throws IOException {
	List<String> files = new ArrayList<String>();

	FileStatus[] statuses = fileSystem.listStatus(new Path(path));

	if (statuses == null) {
		LOG.warn("Path does not exist: {}", path);
	} else {
		for (FileStatus status : statuses) {
			Path filePath = status.getPath();
			String fileName = filePath.getName();
			long modificationTime = status.getModificationTime();

			if (!isFiltered(fileName, modificationTime)) {
				files.add(filePath.toString());
				modificationTimes.put(fileName, modificationTime);
			}
		}
	}

	return files;
}
 
Example 22
Source Project: flink   Source File: FileCacheDirectoriesTest.java   License: Apache License 2.0
@Test
public void testDirectoryDownloadedFromBlob() throws Exception {
	JobID jobID = new JobID();
	ExecutionAttemptID attemptID = new ExecutionAttemptID();

	final String fileName = "test_file";
	// copy / create the file
	final DistributedCache.DistributedCacheEntry entry = new DistributedCache.DistributedCacheEntry(
		fileName,
		false,
		InstantiationUtil.serializeObject(permanentBlobKey),
		true);
	Future<Path> copyResult = fileCache.createTmpFile(fileName, entry, jobID, attemptID);

	final Path dstPath = copyResult.get();
	final FileSystem fs = dstPath.getFileSystem();
	final FileStatus fileStatus = fs.getFileStatus(dstPath);
	assertTrue(fileStatus.isDir());

	final Path cacheFile = new Path(dstPath, "cacheFile");
	assertTrue(fs.exists(cacheFile));
	final String actualContent = FileUtils.readFileUtf8(new File(cacheFile.getPath()));
	assertEquals(testFileContent, actualContent);
}
 
Example 23
Source Project: flink-examples   Source File: StanfordTweetsDataSetInputFormat.java   License: MIT License
@Override
public TweetFileInputSplit[] createInputSplits(int minNumSplits) throws IOException {
    FileSystem fileSystem = getFileSystem();
    FileStatus[] statuses = fileSystem.listStatus(new Path(inputPath));
    logger.info("Found {} files", statuses.length);

    List<TweetFileInputSplit> splits = new ArrayList<>();
    for (int i = 0; i < statuses.length; i++) {
        FileStatus status = statuses[i];
        String fileName = status.getPath().getName();
        if (fileName.endsWith("edges")) {
            splits.add(new TweetFileInputSplit(i, status.getPath()));
        }
    }

    logger.info("Result number of splits: {}", splits.size());
    return splits.toArray(new TweetFileInputSplit[splits.size()]);
}
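Note that exactly one split is created per file ending in "edges", so the effective read parallelism is capped by the number of matching files; the minNumSplits hint is ignored.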
 