Java Code Examples for org.apache.hadoop.fs.LocatedFileStatus#isDirectory()
The following examples show how to use
org.apache.hadoop.fs.LocatedFileStatus#isDirectory().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestATSHistoryV15.java From tez with Apache License 2.0 | 6 votes |
/**
 * Counts the entries under {@code p} whose file name contains the marker
 * {@code <clusterTimestamp>_<id>} built from {@code applicationId}.
 *
 * <p>Every entry returned by {@code remoteFs.listFiles(p, true)} is logged. Directory
 * entries are handled by recursing into them; all other entries are matched by name.
 *
 * <p>NOTE(review): the listing is already requested recursively, so the directory branch
 * presumably never fires on a standard Hadoop file system - confirm before relying on it.
 *
 * @param p root path to scan
 * @param applicationId application whose cluster timestamp and id form the marker
 * @return number of matching entries found
 * @throws IOException propagated from the file system listing
 */
private int verifyATSDataOnHDFS(Path p, ApplicationId applicationId) throws IOException {
  int matches = 0;
  for (RemoteIterator<LocatedFileStatus> entries = remoteFs.listFiles(p, true); entries.hasNext();) {
    LocatedFileStatus entry = entries.next();
    LOG.info("Found file " + entry.toString());
    if (entry.isDirectory()) {
      matches += verifyATSDataOnHDFS(entry.getPath(), applicationId);
      continue;
    }
    String fileName = entry.getPath().getName();
    String marker = "" + applicationId.getClusterTimestamp() + "_" + applicationId.getId();
    if (fileName.contains(marker)) {
      matches++;
    }
  }
  return matches;
}
Example 2
Source File: FileInputFormat.java From hadoop with Apache License 2.0 | 6 votes |
/**
 * Walks {@code path} depth-first and appends every accepted non-directory entry to
 * {@code result}.
 *
 * <p>Each listed entry is offered to {@code inputFilter} first; a rejected entry is skipped
 * entirely, so a rejected directory is never descended into.
 *
 * @param result list that receives the accepted file statuses
 * @param fs file system used for the listing
 * @param path directory whose entries are listed
 * @param inputFilter filter applied to files and directories alike
 * @throws IOException propagated from the file system listing
 */
protected void addInputPathRecursively(List<FileStatus> result, FileSystem fs, Path path,
    PathFilter inputFilter) throws IOException {
  for (RemoteIterator<LocatedFileStatus> entries = fs.listLocatedStatus(path); entries.hasNext();) {
    LocatedFileStatus entry = entries.next();
    Path entryPath = entry.getPath();
    if (!inputFilter.accept(entryPath)) {
      continue;
    }
    if (entry.isDirectory()) {
      addInputPathRecursively(result, fs, entryPath, inputFilter);
    } else {
      result.add(entry);
    }
  }
}
Example 3
Source File: LocatedFileStatusFetcher.java From hadoop with Apache License 2.0 | 6 votes |
/**
 * Produces the listing result for {@code fileStatus}.
 *
 * <p>A non-directory status is returned as the single located file. For a directory, each
 * child accepted by {@code inputFilter} is either queued in
 * {@code dirsNeedingRecursiveCalls} (when {@code recursive} is set and the child is a
 * directory) or recorded in {@code locatedFileStatuses}.
 *
 * @return the populated result, carrying the file system it was produced from
 * @throws Exception propagated from the file system listing
 */
@Override
public Result call() throws Exception {
  Result result = new Result();
  result.fs = fs;

  if (!fileStatus.isDirectory()) {
    result.locatedFileStatuses.add(fileStatus);
    return result;
  }

  RemoteIterator<LocatedFileStatus> children = fs.listLocatedStatus(fileStatus.getPath());
  while (children.hasNext()) {
    LocatedFileStatus child = children.next();
    if (!inputFilter.accept(child.getPath())) {
      continue;
    }
    boolean descendLater = recursive && child.isDirectory();
    if (descendLater) {
      result.dirsNeedingRecursiveCalls.add(child);
    } else {
      result.locatedFileStatuses.add(child);
    }
  }
  return result;
}
Example 4
Source File: BackupUtils.java From hbase with Apache License 2.0 | 6 votes |
public static List<String> getFiles(FileSystem fs, Path rootDir, List<String> files, PathFilter filter) throws IOException { RemoteIterator<LocatedFileStatus> it = fs.listFiles(rootDir, true); while (it.hasNext()) { LocatedFileStatus lfs = it.next(); if (lfs.isDirectory()) { continue; } // apply filter if (filter.accept(lfs.getPath())) { files.add(lfs.getPath().toString()); } } return files; }
Example 5
Source File: FileInputFormat.java From hadoop with Apache License 2.0 | 6 votes |
/**
 * Collects, recursively, the accepted files below {@code path} into {@code result}.
 *
 * <p>Entries rejected by {@code inputFilter} are ignored, including directories, which are
 * then not traversed.
 *
 * @param result destination list for accepted file statuses
 * @param fs file system to list from
 * @param path directory to scan
 * @param inputFilter filter consulted for every listed entry
 * @throws IOException propagated from the file system listing
 */
protected void addInputPathRecursively(List<FileStatus> result, FileSystem fs, Path path,
    PathFilter inputFilter) throws IOException {
  RemoteIterator<LocatedFileStatus> listing = fs.listLocatedStatus(path);
  while (listing.hasNext()) {
    LocatedFileStatus current = listing.next();
    boolean accepted = inputFilter.accept(current.getPath());
    if (accepted) {
      if (!current.isDirectory()) {
        result.add(current);
      } else {
        addInputPathRecursively(result, fs, current.getPath(), inputFilter);
      }
    }
  }
}
Example 6
Source File: FileInputFormat.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Adds every accepted file found under {@code path}, at any depth, to {@code result}.
 *
 * <p>The filter is applied before the directory check, so it gates both which files are
 * collected and which sub-directories are visited.
 *
 * @param result list the accepted file statuses are appended to
 * @param fs file system that provides the listing
 * @param path directory being listed
 * @param inputFilter filter for files and directories
 * @throws IOException propagated from the file system listing
 */
protected void addInputPathRecursively(List<FileStatus> result, FileSystem fs, Path path,
    PathFilter inputFilter) throws IOException {
  for (RemoteIterator<LocatedFileStatus> it = fs.listLocatedStatus(path); it.hasNext();) {
    LocatedFileStatus child = it.next();
    if (inputFilter.accept(child.getPath())) {
      boolean isSubDirectory = child.isDirectory();
      if (isSubDirectory) {
        addInputPathRecursively(result, fs, child.getPath(), inputFilter);
      } else {
        result.add(child);
      }
    }
  }
}
Example 7
Source File: FileInputFormat.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Recursively gathers accepted files under {@code path} into {@code result}.
 *
 * <p>An entry that fails {@code inputFilter} is dropped without further inspection; an
 * accepted directory is recursed into, and any other accepted entry is appended.
 *
 * @param result list that accumulates the accepted file statuses
 * @param fs file system used to list {@code path}
 * @param path directory to enumerate
 * @param inputFilter filter deciding which files and directories are considered
 * @throws IOException propagated from the file system listing
 */
protected void addInputPathRecursively(List<FileStatus> result, FileSystem fs, Path path,
    PathFilter inputFilter) throws IOException {
  RemoteIterator<LocatedFileStatus> children = fs.listLocatedStatus(path);
  while (children.hasNext()) {
    LocatedFileStatus child = children.next();
    Path childPath = child.getPath();
    if (!inputFilter.accept(childPath)) {
      continue;
    }
    if (!child.isDirectory()) {
      result.add(child);
      continue;
    }
    addInputPathRecursively(result, fs, childPath, inputFilter);
  }
}
Example 8
Source File: LocatedFileStatusFetcher.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Lists {@code fileStatus} and sorts its accepted children into the result buckets.
 *
 * <p>If {@code fileStatus} is not a directory it is recorded directly. Otherwise each child
 * passing {@code inputFilter} goes to {@code dirsNeedingRecursiveCalls} when it is a
 * directory and {@code recursive} is enabled, and to {@code locatedFileStatuses} in every
 * other case.
 *
 * @return the result holding the file system and the collected statuses
 * @throws Exception propagated from the file system listing
 */
@Override
public Result call() throws Exception {
  Result outcome = new Result();
  outcome.fs = fs;
  if (fileStatus.isDirectory()) {
    for (RemoteIterator<LocatedFileStatus> it = fs.listLocatedStatus(fileStatus.getPath());
        it.hasNext();) {
      LocatedFileStatus entry = it.next();
      if (inputFilter.accept(entry.getPath())) {
        if (recursive && entry.isDirectory()) {
          outcome.dirsNeedingRecursiveCalls.add(entry);
        } else {
          outcome.locatedFileStatuses.add(entry);
        }
      }
    }
    return outcome;
  }
  outcome.locatedFileStatuses.add(fileStatus);
  return outcome;
}
Example 9
Source File: JobLibLoader.java From SpyGlass with Apache License 2.0 | 6 votes |
/**
 * Adds every file under {@code libPathStr} whose name ends with {@code "jar"} to the
 * job class path via {@code DistributedCache.addFileToClassPath}.
 *
 * <p>The listing is requested recursively and directory entries are skipped. Loading is
 * best-effort: any failure is logged, together with its stack trace, and then suppressed so
 * the caller is never interrupted.
 *
 * <p>NOTE(review): the suffix test is {@code endsWith("jar")}, not {@code ".jar"}, so a
 * file named e.g. {@code foojar} also matches - confirm whether that is intended.
 *
 * @param libPathStr path of the directory that holds the jars
 * @param config configuration used to obtain the file system and to register the jars
 */
public static void loadJars(String libPathStr, Configuration config) {
  try {
    Path libPath = new Path(libPathStr);
    FileSystem fs = FileSystem.get(config);
    RemoteIterator<LocatedFileStatus> itr = fs.listFiles(libPath, true);
    while (itr.hasNext()) {
      LocatedFileStatus f = itr.next();
      if (!f.isDirectory() && f.getPath().getName().endsWith("jar")) {
        logger.info("Loading Jar : " + f.getPath().getName());
        DistributedCache.addFileToClassPath(f.getPath(), config);
      }
    }
  } catch (Exception e) {
    // Route the failure through the logger with its cause attached instead of printing
    // the stack trace to stderr, so the trace lands in the same place as the message.
    logger.error("Failed to load jars from " + libPathStr + ": " + e, e);
  }
}
Example 10
Source File: FileInputFormat.java From big-c with Apache License 2.0 | 5 votes |
/**
 * Expands each input path (glob) into file statuses on the calling thread.
 *
 * <p>For every path in {@code dirs} the glob is resolved with {@code inputFilter}. A
 * {@code null} match array or an empty one is recorded as an error and the remaining paths
 * are still processed. Matched files are added directly; matched directories are listed,
 * and their accepted children are added, descending into child directories only when
 * {@code recursive} is set.
 *
 * @param job job configuration used to resolve each path's file system
 * @param dirs input paths or glob patterns
 * @param inputFilter filter applied to glob matches and to listed children
 * @param recursive whether child directories are traversed
 * @return the collected file statuses
 * @throws IOException an {@code InvalidInputException} aggregating all recorded errors, or
 *     any failure propagated from the file system
 */
private List<FileStatus> singleThreadedListStatus(JobConf job, Path[] dirs,
    PathFilter inputFilter, boolean recursive) throws IOException {
  List<FileStatus> result = new ArrayList<FileStatus>();
  List<IOException> errors = new ArrayList<IOException>();

  for (Path p : dirs) {
    FileSystem fs = p.getFileSystem(job);
    FileStatus[] matches = fs.globStatus(p, inputFilter);
    if (matches == null) {
      errors.add(new IOException("Input path does not exist: " + p));
      continue;
    }
    if (matches.length == 0) {
      errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
      continue;
    }
    for (FileStatus globStat : matches) {
      if (!globStat.isDirectory()) {
        result.add(globStat);
        continue;
      }
      RemoteIterator<LocatedFileStatus> children = fs.listLocatedStatus(globStat.getPath());
      while (children.hasNext()) {
        LocatedFileStatus child = children.next();
        if (!inputFilter.accept(child.getPath())) {
          continue;
        }
        if (recursive && child.isDirectory()) {
          addInputPathRecursively(result, fs, child.getPath(), inputFilter);
        } else {
          result.add(child);
        }
      }
    }
  }

  if (errors.isEmpty()) {
    return result;
  }
  throw new InvalidInputException(errors);
}
Example 11
Source File: WALInputFormat.java From hbase with Apache License 2.0 | 5 votes |
private List<FileStatus> getFiles(FileSystem fs, Path dir, long startTime, long endTime) throws IOException { List<FileStatus> result = new ArrayList<>(); LOG.debug("Scanning " + dir.toString() + " for WAL files"); RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(dir); if (!iter.hasNext()) return Collections.emptyList(); while (iter.hasNext()) { LocatedFileStatus file = iter.next(); if (file.isDirectory()) { // recurse into sub directories result.addAll(getFiles(fs, file.getPath(), startTime, endTime)); } else { String name = file.getPath().toString(); int idx = name.lastIndexOf('.'); if (idx > 0) { try { long fileStartTime = Long.parseLong(name.substring(idx+1)); if (fileStartTime <= endTime) { LOG.info("Found: " + file); result.add(file); } } catch (NumberFormatException x) { idx = 0; } } if (idx == 0) { LOG.warn("File " + name + " does not appear to be an WAL file. Skipping..."); } } } return result; }
Example 12
Source File: HDFSResourceStore.java From kylin with Apache License 2.0 | 5 votes |
/**
 * Visits the resources stored under {@code folderPath}.
 *
 * <p>Returns immediately when the resolved HDFS path does not exist or is not a directory.
 * Otherwise every non-directory entry is mapped to a resource path (the folder path plus
 * the entry's path relative to the folder), checked against {@code filter} together with
 * its modification time, and, if it matches, wrapped in a {@code RawResource} handed to
 * {@code visitor}. The resource is closed after the visit, also when the visitor throws.
 *
 * @param folderPath resource folder to visit
 * @param recursive passed to the file listing to control traversal depth
 * @param filter decides which resources are visited
 * @param loadContent when true, the file is opened and its stream attached to the resource
 * @param visitor callback invoked for each matching resource
 * @throws IOException propagated from the file system or the visitor
 * @throws IllegalStateException if a listed path does not start with the folder's path
 */
@Override
protected void visitFolderImpl(String folderPath, boolean recursive, VisitFilter filter,
    boolean loadContent, Visitor visitor) throws IOException {
  Path p = getRealHDFSPath(folderPath);
  if (!(fs.exists(p) && fs.isDirectory(p))) {
    return;
  }

  String fsPathPrefix = p.toUri().getPath();
  String resPathPrefix;
  if (folderPath.endsWith("/")) {
    resPathPrefix = folderPath;
  } else {
    resPathPrefix = folderPath + "/";
  }

  for (RemoteIterator<LocatedFileStatus> it = fs.listFiles(p, recursive); it.hasNext();) {
    LocatedFileStatus status = it.next();
    if (status.isDirectory()) {
      continue;
    }

    String path = status.getPath().toUri().getPath();
    if (!path.startsWith(fsPathPrefix)) {
      throw new IllegalStateException("File path " + path + " is supposed to start with " + fsPathPrefix);
    }

    // +1 drops the separator that follows the folder prefix.
    String resPath = resPathPrefix + path.substring(fsPathPrefix.length() + 1);
    if (!filter.matches(resPath, status.getModificationTime())) {
      continue;
    }

    RawResource raw = loadContent
        ? new RawResource(resPath, status.getModificationTime(), fs.open(status.getPath()))
        : new RawResource(resPath, status.getModificationTime());
    try {
      visitor.visit(raw);
    } finally {
      raw.close();
    }
  }
}
Example 13
Source File: QueryInputFormat.java From Halyard with Apache License 2.0 | 5 votes |
/**
 * Registers every non-directory entry under {@code path} as a query.
 *
 * <p>Directories are descended into with the same arguments; every other entry is passed
 * to {@code addQuery}.
 *
 * @param conf configuration used to resolve the file system and passed on to
 *     {@code addQuery}
 * @param path directory to scan
 * @param sparqlUpdate forwarded unchanged to {@code addQuery}
 * @param stage forwarded unchanged to {@code addQuery}
 * @throws IOException propagated from the file system listing or from {@code addQuery}
 */
public static void addQueryRecursively(Configuration conf, Path path, boolean sparqlUpdate,
    int stage) throws IOException {
  for (RemoteIterator<LocatedFileStatus> entries = path.getFileSystem(conf).listLocatedStatus(path);
      entries.hasNext();) {
    LocatedFileStatus entry = entries.next();
    if (!entry.isDirectory()) {
      addQuery(conf, entry, sparqlUpdate, stage);
      continue;
    }
    addQueryRecursively(conf, entry.getPath(), sparqlUpdate, stage);
  }
}
Example 14
Source File: HiveFileIterator.java From presto with Apache License 2.0 | 5 votes |
@Override protected LocatedFileStatus computeNext() { while (true) { while (remoteIterator.hasNext()) { LocatedFileStatus status = getLocatedFileStatus(remoteIterator); // Ignore hidden files and directories. Hive ignores files starting with _ and . as well. String fileName = status.getPath().getName(); if (fileName.startsWith("_") || fileName.startsWith(".")) { continue; } if (status.isDirectory()) { switch (nestedDirectoryPolicy) { case IGNORED: continue; case RECURSE: paths.add(status.getPath()); continue; case FAIL: throw new NestedDirectoryNotAllowedException(); } } return status; } if (paths.isEmpty()) { return endOfData(); } remoteIterator = getLocatedFileStatusRemoteIterator(paths.removeFirst()); } }
Example 15
Source File: FileInputFormat.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Resolves the input paths of a job into file statuses without using extra threads.
 *
 * <p>Each path is globbed with {@code inputFilter}. A missing path ({@code null} matches)
 * or a pattern with zero matches is recorded as an error while processing continues. A
 * matched file is added as is; a matched directory is listed and its accepted children are
 * added, with child directories traversed only when {@code recursive} is true.
 *
 * @param job job context whose configuration resolves each path's file system
 * @param dirs input paths or glob patterns
 * @param inputFilter filter for glob matches and listed children
 * @param recursive whether child directories are descended into
 * @return all collected file statuses
 * @throws IOException an {@code InvalidInputException} carrying every recorded error, or
 *     any failure propagated from the file system
 */
private List<FileStatus> singleThreadedListStatus(JobContext job, Path[] dirs,
    PathFilter inputFilter, boolean recursive) throws IOException {
  List<FileStatus> result = new ArrayList<FileStatus>();
  List<IOException> errors = new ArrayList<IOException>();

  for (Path p : dirs) {
    FileSystem fs = p.getFileSystem(job.getConfiguration());
    FileStatus[] matches = fs.globStatus(p, inputFilter);
    if (matches == null) {
      errors.add(new IOException("Input path does not exist: " + p));
      continue;
    }
    if (matches.length == 0) {
      errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
      continue;
    }
    for (FileStatus globStat : matches) {
      if (!globStat.isDirectory()) {
        result.add(globStat);
        continue;
      }
      RemoteIterator<LocatedFileStatus> listing = fs.listLocatedStatus(globStat.getPath());
      while (listing.hasNext()) {
        LocatedFileStatus child = listing.next();
        if (inputFilter.accept(child.getPath())) {
          boolean descend = recursive && child.isDirectory();
          if (descend) {
            addInputPathRecursively(result, fs, child.getPath(), inputFilter);
          } else {
            result.add(child);
          }
        }
      }
    }
  }

  if (!errors.isEmpty()) {
    throw new InvalidInputException(errors);
  }
  return result;
}
Example 16
Source File: FileInputFormat.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Lists the job's input paths sequentially and returns the resulting file statuses.
 *
 * <p>For each entry of {@code dirs}, the glob is evaluated with {@code inputFilter}.
 * {@code null} or empty match arrays are collected as errors instead of aborting. Matches
 * that are files are added directly. Matches that are directories have their children
 * listed; accepted children are added, and accepted child directories are traversed when
 * {@code recursive} is true.
 *
 * @param job job configuration used to obtain the file system of each path
 * @param dirs input paths or glob patterns
 * @param inputFilter filter used for globbing and for listed children
 * @param recursive whether to descend into child directories
 * @return the file statuses that were collected
 * @throws IOException an {@code InvalidInputException} built from the collected errors, or
 *     any failure propagated from the file system
 */
private List<FileStatus> singleThreadedListStatus(JobConf job, Path[] dirs,
    PathFilter inputFilter, boolean recursive) throws IOException {
  List<FileStatus> collected = new ArrayList<FileStatus>();
  List<IOException> problems = new ArrayList<IOException>();

  for (Path p : dirs) {
    FileSystem fs = p.getFileSystem(job);
    FileStatus[] matches = fs.globStatus(p, inputFilter);
    if (matches == null) {
      problems.add(new IOException("Input path does not exist: " + p));
    } else if (matches.length == 0) {
      problems.add(new IOException("Input Pattern " + p + " matches 0 files"));
    } else {
      for (FileStatus globStat : matches) {
        if (globStat.isDirectory()) {
          for (RemoteIterator<LocatedFileStatus> it = fs.listLocatedStatus(globStat.getPath());
              it.hasNext();) {
            LocatedFileStatus stat = it.next();
            if (!inputFilter.accept(stat.getPath())) {
              continue;
            }
            if (recursive && stat.isDirectory()) {
              addInputPathRecursively(collected, fs, stat.getPath(), inputFilter);
            } else {
              collected.add(stat);
            }
          }
        } else {
          collected.add(globStat);
        }
      }
    }
  }

  if (problems.isEmpty()) {
    return collected;
  }
  throw new InvalidInputException(problems);
}
Example 17
Source File: HDFSResourceStore.java From kylin-on-parquet-v2 with Apache License 2.0 | 5 votes |
/**
 * Iterates over the files beneath {@code folderPath} and passes the matching ones to
 * {@code visitor}.
 *
 * <p>Nothing happens when the resolved HDFS path is absent or is not a directory. For each
 * non-directory entry the resource path is formed from the folder path and the entry's
 * path relative to it; entries accepted by {@code filter} (given the resource path and the
 * modification time) are wrapped in a {@code RawResource}, visited, and always closed
 * afterwards.
 *
 * @param folderPath resource folder whose files are visited
 * @param recursive forwarded to the file listing
 * @param filter selects the resources to visit
 * @param loadContent whether the file's stream is opened and attached to the resource
 * @param visitor receives each selected resource
 * @throws IOException propagated from the file system or the visitor
 * @throws IllegalStateException if a listed file lies outside the folder's path prefix
 */
@Override
protected void visitFolderImpl(String folderPath, boolean recursive, VisitFilter filter,
    boolean loadContent, Visitor visitor) throws IOException {
  Path p = getRealHDFSPath(folderPath);
  if (!fs.exists(p)) {
    return;
  }
  if (!fs.isDirectory(p)) {
    return;
  }

  String fsPathPrefix = p.toUri().getPath();
  String resPathPrefix = folderPath.endsWith("/") ? folderPath : folderPath + "/";

  RemoteIterator<LocatedFileStatus> listing = fs.listFiles(p, recursive);
  while (listing.hasNext()) {
    LocatedFileStatus status = listing.next();
    if (!status.isDirectory()) {
      String path = status.getPath().toUri().getPath();
      if (!path.startsWith(fsPathPrefix)) {
        throw new IllegalStateException("File path " + path + " is supposed to start with " + fsPathPrefix);
      }

      // Skip the prefix and the separator after it to get the relative part.
      String relative = path.substring(fsPathPrefix.length() + 1);
      String resPath = resPathPrefix + relative;

      if (filter.matches(resPath, status.getModificationTime())) {
        RawResource raw;
        if (!loadContent) {
          raw = new RawResource(resPath, status.getModificationTime());
        } else {
          raw = new RawResource(resPath, status.getModificationTime(), fs.open(status.getPath()));
        }
        try {
          visitor.visit(raw);
        } finally {
          raw.close();
        }
      }
    }
  }
}
Example 18
Source File: HiveMetadataUtils.java From dremio-oss with Apache License 2.0 | 4 votes |
public static HiveReaderProto.FileSystemPartitionUpdateKey getFSBasedUpdateKey(String partitionDir, JobConf job, boolean isRecursive, boolean directoriesOnly, int partitionId) { final List<HiveReaderProto.FileSystemCachedEntity> cachedEntities = new ArrayList<>(); final Path rootLocation = new Path(partitionDir); try { // TODO: DX-16001 - make async configurable for Hive. final HadoopFileSystemWrapper fs = new HadoopFileSystemWrapper(rootLocation, job); if (fs.exists(rootLocation)) { final FileStatus rootStatus = fs.getFileStatus(rootLocation); if (rootStatus.isDirectory()) { cachedEntities.add(HiveReaderProto.FileSystemCachedEntity.newBuilder() .setPath(EMPTY_STRING) .setLastModificationTime(rootStatus.getModificationTime()) .setIsDir(true) .build()); final RemoteIterator<LocatedFileStatus> statuses = isRecursive ? fs.listFiles(rootLocation, true) : fs.listFiles(rootLocation, false); while (statuses.hasNext()) { LocatedFileStatus fileStatus = statuses.next(); final Path filePath = fileStatus.getPath(); if (fileStatus.isDirectory()) { cachedEntities.add(HiveReaderProto.FileSystemCachedEntity.newBuilder() .setPath(PathUtils.relativePath(filePath, rootLocation)) .setLastModificationTime(fileStatus.getModificationTime()) .setIsDir(true) .build()); } else if (fileStatus.isFile() && !directoriesOnly) { cachedEntities.add(HiveReaderProto.FileSystemCachedEntity.newBuilder() .setPath(PathUtils.relativePath(filePath, rootLocation)) .setLastModificationTime(fileStatus.getModificationTime()) .setIsDir(false) .build()); } } } else { cachedEntities.add(HiveReaderProto.FileSystemCachedEntity.newBuilder() .setPath(EMPTY_STRING) .setLastModificationTime(rootStatus.getModificationTime()) .setIsDir(false) .build()); } return HiveReaderProto.FileSystemPartitionUpdateKey.newBuilder() .setPartitionId(partitionId) .setPartitionRootDir(fs.makeQualified(rootLocation).toString()) .addAllCachedEntities(cachedEntities) .build(); } return null; } catch (IOException e) { throw new 
RuntimeException(e); } }
Example 19
Source File: DFSPathSelector.java From hudi with Apache License 2.0 | 4 votes |
public Pair<Option<String>, String> getNextFilePathsAndMaxModificationTime(Option<String> lastCheckpointStr, long sourceLimit) { try { // obtain all eligible files under root folder. List<FileStatus> eligibleFiles = new ArrayList<>(); RemoteIterator<LocatedFileStatus> fitr = fs.listFiles(new Path(props.getString(Config.ROOT_INPUT_PATH_PROP)), true); while (fitr.hasNext()) { LocatedFileStatus fileStatus = fitr.next(); if (fileStatus.isDirectory() || IGNORE_FILEPREFIX_LIST.stream().anyMatch(pfx -> fileStatus.getPath().getName().startsWith(pfx))) { continue; } eligibleFiles.add(fileStatus); } // sort them by modification time. eligibleFiles.sort(Comparator.comparingLong(FileStatus::getModificationTime)); // Filter based on checkpoint & input size, if needed long currentBytes = 0; long maxModificationTime = Long.MIN_VALUE; List<FileStatus> filteredFiles = new ArrayList<>(); for (FileStatus f : eligibleFiles) { if (lastCheckpointStr.isPresent() && f.getModificationTime() <= Long.valueOf(lastCheckpointStr.get()).longValue()) { // skip processed files continue; } if (currentBytes + f.getLen() >= sourceLimit) { // we have enough data, we are done break; } maxModificationTime = f.getModificationTime(); currentBytes += f.getLen(); filteredFiles.add(f); } // no data to read if (filteredFiles.size() == 0) { return new ImmutablePair<>(Option.empty(), lastCheckpointStr.orElseGet(() -> String.valueOf(Long.MIN_VALUE))); } // read the files out. String pathStr = filteredFiles.stream().map(f -> f.getPath().toString()).collect(Collectors.joining(",")); return new ImmutablePair<>(Option.ofNullable(pathStr), String.valueOf(maxModificationTime)); } catch (IOException ioe) { throw new HoodieIOException("Unable to read from source from checkpoint: " + lastCheckpointStr, ioe); } }
Example 20
Source File: HiveMetadataUtils.java From dremio-oss with Apache License 2.0 | 4 votes |
public static HiveReaderProto.FileSystemPartitionUpdateKey getFSBasedUpdateKey(String partitionDir, JobConf job, boolean isRecursive, boolean directoriesOnly, int partitionId) { final List<HiveReaderProto.FileSystemCachedEntity> cachedEntities = new ArrayList<>(); final Path rootLocation = new Path(partitionDir); try { // TODO: DX-16001 - make async configurable for Hive. final HadoopFileSystemWrapper fs = new HadoopFileSystemWrapper(rootLocation, job); if (fs.exists(rootLocation)) { final FileStatus rootStatus = fs.getFileStatus(rootLocation); if (rootStatus.isDirectory()) { cachedEntities.add(HiveReaderProto.FileSystemCachedEntity.newBuilder() .setPath(EMPTY_STRING) .setLastModificationTime(rootStatus.getModificationTime()) .setIsDir(true) .build()); final RemoteIterator<LocatedFileStatus> statuses = isRecursive ? fs.listFiles(rootLocation, true) : fs.listFiles(rootLocation, false); while (statuses.hasNext()) { LocatedFileStatus fileStatus = statuses.next(); final Path filePath = fileStatus.getPath(); if (fileStatus.isDirectory()) { cachedEntities.add(HiveReaderProto.FileSystemCachedEntity.newBuilder() .setPath(PathUtils.relativePath(filePath, rootLocation)) .setLastModificationTime(fileStatus.getModificationTime()) .setIsDir(true) .build()); } else if (fileStatus.isFile() && !directoriesOnly) { cachedEntities.add(HiveReaderProto.FileSystemCachedEntity.newBuilder() .setPath(PathUtils.relativePath(filePath, rootLocation)) .setLastModificationTime(fileStatus.getModificationTime()) .setIsDir(false) .build()); } } } else { cachedEntities.add(HiveReaderProto.FileSystemCachedEntity.newBuilder() .setPath(EMPTY_STRING) .setLastModificationTime(rootStatus.getModificationTime()) .setIsDir(false) .build()); } return HiveReaderProto.FileSystemPartitionUpdateKey.newBuilder() .setPartitionId(partitionId) .setPartitionRootDir(fs.makeQualified(rootLocation).toString()) .addAllCachedEntities(cachedEntities) .build(); } return null; } catch (IOException e) { throw new 
RuntimeException(e); } }