Java Code Examples for org.apache.hadoop.fs.FileStatus#isDir()
The following examples show how to use org.apache.hadoop.fs.FileStatus#isDir().
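Before diving into the examples, here is a minimal, self-contained sketch of the call pattern they all share: obtain a FileStatus from a FileSystem and branch on isDir(). The class name IsDirExample and the command-line argument handling are illustrative additions, not code from any project below. Note that isDir() is deprecated in Hadoop 2.x and later in favor of FileStatus#isDirectory(); it is retained for backward compatibility, which is why some examples suppress deprecation warnings.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical driver class; the path argument is illustrative.
public class IsDirExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path path = new Path(args[0]);
    FileSystem fs = path.getFileSystem(conf);
    FileStatus status = fs.getFileStatus(path);
    // isDir() is deprecated in Hadoop 2.x and later;
    // FileStatus#isDirectory() is the modern replacement.
    if (status.isDir()) {
      System.out.println(path + " is a directory");
    } else {
      System.out.println(path + " is a file, " + status.getLen() + " bytes");
    }
  }
}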
Example 1
Source File: MergeCommand.java (from parquet-mr, Apache License 2.0)
/**
 * Get all input files.
 * @param input input files or directory.
 * @return ordered input files.
 */
private List<Path> getInputFiles(List<String> input) throws IOException {
  List<Path> inputFiles = null;

  if (input.size() == 1) {
    Path p = new Path(input.get(0));
    FileSystem fs = p.getFileSystem(conf);
    FileStatus status = fs.getFileStatus(p);
    if (status.isDir()) {
      inputFiles = getInputFilesFromDirectory(status);
    }
  } else {
    inputFiles = parseInputFiles(input);
  }

  checkParquetFiles(inputFiles);

  return inputFiles;
}
Example 2
Source File: AvroStorageUtils.java (from Cubert, Apache License 2.0)
/** get last file of a hdfs path if it is a directory;
 *  or return the file itself if path is a file
 */
public static Path getLast(Path path, FileSystem fs) throws IOException {
  FileStatus status = fs.getFileStatus(path);
  if (!status.isDir()) {
    return path;
  }
  FileStatus[] statuses = fs.listStatus(path, PATH_FILTER);
  if (statuses.length == 0) {
    return null;
  } else {
    Arrays.sort(statuses);
    for (int i = statuses.length - 1; i >= 0; i--) {
      if (!statuses[i].isDir()) {
        return statuses[i].getPath();
      }
    }
    return null;
  }
}
Example 3
Source File: MapRedUtil.java (from spork, Apache License 2.0)
/**
 * Returns the total number of bytes for this file, or if a directory all
 * files in the directory.
 *
 * @param fs FileSystem
 * @param status FileStatus
 * @param max Maximum value of total length that will trigger exit. Many
 *        times we're only interested whether the total length of files is
 *        greater than X or not. In such case, we can exit the function
 *        early as soon as the max is reached.
 * @return
 * @throws IOException
 */
public static long getPathLength(FileSystem fs, FileStatus status, long max)
    throws IOException {
  if (!status.isDir()) {
    return status.getLen();
  } else {
    FileStatus[] children = fs.listStatus(status.getPath(), hiddenFileFilter);
    long size = 0;
    for (FileStatus child : children) {
      size += getPathLength(fs, child, max);
      if (size > max) return size;
    }
    return size;
  }
}
Example 4
Source File: TestMapRed.java (from RDFS, Apache License 2.0)
private static void printFiles(Path dir, Configuration conf) throws IOException {
  FileSystem fs = dir.getFileSystem(conf);
  for (FileStatus f : fs.listStatus(dir)) {
    System.out.println("Reading " + f.getPath() + ": ");
    if (f.isDir()) {
      System.out.println("  it is a map file.");
      printSequenceFile(fs, new Path(f.getPath(), "data"), conf);
    } else if (isSequenceFile(fs, f.getPath())) {
      System.out.println("  it is a sequence file.");
      printSequenceFile(fs, f.getPath(), conf);
    } else {
      System.out.println("  it is a text file.");
      printTextFile(fs, f.getPath());
    }
  }
}
Example 5
Source File: DirectoryMonitorDiscovery.java (from flink, Apache License 2.0)
private static void listStatusRecursively(
    FileSystem fs,
    FileStatus fileStatus,
    int level,
    int expectLevel,
    List<FileStatus> results) throws IOException {
  if (expectLevel == level) {
    results.add(fileStatus);
    return;
  }

  if (fileStatus.isDir()) {
    for (FileStatus stat : fs.listStatus(fileStatus.getPath())) {
      listStatusRecursively(fs, stat, level + 1, expectLevel, results);
    }
  }
}
Example 6
Source File: AvroHdfsFileReader.java (from ml-ease, Apache License 2.0)
@Override
protected List<Path> getPaths(String filePath) throws IOException {
  Path path = new Path(filePath);
  FileSystem fs = path.getFileSystem(getConf());
  List<Path> paths = new ArrayList<Path>();
  for (FileStatus status : fs.listStatus(path)) {
    if (status.isDir() && !AvroUtils.shouldPathBeIgnored(status.getPath())) {
      paths.addAll(getPaths(status.getPath().toString()));
    } else if (isAvro(status.getPath())) {
      paths.add(status.getPath());
    }
  }
  return paths;
}
Example 7
Source File: MapRedUtil.java (from spork, Apache License 2.0)
/**
 * Get all files recursively from the given list of files
 *
 * @param files a list of FileStatus
 * @param conf the configuration object
 * @return the list of FileStatus that contains all the files in the given
 *         list and, recursively, all the files inside the directories in
 *         the given list
 * @throws IOException
 */
public static List<FileStatus> getAllFileRecursively(
    List<FileStatus> files, Configuration conf) throws IOException {
  List<FileStatus> result = new ArrayList<FileStatus>();
  int len = files.size();
  for (int i = 0; i < len; ++i) {
    FileStatus file = files.get(i);
    if (file.isDir()) {
      Path p = file.getPath();
      FileSystem fs = p.getFileSystem(conf);
      addInputPathRecursively(result, fs, p, hiddenFileFilter);
    } else {
      result.add(file);
    }
  }
  log.info("Total input paths to process : " + result.size());
  return result;
}
Example 8
Source File: RaidNode.java (from RDFS, Apache License 2.0)
public static List<FileStatus> listDirectoryRaidFileStatus(
    Configuration conf, FileSystem srcFs, Path p) throws IOException {
  long minFileSize = conf.getLong(MINIMUM_RAIDABLE_FILESIZE_KEY,
      MINIMUM_RAIDABLE_FILESIZE);
  List<FileStatus> lfs = new ArrayList<FileStatus>();
  FileStatus[] files = srcFs.listStatus(p);
  for (FileStatus stat : files) {
    if (stat.isDir()) {
      return null;
    }
    // We don't raid too small files
    if (stat.getLen() < minFileSize) {
      continue;
    }
    lfs.add(stat);
  }
  if (lfs.size() == 0)
    return null;
  return lfs;
}
Example 9
Source File: FastCopy.java (from RDFS, Apache License 2.0)
/**
 * Recursively lists out all the files under a given path.
 *
 * @param root the path under which we want to list out files
 * @param fs the filesystem
 * @param result the list which holds all the files.
 * @throws IOException
 */
private static void getDirectoryListing(FileStatus root, FileSystem fs,
    List<CopyPath> result, Path dstPath) throws IOException {
  if (!root.isDir()) {
    result.add(new CopyPath(root.getPath(), dstPath));
    return;
  }

  for (FileStatus child : fs.listStatus(root.getPath())) {
    getDirectoryListing(child, fs, result,
        new Path(dstPath, child.getPath().getName()));
  }
}
Example 10
Source File: Util.java (from spork, Apache License 2.0)
static public void copyFromClusterToLocal(MiniGenericCluster cluster,
    String fileNameOnCluster, String localFileName) throws IOException {
  if (Util.WINDOWS) {
    fileNameOnCluster = fileNameOnCluster.replace('\\', '/');
    localFileName = localFileName.replace('\\', '/');
  }
  File parent = new File(localFileName).getParentFile();
  if (!parent.exists()) {
    parent.mkdirs();
  }
  PrintWriter writer = new PrintWriter(new FileWriter(localFileName));

  FileSystem fs = FileSystem.get(ConfigurationUtil.toConfiguration(
      cluster.getProperties()));
  if (!fs.exists(new Path(fileNameOnCluster))) {
    throw new IOException("File " + fileNameOnCluster +
        " does not exist on the minicluster");
  }

  String line = null;
  FileStatus fst = fs.getFileStatus(new Path(fileNameOnCluster));
  if (fst.isDir()) {
    throw new IOException("Only files from cluster can be copied locally," +
        " " + fileNameOnCluster + " is a directory");
  }
  FSDataInputStream stream = fs.open(new Path(fileNameOnCluster));
  BufferedReader reader = new BufferedReader(new InputStreamReader(stream));

  while ((line = reader.readLine()) != null) {
    writer.println(line);
  }

  reader.close();
  writer.close();
}
Example 11
Source File: IgniteHadoopFileSystemAbstractSelfTest.java (from ignite, Apache License 2.0)
@SuppressWarnings("deprecation") @Override public int compare(FileStatus o1, FileStatus o2) { if (o1 == null || o2 == null) return o1 == o2 ? 0 : o1 == null ? -1 : 1; return o1.isDir() == o2.isDir() ? o1.getPath().compareTo(o2.getPath()) : o1.isDir() ? -1 : 1; }
Example 12
Source File: FileStatusExtended.java (from RDFS, Apache License 2.0)
public FileStatusExtended(FileStatus stat, Block[] blocks, String leaseHolder) {
  super(stat.getLen(), stat.isDir(), stat.getReplication(),
      stat.getBlockSize(), stat.getModificationTime(), stat.getAccessTime(),
      stat.getPermission(), stat.getOwner(), stat.getGroup(),
      stat.getPath());
  this.blocks = blocks;
  this.leaseHolder = (leaseHolder == null) ? "" : leaseHolder;
}
Example 13
Source File: DistCp.java (from RDFS, Apache License 2.0)
private static void updateDestStatus(FileStatus src, FileStatus dst,
    EnumSet<FileAttribute> preserved, FileSystem destFileSys
    ) throws IOException {
  String owner = null;
  String group = null;
  if (preserved.contains(FileAttribute.USER)
      && !src.getOwner().equals(dst.getOwner())) {
    owner = src.getOwner();
  }
  if (preserved.contains(FileAttribute.GROUP)
      && !src.getGroup().equals(dst.getGroup())) {
    group = src.getGroup();
  }
  if (owner != null || group != null) {
    destFileSys.setOwner(dst.getPath(), owner, group);
  }
  if (preserved.contains(FileAttribute.PERMISSION)
      && !src.getPermission().equals(dst.getPermission())) {
    destFileSys.setPermission(dst.getPath(), src.getPermission());
  }
  if (preserved.contains(FileAttribute.TIMES)) {
    try {
      destFileSys.setTimes(dst.getPath(),
          src.getModificationTime(), src.getAccessTime());
    } catch (IOException exc) {
      if (!dst.isDir()) { // hadoop 0.20 doesn't allow setTimes on dirs
        throw exc;
      }
    }
  }
}
Example 14
Source File: HdfsResourceLoader.java (from ambiverse-nlu, Apache License 2.0)
private void doRetrieveMatchingResources(Path rootDir, String subPattern,
    Set<Resource> results) throws IOException {
  if (!this.fs.isFile(rootDir)) {
    FileStatus[] statuses = this.fs.listStatus(rootDir);
    if (!ObjectUtils.isEmpty(statuses)) {
      String root = rootDir.toUri().getPath();
      for (FileStatus fileStatus : statuses) {
        Path p = fileStatus.getPath();
        String location = p.toUri().getPath();
        if (location.startsWith(root)) {
          location = location.substring(root.length());
        }

        if (fileStatus.isDir() && this.pathMatcher.matchStart(subPattern, location)) {
          this.doRetrieveMatchingResources(p, subPattern, results);
        } else if (this.pathMatcher.match(subPattern.substring(1), location)) {
          results.add(new HdfsResource(p, this.fs));
        }
      }
    }
  } else if (this.pathMatcher.match(subPattern,
      stripPrefix(rootDir.toUri().getPath()))) {
    results.add(new HdfsResource(rootDir, this.fs));
  }
}
Example 15
Source File: HadoopConnectingFileSystemProvider.java (from CloverETL-Engine, GNU Lesser General Public License v2.1)
@Override
public HadoopFileStatus getExtendedStatus(URI path) throws IOException {
  checkConnected();
  FileStatus status = dfs.getFileStatus(new Path(path));
  return new HadoopFileStatus(status.getPath().toUri(), status.getLen(),
      status.isDir(), status.getModificationTime(), status.getBlockSize(),
      status.getGroup(), status.getOwner(), status.getReplication());
}
Example 16
Source File: FileInputFormat.java (from hadoop-gpu, Apache License 2.0)
/** List input directories.
 * Subclasses may override to, e.g., select only files matching a regular
 * expression.
 *
 * @param job the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException if zero items.
 */
protected List<FileStatus> listStatus(JobContext job) throws IOException {
  List<FileStatus> result = new ArrayList<FileStatus>();
  Path[] dirs = getInputPaths(job);
  if (dirs.length == 0) {
    throw new IOException("No input paths specified in job");
  }

  List<IOException> errors = new ArrayList<IOException>();

  // creates a MultiPathFilter with the hiddenFileFilter and the
  // user provided one (if any).
  List<PathFilter> filters = new ArrayList<PathFilter>();
  filters.add(hiddenFileFilter);
  PathFilter jobFilter = getInputPathFilter(job);
  if (jobFilter != null) {
    filters.add(jobFilter);
  }
  PathFilter inputFilter = new MultiPathFilter(filters);

  for (int i = 0; i < dirs.length; ++i) {
    Path p = dirs[i];
    FileSystem fs = p.getFileSystem(job.getConfiguration());
    FileStatus[] matches = fs.globStatus(p, inputFilter);
    if (matches == null) {
      errors.add(new IOException("Input path does not exist: " + p));
    } else if (matches.length == 0) {
      errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
    } else {
      for (FileStatus globStat : matches) {
        if (globStat.isDir()) {
          for (FileStatus stat : fs.listStatus(globStat.getPath(), inputFilter)) {
            result.add(stat);
          }
        } else {
          result.add(globStat);
        }
      }
    }
  }

  if (!errors.isEmpty()) {
    throw new InvalidInputException(errors);
  }
  LOG.info("Total input paths to process : " + result.size());
  return result;
}
Example 17
Source File: FileInputFormat.java (from RDFS, Apache License 2.0)
/** List input directories.
 * Mark this method to be final to make sure this method does not
 * get overridden by any subclass.
 * If a subclass historically overrides this method, now it needs to override
 * {@link #listLocatedStatus(JobContext)} instead.
 *
 * @param job the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException if zero items.
 */
final static protected List<FileStatus> listStatus(JobContext job)
    throws IOException {
  List<FileStatus> result = new ArrayList<FileStatus>();
  Path[] dirs = getInputPaths(job);
  if (dirs.length == 0) {
    throw new IOException("No input paths specified in job");
  }

  List<IOException> errors = new ArrayList<IOException>();

  // creates a MultiPathFilter with the hiddenFileFilter and the
  // user provided one (if any).
  List<PathFilter> filters = new ArrayList<PathFilter>();
  filters.add(hiddenFileFilter);
  PathFilter jobFilter = getInputPathFilter(job);
  if (jobFilter != null) {
    filters.add(jobFilter);
  }
  PathFilter inputFilter = new MultiPathFilter(filters);

  for (int i = 0; i < dirs.length; ++i) {
    Path p = dirs[i];
    FileSystem fs = p.getFileSystem(job.getConfiguration());
    FileStatus[] matches = fs.globStatus(p, inputFilter);
    if (matches == null) {
      errors.add(new IOException("Input path does not exist: " + p));
    } else if (matches.length == 0) {
      errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
    } else {
      for (FileStatus globStat : matches) {
        if (globStat.isDir()) {
          for (FileStatus stat : fs.listStatus(globStat.getPath(), inputFilter)) {
            result.add(stat);
          }
        } else {
          result.add(globStat);
        }
      }
    }
  }

  if (!errors.isEmpty()) {
    throw new InvalidInputException(errors);
  }
  LOG.info("Total input paths to process : " + result.size());
  return result;
}
Example 18
Source File: GoogleHadoopFileSystemBase.java (from hadoop-connectors, Apache License 2.0)
private static boolean isImplicitDirectory(FileStatus curr) {
  // Modification time of 0 indicates implicit directory.
  return curr.isDir() && curr.getModificationTime() == 0;
}
Example 19
Source File: DistCp.java (from hadoop-gpu, Apache License 2.0)
/** Delete the dst files/dirs which do not exist in src */
static private void deleteNonexisting(
    FileSystem dstfs, FileStatus dstroot, Path dstsorted,
    FileSystem jobfs, Path jobdir, JobConf jobconf, Configuration conf
    ) throws IOException {
  if (!dstroot.isDir()) {
    throw new IOException("dst must be a directory when option "
        + Options.DELETE.cmd + " is set, but dst (= " + dstroot.getPath()
        + ") is not a directory.");
  }

  //write dst lsr results
  final Path dstlsr = new Path(jobdir, "_distcp_dst_lsr");
  final SequenceFile.Writer writer = SequenceFile.createWriter(jobfs, jobconf,
      dstlsr, Text.class, FileStatus.class,
      SequenceFile.CompressionType.NONE);
  try {
    //do lsr to get all file statuses in dstroot
    final Stack<FileStatus> lsrstack = new Stack<FileStatus>();
    for (lsrstack.push(dstroot); !lsrstack.isEmpty(); ) {
      final FileStatus status = lsrstack.pop();
      if (status.isDir()) {
        for (FileStatus child : dstfs.listStatus(status.getPath())) {
          String relative = makeRelative(dstroot.getPath(), child.getPath());
          writer.append(new Text(relative), child);
          lsrstack.push(child);
        }
      }
    }
  } finally {
    checkAndClose(writer);
  }

  //sort lsr results
  final Path sortedlsr = new Path(jobdir, "_distcp_dst_lsr_sorted");
  SequenceFile.Sorter sorter = new SequenceFile.Sorter(jobfs,
      new Text.Comparator(), Text.class, FileStatus.class, jobconf);
  sorter.sort(dstlsr, sortedlsr);

  //compare lsr list and dst list
  SequenceFile.Reader lsrin = null;
  SequenceFile.Reader dstin = null;
  try {
    lsrin = new SequenceFile.Reader(jobfs, sortedlsr, jobconf);
    dstin = new SequenceFile.Reader(jobfs, dstsorted, jobconf);

    //compare sorted lsr list and sorted dst list
    final Text lsrpath = new Text();
    final FileStatus lsrstatus = new FileStatus();
    final Text dstpath = new Text();
    final Text dstfrom = new Text();
    final FsShell shell = new FsShell(conf);
    final String[] shellargs = {"-rmr", null};

    boolean hasnext = dstin.next(dstpath, dstfrom);
    for (; lsrin.next(lsrpath, lsrstatus); ) {
      int dst_cmp_lsr = dstpath.compareTo(lsrpath);
      for (; hasnext && dst_cmp_lsr < 0; ) {
        hasnext = dstin.next(dstpath, dstfrom);
        dst_cmp_lsr = dstpath.compareTo(lsrpath);
      }

      if (dst_cmp_lsr == 0) {
        //lsrpath exists in dst, skip it
        hasnext = dstin.next(dstpath, dstfrom);
      } else {
        //lsrpath does not exist, delete it
        String s = new Path(dstroot.getPath(), lsrpath.toString()).toString();
        if (shellargs[1] == null || !isAncestorPath(shellargs[1], s)) {
          shellargs[1] = s;
          int r = 0;
          try {
            r = shell.run(shellargs);
          } catch (Exception e) {
            throw new IOException("Exception from shell.", e);
          }
          if (r != 0) {
            throw new IOException("\"" + shellargs[0] + " " + shellargs[1]
                + "\" returns non-zero value " + r);
          }
        }
      }
    }
  } finally {
    checkAndClose(lsrin);
    checkAndClose(dstin);
  }
}
Example 20
Source File: PathPartitionHelper.java (from spork, Apache License 2.0)
/**
 * Recursively works through all directories, skipping filtered partitions.
 *
 * @param fs
 * @param fileStatus
 * @param partitionLevel
 * @param partitionKeys
 * @param splitPaths
 * @throws IOException
 */
private void getPartitionedFiles(ExpressionFactory expressionFactory,
    String partitionExpression, FileSystem fs, FileStatus fileStatus,
    int partitionLevel, String[] partitionKeys, List<FileStatus> splitPaths)
    throws IOException {

  String partition = (partitionLevel < partitionKeys.length) ?
      partitionKeys[partitionLevel] : null;

  Path path = fileStatus.getPath();

  // filter out hidden files
  if (path.getName().startsWith("_")) {
    return;
  }

  // pre-filter logic:
  // return if any of the checks below is not true
  if (partition != null) {

    if (fileStatus.isDir()) {
      // check that the dir name is equal to that of the partition name
      if (!path.getName().startsWith(partition))
        return;

    } else {
      // else it's a file, but not at the end of the partition tree, so
      // it's ignored.
      return;
    }

    // this means we are inside the partition, so the path will
    // contain all partitions plus their values.
    // we can apply the partition filter expression here that was passed
    // to the HiveColumnarLoader.setPartitionExpression
    if (partitionLevel == (partitionKeys.length - 1)
        && !evaluatePartitionExpression(expressionFactory,
            partitionExpression, path)) {
      LOG.debug("Pruning partition: " + path);
      return;
    }
  }

  // after this point we know that the partition is either null,
  // which means we are at the end of the partition tree and all files and
  // sub directories should be included,
  // or that we are still navigating the partition tree.
  int nextPartitionLevel = partitionLevel + 1;

  // iterate over directories if fileStatus is a dir.
  FileStatus[] childStatusArr = null;
  if (fileStatus.isDir()) {
    if ((childStatusArr = fs.listStatus(path)) != null) {
      for (FileStatus childFileStatus : childStatusArr) {
        getPartitionedFiles(expressionFactory, partitionExpression, fs,
            childFileStatus, nextPartitionLevel, partitionKeys, splitPaths);
      }
    }
  } else {
    // add file to splitPaths
    splitPaths.add(fileStatus);
  }
}
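Several of the examples above (Examples 7 and 9, for instance) implement the recursive directory walk by hand around isDir(). On Hadoop 2.x and later, the same traversal can usually be written with the non-deprecated FileStatus#isDirectory() together with FileSystem#listFiles(Path, boolean), which recurses internally and returns only files. The sketch below is an illustration of that pattern under those assumptions, not code from any of the projects above; the class name RecursiveListing is made up, and the hidden-file filtering some examples apply would still need to be added by hand.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

// Hypothetical utility class illustrating the modern API.
public class RecursiveListing {
  /** Collects all regular files under root, like the hand-rolled recursion above. */
  public static List<Path> listAllFiles(Path root, Configuration conf)
      throws IOException {
    FileSystem fs = root.getFileSystem(conf);
    List<Path> result = new ArrayList<Path>();
    FileStatus rootStatus = fs.getFileStatus(root);
    // isDirectory() replaces the deprecated isDir().
    if (!rootStatus.isDirectory()) {
      result.add(root);
      return result;
    }
    // listFiles(path, true) walks the tree for us, returning files only.
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(root, true);
    while (it.hasNext()) {
      result.add(it.next().getPath());
    }
    return result;
  }
}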