Java Code Examples for org.apache.hadoop.fs.FileStatus#isDir()

The following examples show how to use org.apache.hadoop.fs.FileStatus#isDir(). Each example is taken from a real open-source project; the source file and project are noted above each snippet. Note that isDir() is deprecated in modern Hadoop releases in favor of FileStatus#isDirectory(), which is why some of the examples below suppress deprecation warnings.
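
Before diving into the project-specific examples, here is a minimal, self-contained sketch of the pattern nearly all of them share: stat a path, branch on directory versus file, and recurse into directories. The class name, placeholder path, and bare Configuration are illustrative assumptions rather than code from any of the projects below.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListFilesSketch {

  /** Recursively collects all regular files under the given status. */
  public static void collectFiles(FileSystem fs, FileStatus status,
      List<Path> result) throws IOException {
    if (status.isDir()) { // prefer status.isDirectory() on Hadoop 0.21+
      for (FileStatus child : fs.listStatus(status.getPath())) {
        collectFiles(fs, child, result);
      }
    } else {
      result.add(status.getPath());
    }
  }

  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    Path root = new Path("/tmp/example"); // placeholder path
    FileSystem fs = root.getFileSystem(conf);
    List<Path> files = new ArrayList<Path>();
    collectFiles(fs, fs.getFileStatus(root), files);
    for (Path p : files) {
      System.out.println(p);
    }
  }
}
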
Example 1
Source File: MergeCommand.java    From parquet-mr with Apache License 2.0
/**
 * Get all input files.
 * @param input input files or directory.
 * @return ordered input files.
 */
private List<Path> getInputFiles(List<String> input) throws IOException {
  List<Path> inputFiles = null;

  if (input.size() == 1) {
    Path p = new Path(input.get(0));
    FileSystem fs = p.getFileSystem(conf);
    FileStatus status = fs.getFileStatus(p);

    if (status.isDir()) {
      inputFiles = getInputFilesFromDirectory(status);
    }
  } else {
    inputFiles = parseInputFiles(input);
  }

  checkParquetFiles(inputFiles);

  return inputFiles;
}
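One caveat about this example: if the single input path points to a regular file rather than a directory, inputFiles remains null when checkParquetFiles is called, so the original project presumably rejects or handles that case elsewhere.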
 
Example 2
Source File: AvroStorageUtils.java    From Cubert with Apache License 2.0
/**
 * Returns the last file of an HDFS path if it is a directory,
 * or the path itself if it is a file.
 */
public static Path getLast(Path path, FileSystem fs) throws IOException {

    FileStatus status = fs.getFileStatus(path);
    if (!status.isDir()) {
        return path;
    }
    FileStatus[] statuses = fs.listStatus(path, PATH_FILTER);

    if (statuses.length == 0) {
        return null;
    } else {
        Arrays.sort(statuses);
        for (int i = statuses.length - 1; i >= 0; i--) {
            if (!statuses[i].isDir()) {
                return statuses[i].getPath();
            }
        }
        return null;
    }
}
 
Example 3
Source File: MapRedUtil.java    From spork with Apache License 2.0
/**
 * Returns the total number of bytes for this file, or if a directory all
 * files in the directory.
 * 
 * @param fs FileSystem
 * @param status FileStatus
 * @param max Maximum total length that triggers an early exit. Often we
 * are only interested in whether the total length of the files is greater
 * than X or not; in that case, the function can return as soon as the
 * running total exceeds max.
 * @return the length of the file, or the total length of all files in the
 *         directory (may be short-circuited once it exceeds max)
 * @throws IOException
 */
public static long getPathLength(FileSystem fs, FileStatus status, long max)
        throws IOException {
    if (!status.isDir()) {
        return status.getLen();
    } else {
        FileStatus[] children = fs.listStatus(
                status.getPath(), hiddenFileFilter);
        long size = 0;
        for (FileStatus child : children) {
            size += getPathLength(fs, child, max);
            if (size > max) return size;
        }
        return size;
    }
}
 
Example 4
Source File: TestMapRed.java    From RDFS with Apache License 2.0
private static void printFiles(Path dir, 
                               Configuration conf) throws IOException {
  FileSystem fs = dir.getFileSystem(conf);
  for(FileStatus f: fs.listStatus(dir)) {
    System.out.println("Reading " + f.getPath() + ": ");
    if (f.isDir()) {
      System.out.println("  it is a map file.");
      printSequenceFile(fs, new Path(f.getPath(), "data"), conf);
    } else if (isSequenceFile(fs, f.getPath())) {
      System.out.println("  it is a sequence file.");
      printSequenceFile(fs, f.getPath(), conf);
    } else {
      System.out.println("  it is a text file.");
      printTextFile(fs, f.getPath());
    }
  }
}
 
Example 5
Source File: DirectoryMonitorDiscovery.java    From flink with Apache License 2.0
private static void listStatusRecursively(
		FileSystem fs,
		FileStatus fileStatus,
		int level,
		int expectLevel,
		List<FileStatus> results) throws IOException {
	if (expectLevel == level) {
		results.add(fileStatus);
		return;
	}

	if (fileStatus.isDir()) {
		for (FileStatus stat : fs.listStatus(fileStatus.getPath())) {
			listStatusRecursively(fs, stat, level + 1, expectLevel, results);
		}
	}
}
 
Example 6
Source File: AvroHdfsFileReader.java    From ml-ease with Apache License 2.0
@Override
protected List<Path> getPaths(String filePath) throws IOException
{
  Path path = new Path(filePath);
  FileSystem fs = path.getFileSystem(getConf());
  List<Path> paths = new ArrayList<Path>();
  
  for (FileStatus status: fs.listStatus(path))
  {
    if (status.isDir() && !AvroUtils.shouldPathBeIgnored(status.getPath()))
    {
      paths.addAll(getPaths(status.getPath().toString()));
    }
    else if (isAvro(status.getPath()))
    {
      paths.add(status.getPath());
    }
  }
  return paths;
}
 
Example 7
Source File: MapRedUtil.java    From spork with Apache License 2.0
/**
 * Get all files recursively from the given list of files
 *
 * @param files a list of FileStatus
 * @param conf the configuration object
 * @return the list of fileStatus that contains all the files in the given
 *         list and, recursively, all the files inside the directories in
 *         the given list
 * @throws IOException
 */
public static List<FileStatus> getAllFileRecursively(
        List<FileStatus> files, Configuration conf) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    int len = files.size();
    for (int i = 0; i < len; ++i) {
        FileStatus file = files.get(i);
        if (file.isDir()) {
            Path p = file.getPath();
            FileSystem fs = p.getFileSystem(conf);
            addInputPathRecursively(result, fs, p, hiddenFileFilter);
        } else {
            result.add(file);
        }
    }
    log.info("Total input paths to process : " + result.size());
    return result;
}
 
Example 8
Source File: RaidNode.java    From RDFS with Apache License 2.0
public static List<FileStatus> listDirectoryRaidFileStatus(
		Configuration conf, FileSystem srcFs, Path p) throws IOException {
	long minFileSize = conf.getLong(MINIMUM_RAIDABLE_FILESIZE_KEY,
			MINIMUM_RAIDABLE_FILESIZE);
	List<FileStatus> lfs = new ArrayList<FileStatus>();
	FileStatus[] files = srcFs.listStatus(p);
	for (FileStatus stat : files) {
		if (stat.isDir()) {
			return null;
		}
		// We don't raid too small files
		if (stat.getLen() < minFileSize) {
			continue;
		}
		lfs.add(stat);
	}
	if (lfs.size() == 0)
		return null;
	return lfs;
}
 
Example 9
Source File: FastCopy.java    From RDFS with Apache License 2.0
/**
 * Recursively lists out all the files under a given path.
 *
 * @param root
 *          the path under which we want to list out files
 * @param fs
 *          the filesystem
 * @param result
 *          the list which holds all the files
 * @param dstPath
 *          the destination path corresponding to root
 * @throws IOException
 */
private static void getDirectoryListing(FileStatus root, FileSystem fs,
    List<CopyPath> result, Path dstPath) throws IOException {
  if (!root.isDir()) {
    result.add(new CopyPath(root.getPath(), dstPath));
    return;
  }

  for (FileStatus child : fs.listStatus(root.getPath())) {
    getDirectoryListing(child, fs, result, new Path(dstPath, child.getPath()
        .getName()));
  }
}
 
Example 10
Source File: Util.java    From spork with Apache License 2.0
static public void copyFromClusterToLocal(MiniGenericCluster cluster,
        String fileNameOnCluster, String localFileName) throws IOException {
    if (Util.WINDOWS) {
        fileNameOnCluster = fileNameOnCluster.replace('\\', '/');
        localFileName = localFileName.replace('\\', '/');
    }
    File parent = new File(localFileName).getParentFile();
    if (!parent.exists()) {
        parent.mkdirs();
    }
    PrintWriter writer = new PrintWriter(new FileWriter(localFileName));

    FileSystem fs = FileSystem.get(ConfigurationUtil.toConfiguration(
            cluster.getProperties()));
    if (!fs.exists(new Path(fileNameOnCluster))) {
        throw new IOException("File " + fileNameOnCluster
                + " does not exist on the minicluster");
    }

    String line = null;
    FileStatus fst = fs.getFileStatus(new Path(fileNameOnCluster));
    if (fst.isDir()) {
        throw new IOException("Only files from cluster can be copied locally,"
                + " " + fileNameOnCluster + " is a directory");
    }
    FSDataInputStream stream = fs.open(new Path(fileNameOnCluster));
    BufferedReader reader = new BufferedReader(new InputStreamReader(stream));
    while ((line = reader.readLine()) != null) {
        writer.println(line);
    }

    reader.close();
    writer.close();
}
 
Example 11
Source File: IgniteHadoopFileSystemAbstractSelfTest.java    From ignite with Apache License 2.0
@SuppressWarnings("deprecation")
@Override public int compare(FileStatus o1, FileStatus o2) {
    if (o1 == null || o2 == null)
        return o1 == o2 ? 0 : o1 == null ? -1 : 1;

    return o1.isDir() == o2.isDir() ? o1.getPath().compareTo(o2.getPath()) : o1.isDir() ? -1 : 1;
}
 
Example 12
Source File: FileStatusExtended.java    From RDFS with Apache License 2.0
public FileStatusExtended(FileStatus stat, Block[] blocks, String leaseHolder) {
  super(stat.getLen(), stat.isDir(), stat.getReplication(),
      stat.getBlockSize(), stat.getModificationTime(), stat.getAccessTime(),
      stat.getPermission(), stat.getOwner(), stat.getGroup(), 
      stat.getPath());
  this.blocks = blocks;
  this.leaseHolder = (leaseHolder == null) ? "" : leaseHolder;
}
 
Example 13
Source File: DistCp.java    From RDFS with Apache License 2.0
private static void updateDestStatus(FileStatus src, FileStatus dst,
    EnumSet<FileAttribute> preserved, FileSystem destFileSys
    ) throws IOException {
  String owner = null;
  String group = null;
  if (preserved.contains(FileAttribute.USER)
      && !src.getOwner().equals(dst.getOwner())) {
    owner = src.getOwner();
  }
  if (preserved.contains(FileAttribute.GROUP)
      && !src.getGroup().equals(dst.getGroup())) {
    group = src.getGroup();
  }
  if (owner != null || group != null) {
    destFileSys.setOwner(dst.getPath(), owner, group);
  }
  if (preserved.contains(FileAttribute.PERMISSION)
      && !src.getPermission().equals(dst.getPermission())) {
    destFileSys.setPermission(dst.getPath(), src.getPermission());
  }
  if (preserved.contains(FileAttribute.TIMES)) {
    try {
      destFileSys.setTimes(dst.getPath(), src.getModificationTime(), src.getAccessTime());
    } catch (IOException exc) {
      if (!dst.isDir()) { //hadoop 0.20 doesn't allow setTimes on dirs
        throw exc;
      }
    }
  }
}
 
Example 14
Source File: HdfsResourceLoader.java    From ambiverse-nlu with Apache License 2.0
private void doRetrieveMatchingResources(Path rootDir, String subPattern, Set<Resource> results) throws IOException {
    if (!this.fs.isFile(rootDir)) {
        FileStatus[] statuses = this.fs.listStatus(rootDir);
        if (!ObjectUtils.isEmpty(statuses)) {
            String root = rootDir.toUri().getPath();
            for (FileStatus fileStatus : statuses) {
                Path p = fileStatus.getPath();
                String location = p.toUri().getPath();
                if (location.startsWith(root)) {
                    location = location.substring(root.length());
                }

                if (fileStatus.isDir() && this.pathMatcher.matchStart(subPattern, location)) {
                    this.doRetrieveMatchingResources(p, subPattern, results);
                } else if (this.pathMatcher.match(subPattern.substring(1), location)) {
                    results.add(new HdfsResource(p, this.fs));
                }
            }
        }
    } else if (this.pathMatcher.match(subPattern, stripPrefix(rootDir.toUri().getPath()))) {
        results.add(new HdfsResource(rootDir, this.fs));
    }
}
 
Example 15
Source File: HadoopConnectingFileSystemProvider.java    From CloverETL-Engine with GNU Lesser General Public License v2.1
@Override
public HadoopFileStatus getExtendedStatus(URI path) throws IOException {
	checkConnected();
	FileStatus status = dfs.getFileStatus(new Path(path));
	return new HadoopFileStatus(status.getPath().toUri(), status.getLen(), status.isDir(),
			status.getModificationTime(), status.getBlockSize(), status.getGroup(), status.getOwner(),
			status.getReplication());
}
 
Example 16
Source File: FileInputFormat.java    From hadoop-gpu with Apache License 2.0
/** List input directories.
 * Subclasses may override to, e.g., select only files matching a regular
 * expression. 
 * 
 * @param job the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException if zero items.
 */
protected List<FileStatus> listStatus(JobContext job
                                      ) throws IOException {
  List<FileStatus> result = new ArrayList<FileStatus>();
  Path[] dirs = getInputPaths(job);
  if (dirs.length == 0) {
    throw new IOException("No input paths specified in job");
  }

  List<IOException> errors = new ArrayList<IOException>();
  
  // creates a MultiPathFilter with the hiddenFileFilter and the
  // user provided one (if any).
  List<PathFilter> filters = new ArrayList<PathFilter>();
  filters.add(hiddenFileFilter);
  PathFilter jobFilter = getInputPathFilter(job);
  if (jobFilter != null) {
    filters.add(jobFilter);
  }
  PathFilter inputFilter = new MultiPathFilter(filters);
  
  for (int i=0; i < dirs.length; ++i) {
    Path p = dirs[i];
    FileSystem fs = p.getFileSystem(job.getConfiguration()); 
    FileStatus[] matches = fs.globStatus(p, inputFilter);
    if (matches == null) {
      errors.add(new IOException("Input path does not exist: " + p));
    } else if (matches.length == 0) {
      errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
    } else {
      for (FileStatus globStat: matches) {
        if (globStat.isDir()) {
          for(FileStatus stat: fs.listStatus(globStat.getPath(),
              inputFilter)) {
            result.add(stat);
          }          
        } else {
          result.add(globStat);
        }
      }
    }
  }

  if (!errors.isEmpty()) {
    throw new InvalidInputException(errors);
  }
  LOG.info("Total input paths to process : " + result.size()); 
  return result;
}
 
Example 17
Source File: FileInputFormat.java    From RDFS with Apache License 2.0
/** List input directories.
 * This method is marked final to make sure it does not get overridden
 * by any subclass. If a subclass historically overrode this method, it
 * now needs to override {@link #listLocatedStatus(JobContext)} instead.
 * 
 * @param job the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException if zero items.
 */
final static protected List<FileStatus> listStatus(JobContext job
                                      ) throws IOException {
  List<FileStatus> result = new ArrayList<FileStatus>();
  Path[] dirs = getInputPaths(job);
  if (dirs.length == 0) {
    throw new IOException("No input paths specified in job");
  }

  List<IOException> errors = new ArrayList<IOException>();
  
  // creates a MultiPathFilter with the hiddenFileFilter and the
  // user provided one (if any).
  List<PathFilter> filters = new ArrayList<PathFilter>();
  filters.add(hiddenFileFilter);
  PathFilter jobFilter = getInputPathFilter(job);
  if (jobFilter != null) {
    filters.add(jobFilter);
  }
  PathFilter inputFilter = new MultiPathFilter(filters);
  
  for (int i=0; i < dirs.length; ++i) {
    Path p = dirs[i];
    FileSystem fs = p.getFileSystem(job.getConfiguration()); 
    FileStatus[] matches = fs.globStatus(p, inputFilter);
    if (matches == null) {
      errors.add(new IOException("Input path does not exist: " + p));
    } else if (matches.length == 0) {
      errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
    } else {
      for (FileStatus globStat: matches) {
        if (globStat.isDir()) {
          for(FileStatus stat: fs.listStatus(globStat.getPath(),
              inputFilter)) {
            result.add(stat);
          }          
        } else {
          result.add(globStat);
        }
      }
    }
  }

  if (!errors.isEmpty()) {
    throw new InvalidInputException(errors);
  }
  LOG.info("Total input paths to process : " + result.size());
  return result;
}
 
Example 18
Source File: GoogleHadoopFileSystemBase.java    From hadoop-connectors with Apache License 2.0
private static boolean isImplicitDirectory(FileStatus curr) {
  // Modification time of 0 indicates implicit directory.
  return curr.isDir() && curr.getModificationTime() == 0;
}
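For context (an inference about the Google Cloud Storage connector, not stated in the snippet itself): object stores have no native directories, so a directory may exist only implicitly as a common prefix of object names. Such inferred entries carry no metadata of their own, which is why a modification time of 0 distinguishes them from explicitly created directories.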
 
Example 19
Source File: DistCp.java    From hadoop-gpu with Apache License 2.0
/** Delete the dst files/dirs which do not exist in src */
static private void deleteNonexisting(
    FileSystem dstfs, FileStatus dstroot, Path dstsorted,
    FileSystem jobfs, Path jobdir, JobConf jobconf, Configuration conf
    ) throws IOException {
  if (!dstroot.isDir()) {
    throw new IOException("dst must be a directory when option "
        + Options.DELETE.cmd + " is set, but dst (= " + dstroot.getPath()
        + ") is not a directory.");
  }

  //write dst lsr results
  final Path dstlsr = new Path(jobdir, "_distcp_dst_lsr");
  final SequenceFile.Writer writer = SequenceFile.createWriter(jobfs, jobconf,
      dstlsr, Text.class, FileStatus.class,
      SequenceFile.CompressionType.NONE);
  try {
    //do lsr to get all file statuses in dstroot
    final Stack<FileStatus> lsrstack = new Stack<FileStatus>();
    for(lsrstack.push(dstroot); !lsrstack.isEmpty(); ) {
      final FileStatus status = lsrstack.pop();
      if (status.isDir()) {
        for(FileStatus child : dstfs.listStatus(status.getPath())) {
          String relative = makeRelative(dstroot.getPath(), child.getPath());
          writer.append(new Text(relative), child);
          lsrstack.push(child);
        }
      }
    }
  } finally {
    checkAndClose(writer);
  }

  //sort lsr results
  final Path sortedlsr = new Path(jobdir, "_distcp_dst_lsr_sorted");
  SequenceFile.Sorter sorter = new SequenceFile.Sorter(jobfs,
      new Text.Comparator(), Text.class, FileStatus.class, jobconf);
  sorter.sort(dstlsr, sortedlsr);

  //compare lsr list and dst list  
  SequenceFile.Reader lsrin = null;
  SequenceFile.Reader dstin = null;
  try {
    lsrin = new SequenceFile.Reader(jobfs, sortedlsr, jobconf);
    dstin = new SequenceFile.Reader(jobfs, dstsorted, jobconf);

    //compare sorted lsr list and sorted dst list
    final Text lsrpath = new Text();
    final FileStatus lsrstatus = new FileStatus();
    final Text dstpath = new Text();
    final Text dstfrom = new Text();
    final FsShell shell = new FsShell(conf);
    final String[] shellargs = {"-rmr", null};

    boolean hasnext = dstin.next(dstpath, dstfrom);
    for(; lsrin.next(lsrpath, lsrstatus); ) {
      int dst_cmp_lsr = dstpath.compareTo(lsrpath);
      for(; hasnext && dst_cmp_lsr < 0; ) {
        hasnext = dstin.next(dstpath, dstfrom);
        dst_cmp_lsr = dstpath.compareTo(lsrpath);
      }
      
      if (dst_cmp_lsr == 0) {
        //lsrpath exists in dst, skip it
        hasnext = dstin.next(dstpath, dstfrom);
      }
      else {
        //lsrpath does not exist, delete it
        String s = new Path(dstroot.getPath(), lsrpath.toString()).toString();
        if (shellargs[1] == null || !isAncestorPath(shellargs[1], s)) {
          shellargs[1] = s;
          int r = 0;
          try {
             r = shell.run(shellargs);
          } catch(Exception e) {
            throw new IOException("Exception from shell.", e);
          }
          if (r != 0) {
            throw new IOException("\"" + shellargs[0] + " " + shellargs[1]
                + "\" returns non-zero value " + r);
          }
        }
      }
    }
  } finally {
    checkAndClose(lsrin);
    checkAndClose(dstin);
  }
}
 
Example 20
Source File: PathPartitionHelper.java    From spork with Apache License 2.0
/**
 * Recursively works through all directories, skipping filtered partitions.
 *
 * @param expressionFactory
 * @param partitionExpression
 * @param fs
 * @param fileStatus
 * @param partitionLevel
 * @param partitionKeys
 * @param splitPaths
 * @throws IOException
 */
private void getPartitionedFiles(ExpressionFactory expressionFactory,
        String partitionExpression, FileSystem fs, FileStatus fileStatus,
        int partitionLevel, String[] partitionKeys,
        List<FileStatus> splitPaths) throws IOException {

    String partition = (partitionLevel < partitionKeys.length)
            ? partitionKeys[partitionLevel] : null;

    Path path = fileStatus.getPath();

    // filter out hidden files
    if (path.getName().startsWith("_")) {
        return;
    }

    // pre-filter logic: return if any of the checks fail
    if (partition != null) {
        if (fileStatus.isDir()) {
            // check that the directory name matches the partition name
            if (!path.getName().startsWith(partition))
                return;
        } else {
            // it is a file, but we are not at the end of the partition
            // tree yet, so it is ignored
            return;
        }

        // at this point we are inside the partition, so the path contains
        // all partitions plus their values; we can apply the partition
        // filter expression that was passed to
        // HiveColumnarLoader.setPartitionExpression
        if (partitionLevel == (partitionKeys.length - 1)
                && !evaluatePartitionExpression(expressionFactory,
                        partitionExpression, path)) {
            LOG.debug("Pruning partition: " + path);
            return;
        }
    }

    // after this point we know that either the partition is null, which
    // means we are at the end of the partition tree and all files and
    // subdirectories should be included, or that we are still navigating
    // the partition tree
    int nextPartitionLevel = partitionLevel + 1;

    // iterate over directories if fileStatus is a dir
    FileStatus[] childStatusArr = null;

    if (fileStatus.isDir()) {
        if ((childStatusArr = fs.listStatus(path)) != null) {
            for (FileStatus childFileStatus : childStatusArr) {
                getPartitionedFiles(expressionFactory, partitionExpression,
                        fs, childFileStatus, nextPartitionLevel,
                        partitionKeys, splitPaths);
            }
        }
    } else {
        // add file to splitPaths
        splitPaths.add(fileStatus);
    }
}