Java Code Examples for org.apache.hadoop.fs.FileStatus#isDir()
The following examples show how to use org.apache.hadoop.fs.FileStatus#isDir().
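Before diving into the examples, here is a minimal, self-contained sketch of the call pattern they all share: obtain a FileStatus from a FileSystem and branch on isDir(). The class name IsDirExample and the command-line argument handling are illustrative additions, not code from any project below. Note that isDir() is deprecated in Hadoop 2.x and later in favor of FileStatus#isDirectory(); it is retained for backward compatibility, which is why some examples suppress deprecation warnings.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical driver class; the path argument is illustrative.
public class IsDirExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path path = new Path(args[0]);
    FileSystem fs = path.getFileSystem(conf);
    FileStatus status = fs.getFileStatus(path);
    // isDir() is deprecated in Hadoop 2.x and later;
    // FileStatus#isDirectory() is the modern replacement.
    if (status.isDir()) {
      System.out.println(path + " is a directory");
    } else {
      System.out.println(path + " is a file, " + status.getLen() + " bytes");
    }
  }
}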
Example 1
Source File: MergeCommand.java (from parquet-mr, Apache License 2.0)
/**
 * Get all input files.
 * @param input input files or directory.
 * @return ordered input files.
 */
private List<Path> getInputFiles(List<String> input) throws IOException {
  List<Path> inputFiles = null;

  if (input.size() == 1) {
    Path p = new Path(input.get(0));
    FileSystem fs = p.getFileSystem(conf);
    FileStatus status = fs.getFileStatus(p);
    if (status.isDir()) {
      inputFiles = getInputFilesFromDirectory(status);
    }
  } else {
    inputFiles = parseInputFiles(input);
  }

  checkParquetFiles(inputFiles);

  return inputFiles;
}
Example 2
Source File: AvroStorageUtils.java (from Cubert, Apache License 2.0)
/** get last file of a hdfs path if it is a directory;
 *  or return the file itself if path is a file
 */
public static Path getLast(Path path, FileSystem fs) throws IOException {
  FileStatus status = fs.getFileStatus(path);
  if (!status.isDir()) {
    return path;
  }
  FileStatus[] statuses = fs.listStatus(path, PATH_FILTER);
  if (statuses.length == 0) {
    return null;
  } else {
    Arrays.sort(statuses);
    for (int i = statuses.length - 1; i >= 0; i--) {
      if (!statuses[i].isDir()) {
        return statuses[i].getPath();
      }
    }
    return null;
  }
}
Example 3
Source File: MapRedUtil.java (from spork, Apache License 2.0)
/**
 * Returns the total number of bytes for this file, or if a directory all
 * files in the directory.
 *
 * @param fs FileSystem
 * @param status FileStatus
 * @param max Maximum value of total length that will trigger exit. Many
 *        times we're only interested whether the total length of files is
 *        greater than X or not. In such case, we can exit the function
 *        early as soon as the max is reached.
 * @return
 * @throws IOException
 */
public static long getPathLength(FileSystem fs, FileStatus status, long max)
    throws IOException {
  if (!status.isDir()) {
    return status.getLen();
  } else {
    FileStatus[] children = fs.listStatus(status.getPath(), hiddenFileFilter);
    long size = 0;
    for (FileStatus child : children) {
      size += getPathLength(fs, child, max);
      if (size > max) return size;
    }
    return size;
  }
}
Example 4
Source File: TestMapRed.java (from RDFS, Apache License 2.0)
private static void printFiles(Path dir, Configuration conf) throws IOException {
  FileSystem fs = dir.getFileSystem(conf);
  for (FileStatus f : fs.listStatus(dir)) {
    System.out.println("Reading " + f.getPath() + ": ");
    if (f.isDir()) {
      System.out.println("  it is a map file.");
      printSequenceFile(fs, new Path(f.getPath(), "data"), conf);
    } else if (isSequenceFile(fs, f.getPath())) {
      System.out.println("  it is a sequence file.");
      printSequenceFile(fs, f.getPath(), conf);
    } else {
      System.out.println("  it is a text file.");
      printTextFile(fs, f.getPath());
    }
  }
}
Example 5
Source File: DirectoryMonitorDiscovery.java (from flink, Apache License 2.0)
private static void listStatusRecursively(
    FileSystem fs,
    FileStatus fileStatus,
    int level,
    int expectLevel,
    List<FileStatus> results) throws IOException {
  if (expectLevel == level) {
    results.add(fileStatus);
    return;
  }

  if (fileStatus.isDir()) {
    for (FileStatus stat : fs.listStatus(fileStatus.getPath())) {
      listStatusRecursively(fs, stat, level + 1, expectLevel, results);
    }
  }
}
Example 6
Source File: AvroHdfsFileReader.java (from ml-ease, Apache License 2.0)
@Override
protected List<Path> getPaths(String filePath) throws IOException {
  Path path = new Path(filePath);
  FileSystem fs = path.getFileSystem(getConf());
  List<Path> paths = new ArrayList<Path>();
  for (FileStatus status : fs.listStatus(path)) {
    if (status.isDir() && !AvroUtils.shouldPathBeIgnored(status.getPath())) {
      paths.addAll(getPaths(status.getPath().toString()));
    } else if (isAvro(status.getPath())) {
      paths.add(status.getPath());
    }
  }
  return paths;
}
Example 7
Source File: MapRedUtil.java (from spork, Apache License 2.0)
/**
 * Get all files recursively from the given list of files
 *
 * @param files a list of FileStatus
 * @param conf the configuration object
 * @return the list of FileStatus that contains all the files in the given
 *         list and, recursively, all the files inside the directories in
 *         the given list
 * @throws IOException
 */
public static List<FileStatus> getAllFileRecursively(
    List<FileStatus> files, Configuration conf) throws IOException {
  List<FileStatus> result = new ArrayList<FileStatus>();
  int len = files.size();
  for (int i = 0; i < len; ++i) {
    FileStatus file = files.get(i);
    if (file.isDir()) {
      Path p = file.getPath();
      FileSystem fs = p.getFileSystem(conf);
      addInputPathRecursively(result, fs, p, hiddenFileFilter);
    } else {
      result.add(file);
    }
  }
  log.info("Total input paths to process : " + result.size());
  return result;
}
Example 8
Source File: RaidNode.java (from RDFS, Apache License 2.0)
public static List<FileStatus> listDirectoryRaidFileStatus(
    Configuration conf, FileSystem srcFs, Path p) throws IOException {
  long minFileSize = conf.getLong(MINIMUM_RAIDABLE_FILESIZE_KEY,
      MINIMUM_RAIDABLE_FILESIZE);
  List<FileStatus> lfs = new ArrayList<FileStatus>();
  FileStatus[] files = srcFs.listStatus(p);
  for (FileStatus stat : files) {
    if (stat.isDir()) {
      return null;
    }
    // We don't raid too small files
    if (stat.getLen() < minFileSize) {
      continue;
    }
    lfs.add(stat);
  }
  if (lfs.size() == 0)
    return null;
  return lfs;
}
Example 9
Source File: FastCopy.java (from RDFS, Apache License 2.0)
/**
 * Recursively lists out all the files under a given path.
 *
 * @param root the path under which we want to list out files
 * @param fs the filesystem
 * @param result the list which holds all the files.
 * @throws IOException
 */
private static void getDirectoryListing(FileStatus root, FileSystem fs,
    List<CopyPath> result, Path dstPath) throws IOException {
  if (!root.isDir()) {
    result.add(new CopyPath(root.getPath(), dstPath));
    return;
  }

  for (FileStatus child : fs.listStatus(root.getPath())) {
    getDirectoryListing(child, fs, result,
        new Path(dstPath, child.getPath().getName()));
  }
}
Example 10
Source File: Util.java (from spork, Apache License 2.0)
static public void copyFromClusterToLocal(MiniGenericCluster cluster,
    String fileNameOnCluster, String localFileName) throws IOException {
  if (Util.WINDOWS) {
    fileNameOnCluster = fileNameOnCluster.replace('\\', '/');
    localFileName = localFileName.replace('\\', '/');
  }
  File parent = new File(localFileName).getParentFile();
  if (!parent.exists()) {
    parent.mkdirs();
  }
  PrintWriter writer = new PrintWriter(new FileWriter(localFileName));

  FileSystem fs = FileSystem.get(ConfigurationUtil.toConfiguration(
      cluster.getProperties()));
  if (!fs.exists(new Path(fileNameOnCluster))) {
    throw new IOException("File " + fileNameOnCluster +
        " does not exist on the minicluster");
  }

  String line = null;
  FileStatus fst = fs.getFileStatus(new Path(fileNameOnCluster));
  if (fst.isDir()) {
    throw new IOException("Only files from cluster can be copied locally," +
        " " + fileNameOnCluster + " is a directory");
  }
  FSDataInputStream stream = fs.open(new Path(fileNameOnCluster));
  BufferedReader reader = new BufferedReader(new InputStreamReader(stream));

  while ((line = reader.readLine()) != null) {
    writer.println(line);
  }

  reader.close();
  writer.close();
}
Example 11
Source File: IgniteHadoopFileSystemAbstractSelfTest.java (from ignite, Apache License 2.0)
@SuppressWarnings("deprecation") @Override public int compare(FileStatus o1, FileStatus o2) { if (o1 == null || o2 == null) return o1 == o2 ? 0 : o1 == null ? -1 : 1; return o1.isDir() == o2.isDir() ? o1.getPath().compareTo(o2.getPath()) : o1.isDir() ? -1 : 1; }
Example 12
Source File: FileStatusExtended.java (from RDFS, Apache License 2.0)
public FileStatusExtended(FileStatus stat, Block[] blocks, String leaseHolder) {
  super(stat.getLen(), stat.isDir(), stat.getReplication(),
      stat.getBlockSize(), stat.getModificationTime(), stat.getAccessTime(),
      stat.getPermission(), stat.getOwner(), stat.getGroup(),
      stat.getPath());
  this.blocks = blocks;
  this.leaseHolder = (leaseHolder == null) ? "" : leaseHolder;
}
Example 13
Source File: DistCp.java (from RDFS, Apache License 2.0)
private static void updateDestStatus(FileStatus src, FileStatus dst,
    EnumSet<FileAttribute> preserved, FileSystem destFileSys
    ) throws IOException {
  String owner = null;
  String group = null;
  if (preserved.contains(FileAttribute.USER)
      && !src.getOwner().equals(dst.getOwner())) {
    owner = src.getOwner();
  }
  if (preserved.contains(FileAttribute.GROUP)
      && !src.getGroup().equals(dst.getGroup())) {
    group = src.getGroup();
  }
  if (owner != null || group != null) {
    destFileSys.setOwner(dst.getPath(), owner, group);
  }
  if (preserved.contains(FileAttribute.PERMISSION)
      && !src.getPermission().equals(dst.getPermission())) {
    destFileSys.setPermission(dst.getPath(), src.getPermission());
  }
  if (preserved.contains(FileAttribute.TIMES)) {
    try {
      destFileSys.setTimes(dst.getPath(),
          src.getModificationTime(), src.getAccessTime());
    } catch (IOException exc) {
      if (!dst.isDir()) { // hadoop 0.20 doesn't allow setTimes on dirs
        throw exc;
      }
    }
  }
}
Example 14
Source File: HdfsResourceLoader.java (from ambiverse-nlu, Apache License 2.0)
private void doRetrieveMatchingResources(Path rootDir, String subPattern,
    Set<Resource> results) throws IOException {
  if (!this.fs.isFile(rootDir)) {
    FileStatus[] statuses = this.fs.listStatus(rootDir);
    if (!ObjectUtils.isEmpty(statuses)) {
      String root = rootDir.toUri().getPath();
      for (FileStatus fileStatus : statuses) {
        Path p = fileStatus.getPath();
        String location = p.toUri().getPath();
        if (location.startsWith(root)) {
          location = location.substring(root.length());
        }

        if (fileStatus.isDir() && this.pathMatcher.matchStart(subPattern, location)) {
          this.doRetrieveMatchingResources(p, subPattern, results);
        } else if (this.pathMatcher.match(subPattern.substring(1), location)) {
          results.add(new HdfsResource(p, this.fs));
        }
      }
    }
  } else if (this.pathMatcher.match(subPattern,
      stripPrefix(rootDir.toUri().getPath()))) {
    results.add(new HdfsResource(rootDir, this.fs));
  }
}
Example 15
Source File: HadoopConnectingFileSystemProvider.java (from CloverETL-Engine, GNU Lesser General Public License v2.1)
@Override
public HadoopFileStatus getExtendedStatus(URI path) throws IOException {
  checkConnected();
  FileStatus status = dfs.getFileStatus(new Path(path));
  return new HadoopFileStatus(status.getPath().toUri(), status.getLen(),
      status.isDir(), status.getModificationTime(), status.getBlockSize(),
      status.getGroup(), status.getOwner(), status.getReplication());
}
Example 16
Source File: FileInputFormat.java (from hadoop-gpu, Apache License 2.0)
/** List input directories.
 * Subclasses may override to, e.g., select only files matching a regular
 * expression.
 *
 * @param job the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException if zero items.
 */
protected List<FileStatus> listStatus(JobContext job) throws IOException {
  List<FileStatus> result = new ArrayList<FileStatus>();
  Path[] dirs = getInputPaths(job);
  if (dirs.length == 0) {
    throw new IOException("No input paths specified in job");
  }

  List<IOException> errors = new ArrayList<IOException>();

  // creates a MultiPathFilter with the hiddenFileFilter and the
  // user provided one (if any).
  List<PathFilter> filters = new ArrayList<PathFilter>();
  filters.add(hiddenFileFilter);
  PathFilter jobFilter = getInputPathFilter(job);
  if (jobFilter != null) {
    filters.add(jobFilter);
  }
  PathFilter inputFilter = new MultiPathFilter(filters);

  for (int i = 0; i < dirs.length; ++i) {
    Path p = dirs[i];
    FileSystem fs = p.getFileSystem(job.getConfiguration());
    FileStatus[] matches = fs.globStatus(p, inputFilter);
    if (matches == null) {
      errors.add(new IOException("Input path does not exist: " + p));
    } else if (matches.length == 0) {
      errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
    } else {
      for (FileStatus globStat : matches) {
        if (globStat.isDir()) {
          for (FileStatus stat : fs.listStatus(globStat.getPath(), inputFilter)) {
            result.add(stat);
          }
        } else {
          result.add(globStat);
        }
      }
    }
  }

  if (!errors.isEmpty()) {
    throw new InvalidInputException(errors);
  }
  LOG.info("Total input paths to process : " + result.size());
  return result;
}
Example 17
Source File: FileInputFormat.java (from RDFS, Apache License 2.0)
/** List input directories.
 * Mark this method to be final to make sure this method does not
 * get overridden by any subclass.
 * If a subclass historically overrides this method, now it needs to override
 * {@link #listLocatedStatus(JobContext)} instead.
 *
 * @param job the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException if zero items.
 */
final static protected List<FileStatus> listStatus(JobContext job)
    throws IOException {
  List<FileStatus> result = new ArrayList<FileStatus>();
  Path[] dirs = getInputPaths(job);
  if (dirs.length == 0) {
    throw new IOException("No input paths specified in job");
  }

  List<IOException> errors = new ArrayList<IOException>();

  // creates a MultiPathFilter with the hiddenFileFilter and the
  // user provided one (if any).
  List<PathFilter> filters = new ArrayList<PathFilter>();
  filters.add(hiddenFileFilter);
  PathFilter jobFilter = getInputPathFilter(job);
  if (jobFilter != null) {
    filters.add(jobFilter);
  }
  PathFilter inputFilter = new MultiPathFilter(filters);

  for (int i = 0; i < dirs.length; ++i) {
    Path p = dirs[i];
    FileSystem fs = p.getFileSystem(job.getConfiguration());
    FileStatus[] matches = fs.globStatus(p, inputFilter);
    if (matches == null) {
      errors.add(new IOException("Input path does not exist: " + p));
    } else if (matches.length == 0) {
      errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
    } else {
      for (FileStatus globStat : matches) {
        if (globStat.isDir()) {
          for (FileStatus stat : fs.listStatus(globStat.getPath(), inputFilter)) {
            result.add(stat);
          }
        } else {
          result.add(globStat);
        }
      }
    }
  }

  if (!errors.isEmpty()) {
    throw new InvalidInputException(errors);
  }
  LOG.info("Total input paths to process : " + result.size());
  return result;
}
Example 18
Source File: GoogleHadoopFileSystemBase.java (from hadoop-connectors, Apache License 2.0)
private static boolean isImplicitDirectory(FileStatus curr) {
  // Modification time of 0 indicates implicit directory.
  return curr.isDir() && curr.getModificationTime() == 0;
}
Example 19
Source File: DistCp.java (from hadoop-gpu, Apache License 2.0)
/** Delete the dst files/dirs which do not exist in src */
static private void deleteNonexisting(
    FileSystem dstfs, FileStatus dstroot, Path dstsorted,
    FileSystem jobfs, Path jobdir, JobConf jobconf, Configuration conf
    ) throws IOException {
  if (!dstroot.isDir()) {
    throw new IOException("dst must be a directory when option "
        + Options.DELETE.cmd + " is set, but dst (= " + dstroot.getPath()
        + ") is not a directory.");
  }

  //write dst lsr results
  final Path dstlsr = new Path(jobdir, "_distcp_dst_lsr");
  final SequenceFile.Writer writer = SequenceFile.createWriter(jobfs, jobconf,
      dstlsr, Text.class, FileStatus.class,
      SequenceFile.CompressionType.NONE);
  try {
    //do lsr to get all file statuses in dstroot
    final Stack<FileStatus> lsrstack = new Stack<FileStatus>();
    for (lsrstack.push(dstroot); !lsrstack.isEmpty(); ) {
      final FileStatus status = lsrstack.pop();
      if (status.isDir()) {
        for (FileStatus child : dstfs.listStatus(status.getPath())) {
          String relative = makeRelative(dstroot.getPath(), child.getPath());
          writer.append(new Text(relative), child);
          lsrstack.push(child);
        }
      }
    }
  } finally {
    checkAndClose(writer);
  }

  //sort lsr results
  final Path sortedlsr = new Path(jobdir, "_distcp_dst_lsr_sorted");
  SequenceFile.Sorter sorter = new SequenceFile.Sorter(jobfs,
      new Text.Comparator(), Text.class, FileStatus.class, jobconf);
  sorter.sort(dstlsr, sortedlsr);

  //compare lsr list and dst list
  SequenceFile.Reader lsrin = null;
  SequenceFile.Reader dstin = null;
  try {
    lsrin = new SequenceFile.Reader(jobfs, sortedlsr, jobconf);
    dstin = new SequenceFile.Reader(jobfs, dstsorted, jobconf);

    //compare sorted lsr list and sorted dst list
    final Text lsrpath = new Text();
    final FileStatus lsrstatus = new FileStatus();
    final Text dstpath = new Text();
    final Text dstfrom = new Text();
    final FsShell shell = new FsShell(conf);
    final String[] shellargs = {"-rmr", null};

    boolean hasnext = dstin.next(dstpath, dstfrom);
    for (; lsrin.next(lsrpath, lsrstatus); ) {
      int dst_cmp_lsr = dstpath.compareTo(lsrpath);
      for (; hasnext && dst_cmp_lsr < 0; ) {
        hasnext = dstin.next(dstpath, dstfrom);
        dst_cmp_lsr = dstpath.compareTo(lsrpath);
      }

      if (dst_cmp_lsr == 0) {
        //lsrpath exists in dst, skip it
        hasnext = dstin.next(dstpath, dstfrom);
      } else {
        //lsrpath does not exist, delete it
        String s = new Path(dstroot.getPath(), lsrpath.toString()).toString();
        if (shellargs[1] == null || !isAncestorPath(shellargs[1], s)) {
          shellargs[1] = s;
          int r = 0;
          try {
            r = shell.run(shellargs);
          } catch (Exception e) {
            throw new IOException("Exception from shell.", e);
          }
          if (r != 0) {
            throw new IOException("\"" + shellargs[0] + " " + shellargs[1]
                + "\" returns non-zero value " + r);
          }
        }
      }
    }
  } finally {
    checkAndClose(lsrin);
    checkAndClose(dstin);
  }
}
Example 20
Source File: PathPartitionHelper.java (from spork, Apache License 2.0)
/**
 * Recursively works through all directories, skipping filtered partitions.
 *
 * @param fs
 * @param fileStatus
 * @param partitionLevel
 * @param partitionKeys
 * @param splitPaths
 * @throws IOException
 */
private void getPartitionedFiles(ExpressionFactory expressionFactory,
    String partitionExpression, FileSystem fs, FileStatus fileStatus,
    int partitionLevel, String[] partitionKeys, List<FileStatus> splitPaths)
    throws IOException {

  String partition = (partitionLevel < partitionKeys.length) ?
      partitionKeys[partitionLevel] : null;

  Path path = fileStatus.getPath();

  // filter out hidden files
  if (path.getName().startsWith("_")) {
    return;
  }

  // pre-filter logic:
  // return if any of the checks below is not true
  if (partition != null) {

    if (fileStatus.isDir()) {
      // check that the dir name is equal to that of the partition name
      if (!path.getName().startsWith(partition))
        return;

    } else {
      // else it's a file, but not at the end of the partition tree, so
      // it's ignored.
      return;
    }

    // this means we are inside the partition, so the path will
    // contain all partitions plus their values.
    // we can apply the partition filter expression here that was passed
    // to the HiveColumnarLoader.setPartitionExpression
    if (partitionLevel == (partitionKeys.length - 1)
        && !evaluatePartitionExpression(expressionFactory,
            partitionExpression, path)) {
      LOG.debug("Pruning partition: " + path);
      return;
    }
  }

  // after this point we know that the partition is either null,
  // which means we are at the end of the partition tree and all files and
  // sub directories should be included,
  // or that we are still navigating the partition tree.
  int nextPartitionLevel = partitionLevel + 1;

  // iterate over directories if fileStatus is a dir.
  FileStatus[] childStatusArr = null;
  if (fileStatus.isDir()) {
    if ((childStatusArr = fs.listStatus(path)) != null) {
      for (FileStatus childFileStatus : childStatusArr) {
        getPartitionedFiles(expressionFactory, partitionExpression, fs,
            childFileStatus, nextPartitionLevel, partitionKeys, splitPaths);
      }
    }
  } else {
    // add file to splitPaths
    splitPaths.add(fileStatus);
  }
}
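Several of the examples above (Examples 7 and 9, for instance) implement the recursive directory walk by hand around isDir(). On Hadoop 2.x and later, the same traversal can usually be written with the non-deprecated FileStatus#isDirectory() together with FileSystem#listFiles(Path, boolean), which recurses internally and returns only files. The sketch below is an illustration of that pattern under those assumptions, not code from any of the projects above; the class name RecursiveListing is made up, and the hidden-file filtering some examples apply would still need to be added by hand.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

// Hypothetical utility class illustrating the modern API.
public class RecursiveListing {
  /** Collects all regular files under root, like the hand-rolled recursion above. */
  public static List<Path> listAllFiles(Path root, Configuration conf)
      throws IOException {
    FileSystem fs = root.getFileSystem(conf);
    List<Path> result = new ArrayList<Path>();
    FileStatus rootStatus = fs.getFileStatus(root);
    // isDirectory() replaces the deprecated isDir().
    if (!rootStatus.isDirectory()) {
      result.add(root);
      return result;
    }
    // listFiles(path, true) walks the tree for us, returning files only.
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(root, true);
    while (it.hasNext()) {
      result.add(it.next().getPath());
    }
    return result;
  }
}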