Java Code Examples for org.apache.hadoop.fs.PathFilter#accept()

The following examples show how to use org.apache.hadoop.fs.PathFilter#accept(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: HDFSFile.java    From incubator-iotdb with Apache License 2.0 6 votes vote down vote up
/**
 * Lists the direct children of {@code fileFolder} whose paths are accepted by
 * {@code pathFilter}.
 *
 * <p>An {@link IOException} raised while listing is logged (including its cause) and
 * not rethrown; whatever was collected up to that point, possibly nothing, is returned.
 *
 * @param fileFolder location of the folder to list
 * @param pathFilter filter applied to the path of every listed entry
 * @return the accepted entries wrapped as {@code HDFSFile} objects; never {@code null}
 */
private List<HDFSFile> listFiles(String fileFolder, PathFilter pathFilter) {
  List<HDFSFile> files = new ArrayList<>();
  try {
    Path path = new Path(fileFolder);
    for (FileStatus fileStatus : fs.listStatus(path)) {
      Path filePath = fileStatus.getPath();
      if (pathFilter.accept(filePath)) {
        files.add(new HDFSFile(filePath.toUri().toString()));
      }
    }
  } catch (IOException e) {
    // Pass the exception as the trailing argument so the cause and stack trace are
    // recorded instead of being silently dropped.
    logger.error("Failed to list files in {}. ", fileFolder, e);
  }
  return files;
}
 
Example 2
Source File: FileInputFormat.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Walks the entries under {@code path} and appends every accepted file to
 * {@code result}, descending into accepted directories.
 *
 * <p>Entries rejected by {@code inputFilter} are skipped; a rejected directory is
 * therefore not descended into.
 *
 * @param result
 *          The list that receives the files found.
 * @param fs
 *          The FileSystem used for listing.
 * @param path
 *          The path whose entries are listed.
 * @param inputFilter
 *          The filter applied to each listed file or directory path.
 * @throws IOException
 *           if listing or iterating the entries fails
 */
protected void addInputPathRecursively(List<FileStatus> result,
    FileSystem fs, Path path, PathFilter inputFilter)
    throws IOException {
  for (RemoteIterator<LocatedFileStatus> entries = fs.listLocatedStatus(path);
      entries.hasNext();) {
    LocatedFileStatus entry = entries.next();
    if (!inputFilter.accept(entry.getPath())) {
      continue;
    }
    if (entry.isDirectory()) {
      addInputPathRecursively(result, fs, entry.getPath(), inputFilter);
    } else {
      result.add(entry);
    }
  }
}
 
Example 3
Source File: FileUtils.java    From streamx with Apache License 2.0 6 votes vote down vote up
/**
 * Recursively gathers the statuses of all non-directory entries below {@code path}
 * that {@code filter} accepts.
 *
 * <p>Directories are always descended into; the filter is consulted for
 * non-directory entries only. A path that does not exist in {@code storage}
 * yields an empty list.
 *
 * @param storage storage used for the existence check and the listing
 * @param path    root of the traversal
 * @param filter  filter applied to each non-directory entry's path
 * @return the accepted statuses; never {@code null}
 * @throws IOException if a storage call fails
 */
private static ArrayList<FileStatus> traverseImpl(Storage storage, Path path, PathFilter filter)
    throws IOException {
  ArrayList<FileStatus> collected = new ArrayList<>();
  String location = path.toString();
  if (storage.exists(location)) {
    for (FileStatus entry : storage.listStatus(location)) {
      if (entry.isDirectory()) {
        collected.addAll(traverseImpl(storage, entry.getPath(), filter));
      } else if (filter.accept(entry.getPath())) {
        collected.add(entry);
      }
    }
  }
  return collected;
}
 
Example 4
Source File: S3PartitionedOutputCommitter.java    From s3committer with Apache License 2.0 6 votes vote down vote up
/**
 * Recursively lists the files under the task attempt path and returns those whose
 * paths pass the filter obtained from {@code HiddenPathFilter.get()}.
 *
 * @param context task attempt context supplying the attempt path and configuration
 * @return statuses of the accepted files
 * @throws IOException if the file system cannot be obtained or listed
 */
@Override
protected List<FileStatus> getTaskOutput(TaskAttemptContext context)
    throws IOException {
  final PathFilter pathFilter = HiddenPathFilter.get();

  // get files on the local FS in the attempt path
  final Path attemptDir = getTaskAttemptPath(context);
  final FileSystem attemptFS = attemptDir.getFileSystem(context.getConfiguration());
  final RemoteIterator<LocatedFileStatus> files =
      attemptFS.listFiles(attemptDir, true /* recursive */);

  final List<FileStatus> accepted = Lists.newArrayList();
  while (files.hasNext()) {
    FileStatus candidate = files.next();
    if (!pathFilter.accept(candidate.getPath())) {
      continue;
    }
    accepted.add(candidate);
  }
  return accepted;
}
 
Example 5
Source File: HistoryFileManager.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Lists {@code path} (after qualifying it through {@code fc}) and returns the
 * entries that are files and whose paths are accepted by {@code pathFilter}.
 *
 * <p>A {@link FileNotFoundException} during the scan is logged and not rethrown;
 * the entries gathered so far are returned.
 *
 * @param path       directory to scan
 * @param fc         file context used to qualify and list the path
 * @param pathFilter filter applied to the path of each file entry
 * @return statuses of the accepted files; never {@code null}
 * @throws IOException on listing failures other than a missing path
 */
@VisibleForTesting
protected static List<FileStatus> scanDirectory(Path path, FileContext fc,
    PathFilter pathFilter) throws IOException {
  final Path qualified = fc.makeQualified(path);
  final List<FileStatus> matched = new ArrayList<FileStatus>();
  try {
    for (RemoteIterator<FileStatus> it = fc.listStatus(qualified); it.hasNext();) {
      FileStatus candidate = it.next();
      Path candidatePath = candidate.getPath();
      if (!candidate.isFile()) {
        continue;
      }
      if (pathFilter.accept(candidatePath)) {
        matched.add(candidate);
      }
    }
  } catch (FileNotFoundException fe) {
    LOG.error("Error while scanning directory " + qualified, fe);
  }
  return matched;
}
 
Example 6
Source File: HistoryFileManager.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Scans a directory and collects the file entries accepted by a filter.
 *
 * <p>The path is first qualified via {@code fc.makeQualified}. Only entries for
 * which {@code isFile()} is true are offered to {@code pathFilter}. If the scan
 * raises {@link FileNotFoundException}, the error is logged and the partial
 * (possibly empty) result is returned.
 *
 * @param path       directory to scan
 * @param fc         file context performing the listing
 * @param pathFilter filter deciding which file paths are kept
 * @return the accepted file statuses; never {@code null}
 * @throws IOException if listing fails for a reason other than a missing path
 */
@VisibleForTesting
protected static List<FileStatus> scanDirectory(Path path, FileContext fc,
    PathFilter pathFilter) throws IOException {
  path = fc.makeQualified(path);
  List<FileStatus> accepted = new ArrayList<FileStatus>();
  try {
    RemoteIterator<FileStatus> listing = fc.listStatus(path);
    while (listing.hasNext()) {
      FileStatus entry = listing.next();
      Path entryPath = entry.getPath();
      if (entry.isFile()) {
        if (pathFilter.accept(entryPath)) {
          accepted.add(entry);
        }
      }
    }
  } catch (FileNotFoundException missing) {
    LOG.error("Error while scanning directory " + path, missing);
  }
  return accepted;
}
 
Example 7
Source File: GenerateData.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Recursively lists the files under {@code inputDir}, totals the length and count
 * of those accepted by an {@code OutputFilesFilter}, logs both totals and returns
 * them as a {@code DataStatistics}.
 *
 * @param conf     configuration used to resolve the file system of {@code inputDir}
 * @param inputDir directory whose files are measured
 * @return statistics built from the total size, the file count and {@code false}
 * @throws IOException if the file system cannot be obtained or listed
 */
static DataStatistics publishPlainDataStatistics(Configuration conf,
                                                 Path inputDir)
    throws IOException {
  final FileSystem fs = inputDir.getFileSystem(conf);

  // accumulate size and count over the accepted input files
  long totalBytes = 0;
  long acceptedFiles = 0;
  final RemoteIterator<LocatedFileStatus> files = fs.listFiles(inputDir, true);
  final PathFilter filter = new Utils.OutputFileUtils.OutputFilesFilter();
  while (files.hasNext()) {
    LocatedFileStatus current = files.next();
    if (!filter.accept(current.getPath())) {
      continue;
    }
    totalBytes += current.getLen();
    acceptedFiles += 1;
  }

  // report the totals
  LOG.info("Total size of input data : "
           + StringUtils.humanReadableInt(totalBytes));
  LOG.info("Total number of input data files : " + acceptedFiles);

  return new DataStatistics(totalBytes, acceptedFiles, false);
}
 
Example 8
Source File: FileInputFormat.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Accepts {@code path} only if every filter in {@code filters} accepts it.
 * Evaluation stops at the first filter that rejects the path; with no filters
 * the path is accepted.
 *
 * @param path the path to test
 * @return {@code false} as soon as one filter rejects, otherwise {@code true}
 */
public boolean accept(Path path) {
  for (PathFilter candidate : filters) {
    if (candidate.accept(path)) {
      continue;
    }
    return false;
  }
  return true;
}
 
Example 9
Source File: FileAndDirectoryInputFormat.java    From marklogic-contentpump with Apache License 2.0 5 votes vote down vote up
/**
 * Tests {@code path} against all filters in {@code filters}; the path passes only
 * when none of them rejects it. Filters after the first rejection are not consulted.
 *
 * @param path the path to test
 * @return {@code true} if every filter accepts the path (or there are none)
 */
public boolean accept(Path path) {
    boolean acceptedByAll = true;
    for (PathFilter each : filters) {
        if (!each.accept(path)) {
            acceptedByAll = false;
            break;
        }
    }
    return acceptedByAll;
}
 
Example 10
Source File: MemoryStorage.java    From streamx with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a status array for every key in {@code data} that starts with
 * {@code path} and whose {@code Path} form is accepted by {@code filter}.
 *
 * <p>If the {@code failure} field is set to {@code Failure.listStatusFailure},
 * it is reset to {@code Failure.noFailure} and an {@link IOException} is thrown
 * instead of listing.
 *
 * @param path   key prefix to match
 * @param filter filter applied to each matching key's path
 * @return statuses for the accepted keys, each sized by the stored entry's {@code size()}
 * @throws IOException when the one-shot listStatus failure is armed
 */
@Override
public FileStatus[] listStatus(String path, PathFilter filter) throws IOException {
  if (failure == Failure.listStatusFailure) {
    failure = Failure.noFailure;
    throw new IOException("listStatus failed.");
  }
  List<FileStatus> matches = new ArrayList<>();
  for (String key : data.keySet()) {
    if (!key.startsWith(path)) {
      continue;
    }
    if (!filter.accept(new Path(key))) {
      continue;
    }
    matches.add(
        new FileStatus(data.get(key).size(), false, 1, 0, 0, 0, null, null, null, new Path(key)));
  }
  FileStatus[] asArray = new FileStatus[matches.size()];
  return matches.toArray(asArray);
}
 
Example 11
Source File: HCatMetadataServiceTest.java    From hadoop-etl-udfs with MIT License 5 votes vote down vote up
/**
 * Looks up the entries registered in {@code pathsAndContent} under
 * {@code path.toString()} and returns a fake status for each entry whose path
 * is accepted by {@code filter}.
 *
 * @param path   directory key to look up
 * @param filter filter applied to each registered entry's path
 * @return fake statuses for the accepted entries
 * @throws IOException declared by the overridden method
 */
@Override
public FileStatus[] listStatus(Path path, PathFilter filter) throws IOException {
    List<FileStatus> children = pathsAndContent.get(path.toString());
    List<FileStatus> accepted = new ArrayList<>();
    for (FileStatus child : children) {
        if (!filter.accept(child.getPath())) {
            continue;
        }
        accepted.add(fakeFileStatus(child.getPath().toString()));
    }
    FileStatus[] asArray = new FileStatus[accepted.size()];
    return accepted.toArray(asArray);
}
 
Example 12
Source File: ContainerFileSystem.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Lists located statuses for {@code f} by delegating to the file system resolved
 * for that path, using the path with its container stripped.
 *
 * <p>The caller's {@code filter} is applied to each candidate path after it has
 * been passed through {@code transform(path, container)}, and every returned
 * status is likewise rebuilt via {@code ContainerFileSystem.transform} while
 * keeping its block locations.
 *
 * @param f      path to list
 * @param filter caller-supplied filter, evaluated against transformed paths
 * @return iterator over the transformed located statuses
 * @throws FileNotFoundException declared by the overridden method
 * @throws IOException           declared by the overridden method
 */
@Override
protected RemoteIterator<LocatedFileStatus> listLocatedStatus(Path f, final PathFilter filter) throws FileNotFoundException, IOException {
  final String container = getContainerName(f);
  final PathFilter alteredFilter =
      candidate -> filter.accept(transform(candidate, container));

  return RemoteIterators.transform(
      ListAccessor.listLocatedFileStatus(
          getFileSystemForPath(f).fs(), pathWithoutContainer(f), alteredFilter),
      status -> new LocatedFileStatus(
          ContainerFileSystem.transform(status, container), status.getBlockLocations()));
}
 
Example 13
Source File: FileInputFormat.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Expands each input path with {@code globStatus} and collects the resulting
 * input file statuses sequentially.
 *
 * <p>For every glob match: a non-directory is added directly; a directory is
 * listed one level deep and each child accepted by {@code inputFilter} is either
 * added, or, when {@code recursive} is set and the child is a directory,
 * expanded through {@code addInputPathRecursively}. Paths that do not exist or
 * match nothing are recorded and reported together after all paths are processed.
 *
 * @param job         job context supplying the configuration
 * @param dirs        input paths, possibly containing glob patterns
 * @param inputFilter filter used for globbing and for directory children
 * @param recursive   whether accepted sub-directories are descended into
 * @return the collected statuses
 * @throws IOException if a file system call fails, or as
 *         {@code InvalidInputException} when any input path was invalid
 */
private List<FileStatus> singleThreadedListStatus(JobContext job, Path[] dirs,
    PathFilter inputFilter, boolean recursive) throws IOException {
  List<FileStatus> result = new ArrayList<FileStatus>();
  List<IOException> errors = new ArrayList<IOException>();
  for (Path dir : dirs) {
    FileSystem fs = dir.getFileSystem(job.getConfiguration());
    FileStatus[] matches = fs.globStatus(dir, inputFilter);
    if (matches == null) {
      errors.add(new IOException("Input path does not exist: " + dir));
      continue;
    }
    if (matches.length == 0) {
      errors.add(new IOException("Input Pattern " + dir + " matches 0 files"));
      continue;
    }
    for (FileStatus globStat : matches) {
      if (!globStat.isDirectory()) {
        result.add(globStat);
        continue;
      }
      RemoteIterator<LocatedFileStatus> children =
          fs.listLocatedStatus(globStat.getPath());
      while (children.hasNext()) {
        LocatedFileStatus child = children.next();
        if (!inputFilter.accept(child.getPath())) {
          continue;
        }
        if (recursive && child.isDirectory()) {
          addInputPathRecursively(result, fs, child.getPath(), inputFilter);
        } else {
          result.add(child);
        }
      }
    }
  }

  if (errors.isEmpty()) {
    return result;
  }
  throw new InvalidInputException(errors);
}
 
Example 14
Source File: FileInputFormat.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Returns {@code true} only when all filters in {@code filters} accept the path.
 * The first rejecting filter ends the check; an empty filter collection accepts
 * everything.
 *
 * @param path the path to test
 * @return whether every filter accepted {@code path}
 */
public boolean accept(Path path) {
  for (PathFilter member : filters) {
    boolean passes = member.accept(path);
    if (!passes) {
      return false;
    }
  }
  return true;
}
 
Example 15
Source File: CombineFileInputFormat.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Accepts {@code path} if at least one filter in {@code filters} accepts it.
 * Evaluation stops at the first accepting filter; with no filters the path is
 * rejected.
 *
 * @param path the path to test
 * @return {@code true} if any filter accepts the path, otherwise {@code false}
 */
public boolean accept(Path path) {
  boolean acceptedByAny = false;
  for (PathFilter each : filters) {
    if (each.accept(path)) {
      acceptedByAny = true;
      break;
    }
  }
  return acceptedByAny;
}
 
Example 16
Source File: FileInputFormat.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Resolves every input path through {@code globStatus} and gathers the matching
 * statuses one path at a time.
 *
 * <p>Glob matches that are not directories go straight into the result. For a
 * matched directory, its immediate children are listed; each child accepted by
 * {@code inputFilter} is added, unless {@code recursive} is true and the child is
 * a directory, in which case {@code addInputPathRecursively} expands it. Missing
 * paths and patterns with zero matches are accumulated as errors and raised
 * together at the end.
 *
 * @param job         job configuration used to resolve each path's file system
 * @param dirs        input paths, possibly containing glob patterns
 * @param inputFilter filter used for globbing and for directory children
 * @param recursive   whether accepted sub-directories are descended into
 * @return the collected statuses
 * @throws IOException if a file system call fails, or as
 *         {@code InvalidInputException} when any input path was invalid
 */
private List<FileStatus> singleThreadedListStatus(JobConf job, Path[] dirs,
    PathFilter inputFilter, boolean recursive) throws IOException {
  final List<FileStatus> result = new ArrayList<FileStatus>();
  final List<IOException> errors = new ArrayList<IOException>();
  for (Path inputPath : dirs) {
    final FileSystem fs = inputPath.getFileSystem(job);
    final FileStatus[] matches = fs.globStatus(inputPath, inputFilter);
    if (matches == null) {
      errors.add(new IOException("Input path does not exist: " + inputPath));
      continue;
    }
    if (matches.length == 0) {
      errors.add(new IOException("Input Pattern " + inputPath + " matches 0 files"));
      continue;
    }
    for (FileStatus match : matches) {
      if (!match.isDirectory()) {
        result.add(match);
        continue;
      }
      for (RemoteIterator<LocatedFileStatus> it = fs.listLocatedStatus(match.getPath());
          it.hasNext();) {
        LocatedFileStatus entry = it.next();
        if (!inputFilter.accept(entry.getPath())) {
          continue;
        }
        boolean descend = recursive && entry.isDirectory();
        if (descend) {
          addInputPathRecursively(result, fs, entry.getPath(), inputFilter);
        } else {
          result.add(entry);
        }
      }
    }
  }
  if (!errors.isEmpty()) {
    throw new InvalidInputException(errors);
  }
  return result;
}
 
Example 17
Source File: FileInputFormat.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Sequentially expands the given input paths into a list of file statuses.
 *
 * <p>Each path is globbed with {@code inputFilter}. A {@code null} glob result is
 * recorded as "does not exist" and an empty one as "matches 0 files"; both are
 * thrown together once all paths have been processed. Matched non-directories
 * are added directly. Matched directories are listed, and each child accepted by
 * {@code inputFilter} is added, or expanded via {@code addInputPathRecursively}
 * when {@code recursive} is set and the child is itself a directory.
 *
 * @param job         job context supplying the configuration
 * @param dirs        input paths, possibly containing glob patterns
 * @param inputFilter filter used for globbing and for directory children
 * @param recursive   whether accepted sub-directories are descended into
 * @return the collected statuses
 * @throws IOException if a file system call fails, or as
 *         {@code InvalidInputException} when any input path was invalid
 */
private List<FileStatus> singleThreadedListStatus(JobContext job, Path[] dirs,
    PathFilter inputFilter, boolean recursive) throws IOException {
  List<FileStatus> result = new ArrayList<FileStatus>();
  List<IOException> errors = new ArrayList<IOException>();
  for (Path current : dirs) {
    FileSystem fs = current.getFileSystem(job.getConfiguration());
    FileStatus[] globbed = fs.globStatus(current, inputFilter);
    if (globbed == null) {
      errors.add(new IOException("Input path does not exist: " + current));
    } else if (globbed.length == 0) {
      errors.add(new IOException("Input Pattern " + current + " matches 0 files"));
    } else {
      for (FileStatus top : globbed) {
        if (!top.isDirectory()) {
          result.add(top);
          continue;
        }
        RemoteIterator<LocatedFileStatus> listing =
            fs.listLocatedStatus(top.getPath());
        while (listing.hasNext()) {
          LocatedFileStatus nested = listing.next();
          if (!inputFilter.accept(nested.getPath())) {
            continue;
          }
          if (!recursive || !nested.isDirectory()) {
            result.add(nested);
          } else {
            addInputPathRecursively(result, fs, nested.getPath(),
                inputFilter);
          }
        }
      }
    }
  }

  if (errors.isEmpty()) {
    return result;
  }
  throw new InvalidInputException(errors);
}
 
Example 18
Source File: FileInputFormat.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Checks {@code path} against every filter in {@code filters}, rejecting it as
 * soon as one filter rejects it. If no filter rejects (including when there are
 * no filters), the path is accepted.
 *
 * @param path the path to test
 * @return {@code true} if no filter rejected the path
 */
public boolean accept(Path path) {
  boolean allAccept = true;
  for (PathFilter current : filters) {
    if (!current.accept(path)) {
      allAccept = false;
      break;
    }
  }
  return allAccept;
}
 
Example 19
Source File: JobHistoryUtils.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Lists the entries under {@code root} and keeps those accepted by {@code filter}.
 *
 * <p>When {@code filter} is {@code null} the full listing is returned unchanged;
 * otherwise a new list containing only the accepted entries is returned.
 *
 * @param fc     file context used for the listing
 * @param root   path whose entries are listed
 * @param filter filter applied to each entry's path, or {@code null} for no filtering
 * @return the (optionally filtered) statuses
 * @throws IOException if the listing fails
 */
private static List<FileStatus> listFilteredStatus(FileContext fc, Path root,
    PathFilter filter) throws IOException {
  final List<FileStatus> listing = remoteIterToList(fc.listStatus(root));
  if (filter == null) {
    return listing;
  }
  final List<FileStatus> kept = new LinkedList<FileStatus>();
  for (FileStatus status : listing) {
    if (!filter.accept(status.getPath())) {
      continue;
    }
    kept.add(status);
  }
  return kept;
}
 
Example 20
Source File: CombineFileInputFormat.java    From aliyun-maxcompute-data-collectors with Apache License 2.0 5 votes vote down vote up
/**
 * Returns {@code true} as soon as any filter in {@code filters} accepts
 * {@code path}. If none accepts it, or there are no filters, returns {@code false}.
 *
 * @param path the path to test
 * @return whether at least one filter accepted {@code path}
 */
public boolean accept(Path path) {
  for (PathFilter candidate : filters) {
    if (!candidate.accept(path)) {
      continue;
    }
    return true;
  }
  return false;
}