Java Code Examples for org.apache.hadoop.fs.RemoteIterator#hasNext()

The following examples show how to use org.apache.hadoop.fs.RemoteIterator#hasNext(). Each example comes from an open-source project; the source file and its license are noted above the code.
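
RemoteIterator is Hadoop's counterpart to java.util.Iterator: both hasNext() and next() declare IOException, which lets a file system stream listing results incrementally (HDFS, for instance, fetches directory listings from the NameNode in batches) rather than materializing them up front. Every example below follows the same basic loop; here is a minimal sketch, assuming fs is an initialized org.apache.hadoop.fs.FileSystem and dir is an existing directory Path:

// Minimal sketch of the common pattern. `fs` and `dir` are assumed to be an
// initialized FileSystem and an existing directory Path, respectively.
// Unlike java.util.Iterator, hasNext() and next() can throw IOException.
RemoteIterator<LocatedFileStatus> it = fs.listFiles(dir, false);
while (it.hasNext()) {
  LocatedFileStatus status = it.next();
  System.out.println(status.getPath());
}
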
Example 1
Source File: FSAgent.java    From Bats with Apache License 2.0
public List<String> listFiles(String dir) throws IOException
{
  List<String> files = new ArrayList<>();
  Path path = new Path(dir);

  FileStatus fileStatus = fileSystem.getFileStatus(path);
  if (!fileStatus.isDirectory()) {
    throw new FileNotFoundException("Cannot read directory " + dir);
  }
  RemoteIterator<LocatedFileStatus> it = fileSystem.listFiles(path, false);
  while (it.hasNext()) {
    LocatedFileStatus lfs = it.next();
    files.add(lfs.getPath().getName());
  }
  return files;
}
 
Example 2
Source File: GenerateData.java    From hadoop with Apache License 2.0
static DataStatistics publishPlainDataStatistics(Configuration conf, 
                                                 Path inputDir) 
throws IOException {
  FileSystem fs = inputDir.getFileSystem(conf);

  // obtain input data file statuses
  long dataSize = 0;
  long fileCount = 0;
  RemoteIterator<LocatedFileStatus> iter = fs.listFiles(inputDir, true);
  PathFilter filter = new Utils.OutputFileUtils.OutputFilesFilter();
  while (iter.hasNext()) {
    LocatedFileStatus lStatus = iter.next();
    if (filter.accept(lStatus.getPath())) {
      dataSize += lStatus.getLen();
      ++fileCount;
    }
  }

  // publish the plain data statistics
  LOG.info("Total size of input data : " 
           + StringUtils.humanReadableInt(dataSize));
  LOG.info("Total number of input data files : " + fileCount);
  
  return new DataStatistics(dataSize, fileCount, false);
}
 
Example 3
Source File: ResourceLocalizationService.java    From hadoop with Apache License 2.0
private void cleanUpFilesPerUserDir(FileContext lfs, DeletionService del,
    Path userDirPath) throws IOException {
  RemoteIterator<FileStatus> userDirStatus = lfs.listStatus(userDirPath);
  FileDeletionTask dependentDeletionTask =
      del.createFileDeletionTask(null, userDirPath, new Path[] {});
  if (userDirStatus != null && userDirStatus.hasNext()) {
    List<FileDeletionTask> deletionTasks = new ArrayList<FileDeletionTask>();
    while (userDirStatus.hasNext()) {
      FileStatus status = userDirStatus.next();
      String owner = status.getOwner();
      FileDeletionTask deletionTask =
          del.createFileDeletionTask(owner, null,
            new Path[] { status.getPath() });
      deletionTask.addFileDeletionTaskDependency(dependentDeletionTask);
      deletionTasks.add(deletionTask);
    }
    for (FileDeletionTask task : deletionTasks) {
      del.scheduleFileDeletionTask(task);
    }
  } else {
    del.scheduleFileDeletionTask(dependentDeletionTask);
  }
}
 
Example 4
Source File: FileInputFormat.java    From hadoop with Apache License 2.0
/**
 * Add files in the input path recursively into the results.
 * @param result
 *          The List to store all files.
 * @param fs
 *          The FileSystem.
 * @param path
 *          The input path.
 * @param inputFilter
 *          The input filter that can be used to filter files/dirs. 
 * @throws IOException
 */
protected void addInputPathRecursively(List<FileStatus> result,
    FileSystem fs, Path path, PathFilter inputFilter) 
    throws IOException {
  RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(path);
  while (iter.hasNext()) {
    LocatedFileStatus stat = iter.next();
    if (inputFilter.accept(stat.getPath())) {
      if (stat.isDirectory()) {
        addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
      } else {
        result.add(stat);
      }
    }
  }
}
 
Example 5
Source File: TestEncryptionZones.java    From hadoop with Apache License 2.0
/**
 * Checks that an encryption zone with the specified keyName and path (if not
 * null) is present.
 *
 * @throws IOException if a matching zone could not be found
 */
public void assertZonePresent(String keyName, String path) throws IOException {
  final RemoteIterator<EncryptionZone> it = dfsAdmin.listEncryptionZones();
  boolean match = false;
  while (it.hasNext()) {
    EncryptionZone zone = it.next();
    boolean matchKey = (keyName == null);
    boolean matchPath = (path == null);
    if (keyName != null && zone.getKeyName().equals(keyName)) {
      matchKey = true;
    }
    if (path != null && zone.getPath().equals(path)) {
      matchPath = true;
    }
    if (matchKey && matchPath) {
      match = true;
      break;
    }
  }
  assertTrue("Did not find expected encryption zone with keyName " + keyName +
          " path " + path, match
  );
}
 
Example 6
Source File: CryptoAdmin.java    From hadoop with Apache License 2.0
@Override
public int run(Configuration conf, List<String> args) throws IOException {
  if (!args.isEmpty()) {
    System.err.println("Can't understand argument: " + args.get(0));
    return 1;
  }

  final DistributedFileSystem dfs = AdminHelper.getDFS(conf);
  try {
    final TableListing listing = new TableListing.Builder()
      .addField("").addField("", true)
      .wrapWidth(AdminHelper.MAX_LINE_WIDTH).hideHeaders().build();
    final RemoteIterator<EncryptionZone> it = dfs.listEncryptionZones();
    while (it.hasNext()) {
      EncryptionZone ez = it.next();
      listing.addRow(ez.getPath(), ez.getKeyName());
    }
    System.out.println(listing.toString());
  } catch (IOException e) {
    System.err.println(prettifyException(e));
    return 2;
  }

  return 0;
}
 
Example 7
Source File: TestRetryCacheWithHA.java    From hadoop with Apache License 2.0
@Override
boolean checkNamenodeBeforeReturn() throws Exception {
  for (int i = 0; i < CHECKTIMES; i++) {
    RemoteIterator<CacheDirectiveEntry> iter =
        dfs.listCacheDirectives(
            new CacheDirectiveInfo.Builder().
                setPool(directive.getPool()).
                setPath(directive.getPath()).
                build());
    while (iter.hasNext()) {
      CacheDirectiveInfo result = iter.next().getInfo();
      if ((result.getId() == id) &&
          (result.getReplication().shortValue() == newReplication)) {
        return true;
      }
    }
    Thread.sleep(1000);
  }
  return false;
}
 
Example 8
Source File: FSAgent.java    From Bats with Apache License 2.0
public List<LocatedFileStatus> listFilesInfo(String dir) throws IOException
{
  List<LocatedFileStatus> files = new ArrayList<>();
  Path path = new Path(dir);

  FileStatus fileStatus = fileSystem.getFileStatus(path);
  if (!fileStatus.isDirectory()) {
    throw new FileNotFoundException("Cannot read directory " + dir);
  }
  RemoteIterator<LocatedFileStatus> it = fileSystem.listFiles(path, false);
  while (it.hasNext()) {
    LocatedFileStatus lfs = it.next();
    files.add(lfs);
  }
  return files;
}
 
Example 9
Source File: HDFSResourceStore.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected void visitFolderImpl(String folderPath, boolean recursive, VisitFilter filter, boolean loadContent,
        Visitor visitor) throws IOException {
    Path p = getRealHDFSPath(folderPath);
    if (!fs.exists(p) || !fs.isDirectory(p)) {
        return;
    }

    String fsPathPrefix = p.toUri().getPath();
    String resPathPrefix = folderPath.endsWith("/") ? folderPath : folderPath + "/";

    RemoteIterator<LocatedFileStatus> it = fs.listFiles(p, recursive);
    while (it.hasNext()) {
        LocatedFileStatus status = it.next();
        if (status.isDirectory())
            continue;

        String path = status.getPath().toUri().getPath();
        if (!path.startsWith(fsPathPrefix))
            throw new IllegalStateException("File path " + path + " is supposed to start with " + fsPathPrefix);

        String resPath = resPathPrefix + path.substring(fsPathPrefix.length() + 1);

        if (filter.matches(resPath, status.getModificationTime())) {
            RawResource raw;
            if (loadContent)
                raw = new RawResource(resPath, status.getModificationTime(), fs.open(status.getPath()));
            else
                raw = new RawResource(resPath, status.getModificationTime());

            try {
                visitor.visit(raw);
            } finally {
                raw.close();
            }
        }
    }
}
 
Example 10
Source File: TestRetryCacheWithHA.java    From hadoop with Apache License 2.0
@Override
boolean checkNamenodeBeforeReturn() throws Exception {
  for (int i = 0; i < CHECKTIMES; i++) {
    RemoteIterator<CachePoolEntry> iter = dfs.listCachePools();
    if (!iter.hasNext()) {
      return true;
    }
    Thread.sleep(1000);
  }
  return false;
}
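
Here a single hasNext() call doubles as an emptiness probe: it consumes no element, and an empty listing tells the test that the cache pools are gone. The same probe works on any RemoteIterator; a minimal sketch, assuming dfs is an initialized DistributedFileSystem:

// Hedged sketch: hasNext() as an emptiness check; no element is consumed.
// Assumes `dfs` is an initialized DistributedFileSystem.
RemoteIterator<CachePoolEntry> pools = dfs.listCachePools();
boolean anyPools = pools.hasNext();
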
 
Example 11
Source File: PrestoS3FileSystem.java    From presto with Apache License 2.0
@Override
public FileStatus[] listStatus(Path path)
        throws IOException
{
    STATS.newListStatusCall();
    List<LocatedFileStatus> list = new ArrayList<>();
    RemoteIterator<LocatedFileStatus> iterator = listLocatedStatus(path);
    while (iterator.hasNext()) {
        list.add(iterator.next());
    }
    return toArray(list, LocatedFileStatus.class);
}
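
This example, like Examples 1 and 8, drains a RemoteIterator into a java.util.List with the same loop. That loop factors out into a generic helper; the sketch below is illustrative only and not part of the Hadoop or Presto APIs:

// Hypothetical helper (not a Hadoop API): drains any RemoteIterator into a List.
// It declares IOException because hasNext() and next() can both throw it.
static <T> List<T> drain(RemoteIterator<T> it) throws IOException {
  List<T> out = new ArrayList<>();
  while (it.hasNext()) {
    out.add(it.next());
  }
  return out;
}
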
 
Example 12
Source File: TestLogAggregationService.java    From hadoop with Apache License 2.0
private int numOfLogsAvailable(LogAggregationService logAggregationService,
    ApplicationId appId, boolean sizeLimited, String lastLogFile)
    throws IOException {
  Path appLogDir = logAggregationService.getRemoteAppLogDir(appId, this.user);
  RemoteIterator<FileStatus> nodeFiles = null;
  try {
    Path qualifiedLogDir =
        FileContext.getFileContext(this.conf).makeQualified(appLogDir);
    nodeFiles =
        FileContext.getFileContext(qualifiedLogDir.toUri(), this.conf)
          .listStatus(appLogDir);
  } catch (FileNotFoundException fnf) {
    return -1;
  }
  int count = 0;
  while (nodeFiles.hasNext()) {
    FileStatus status = nodeFiles.next();
    String filename = status.getPath().getName();
    if (filename.contains(LogAggregationUtils.TMP_FILE_SUFFIX)
        || (lastLogFile != null && filename.contains(lastLogFile)
            && sizeLimited)) {
      return -1;
    }
    if (filename.contains(LogAggregationUtils
      .getNodeString(logAggregationService.getNodeId()))) {
      count++;
    }
  }
  return count;
}
 
Example 13
Source File: FSStorageAgent.java    From Bats with Apache License 2.0
@Override
public long[] getWindowIds(int operatorId) throws IOException
{
  Path lPath = new Path(path + Path.SEPARATOR + String.valueOf(operatorId));
  try {
    FileStatus status = fileContext.getFileStatus(lPath);
    if (!status.isDirectory()) {
      throw new RuntimeException("Checkpoint location is not a directory");
    }
  } catch (FileNotFoundException ex) {
    // During initialization checkpoint directory may not exists.
    fileContext.mkdir(lPath, FsPermission.getDirDefault(), true);
  }

  RemoteIterator<FileStatus> fileStatusRemoteIterator = fileContext.listStatus(lPath);
  List<Long> lwindows = new ArrayList<>();
  while (fileStatusRemoteIterator.hasNext()) {
    FileStatus fileStatus = fileStatusRemoteIterator.next();
    String name = fileStatus.getPath().getName();
    if (name.equals(TMP_FILE)) {
      continue;
    }
    lwindows.add(STATELESS_CHECKPOINT_WINDOW_ID.equals(name) ? Stateless.WINDOW_ID : Long.parseLong(name, 16));
  }
  long[] windowIds = new long[lwindows.size()];
  for (int i = 0; i < windowIds.length; i++) {
    windowIds[i] = lwindows.get(i);
  }
  return windowIds;
}
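
Note that FileContext#listStatus, used here and in Examples 3 and 12, returns a RemoteIterator<FileStatus> rather than the FileStatus[] array returned by FileSystem#listStatus, so its results are always consumed through hasNext()/next(). A minimal sketch, assuming conf is a valid Configuration and dir is an existing directory Path:

// Minimal sketch. `conf` and `dir` are assumed to be a valid Configuration
// and an existing directory Path. FileContext#listStatus pages its results
// through a RemoteIterator instead of returning an array.
FileContext fc = FileContext.getFileContext(conf);
RemoteIterator<FileStatus> statuses = fc.listStatus(dir);
while (statuses.hasNext()) {
  System.out.println(statuses.next().getPath().getName());
}
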
 
Example 14
Source File: HdfsIOBenchmark.java    From incubator-crail with Apache License 2.0
void browseDir() throws Exception {
	System.out.println("reading enumarate dir, path " + path);
	Configuration conf = new Configuration();
	FileSystem fs = FileSystem.get(conf); 
	
	//benchmark
	System.out.println("starting benchmark...");
	RemoteIterator<LocatedFileStatus> iter = fs.listFiles(path, false);
	while (iter.hasNext()) {
		LocatedFileStatus status = iter.next();
		System.out.println(status.getPath());
	}
	fs.close();
}
 
Example 15
Source File: FileInputFormat.java    From hadoop with Apache License 2.0
private List<FileStatus> singleThreadedListStatus(JobContext job, Path[] dirs,
    PathFilter inputFilter, boolean recursive) throws IOException {
  List<FileStatus> result = new ArrayList<FileStatus>();
  List<IOException> errors = new ArrayList<IOException>();
  for (int i=0; i < dirs.length; ++i) {
    Path p = dirs[i];
    FileSystem fs = p.getFileSystem(job.getConfiguration()); 
    FileStatus[] matches = fs.globStatus(p, inputFilter);
    if (matches == null) {
      errors.add(new IOException("Input path does not exist: " + p));
    } else if (matches.length == 0) {
      errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
    } else {
      for (FileStatus globStat: matches) {
        if (globStat.isDirectory()) {
          RemoteIterator<LocatedFileStatus> iter =
              fs.listLocatedStatus(globStat.getPath());
          while (iter.hasNext()) {
            LocatedFileStatus stat = iter.next();
            if (inputFilter.accept(stat.getPath())) {
              if (recursive && stat.isDirectory()) {
                addInputPathRecursively(result, fs, stat.getPath(),
                    inputFilter);
              } else {
                result.add(stat);
              }
            }
          }
        } else {
          result.add(globStat);
        }
      }
    }
  }

  if (!errors.isEmpty()) {
    throw new InvalidInputException(errors);
  }
  return result;
}
 
Example 16
Source File: TestRetryCacheWithHA.java    From hadoop with Apache License 2.0
@Override
boolean checkNamenodeBeforeReturn() throws Exception {
  for (int i = 0; i < CHECKTIMES; i++) {
    RemoteIterator<CachePoolEntry> iter = dfs.listCachePools();
    if (iter.hasNext() && iter.next().getInfo().getLimit() == 99) {
      return true;
    }
    Thread.sleep(1000);
  }
  return false;
}
 
Example 17
Source File: HiveMetadataUtils.java    From dremio-oss with Apache License 2.0
public static HiveReaderProto.FileSystemPartitionUpdateKey getFSBasedUpdateKey(String partitionDir, JobConf job,
                                                                               boolean isRecursive, boolean directoriesOnly,
                                                                               int partitionId) {
  final List<HiveReaderProto.FileSystemCachedEntity> cachedEntities = new ArrayList<>();
  final Path rootLocation = new Path(partitionDir);
  try {
    // TODO: DX-16001 - make async configurable for Hive.
    final HadoopFileSystemWrapper fs = new HadoopFileSystemWrapper(rootLocation, job);

    if (fs.exists(rootLocation)) {
      final FileStatus rootStatus = fs.getFileStatus(rootLocation);
      if (rootStatus.isDirectory()) {
        cachedEntities.add(HiveReaderProto.FileSystemCachedEntity.newBuilder()
          .setPath(EMPTY_STRING)
          .setLastModificationTime(rootStatus.getModificationTime())
          .setIsDir(true)
          .build());

        final RemoteIterator<LocatedFileStatus> statuses = isRecursive ? fs.listFiles(rootLocation, true) : fs.listFiles(rootLocation, false);
        while (statuses.hasNext()) {
          LocatedFileStatus fileStatus = statuses.next();
          final Path filePath = fileStatus.getPath();
          if (fileStatus.isDirectory()) {
            cachedEntities.add(HiveReaderProto.FileSystemCachedEntity.newBuilder()
              .setPath(PathUtils.relativePath(filePath, rootLocation))
              .setLastModificationTime(fileStatus.getModificationTime())
              .setIsDir(true)
              .build());
          } else if (fileStatus.isFile() && !directoriesOnly) {
            cachedEntities.add(HiveReaderProto.FileSystemCachedEntity.newBuilder()
              .setPath(PathUtils.relativePath(filePath, rootLocation))
              .setLastModificationTime(fileStatus.getModificationTime())
              .setIsDir(false)
              .build());
          }
        }
      } else {
        cachedEntities.add(HiveReaderProto.FileSystemCachedEntity.newBuilder()
          .setPath(EMPTY_STRING)
          .setLastModificationTime(rootStatus.getModificationTime())
          .setIsDir(false)
          .build());
      }
      return HiveReaderProto.FileSystemPartitionUpdateKey.newBuilder()
        .setPartitionId(partitionId)
        .setPartitionRootDir(fs.makeQualified(rootLocation).toString())
        .addAllCachedEntities(cachedEntities)
        .build();
    }
    return null;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
 
Example 18
Source File: TraceBuilder.java    From hadoop with Apache License 2.0
/**
 * Processes the input file/folder argument. If the input is a file,
 * then it is directly considered for further processing by TraceBuilder.
 * If the input is a folder, then all the history logs in the
 * input folder are considered for further processing.
 *
 * If isRecursive is true, then the input path is recursively scanned
 * for job history logs for further processing by TraceBuilder.
 *
 * NOTE: If the input represents a globbed path, then it is first flattened
 *       and then the individual paths represented by the globbed input
 *       path are considered for further processing.
 *
 * @param input        input path, possibly globbed
 * @param conf         configuration
 * @param isRecursive  whether to recursively traverse the input paths to
 *                     find history logs
 * @return the input history log files' paths
 * @throws FileNotFoundException
 * @throws IOException
 */
static List<Path> processInputArgument(String input, Configuration conf,
    boolean isRecursive) throws FileNotFoundException, IOException {
  Path inPath = new Path(input);
  FileSystem fs = inPath.getFileSystem(conf);
  FileStatus[] inStatuses = fs.globStatus(inPath);

  List<Path> inputPaths = new LinkedList<Path>();
  if (inStatuses == null || inStatuses.length == 0) {
    return inputPaths;
  }

  for (FileStatus inStatus : inStatuses) {
    Path thisPath = inStatus.getPath();
    if (inStatus.isDirectory()) {

      // Find list of files in this path(recursively if -recursive option
      // is specified).
      List<FileStatus> historyLogs = new ArrayList<FileStatus>();

      RemoteIterator<LocatedFileStatus> iter =
        fs.listFiles(thisPath, isRecursive);
      while (iter.hasNext()) {
        LocatedFileStatus child = iter.next();
        String fileName = child.getPath().getName();

        if (!(fileName.endsWith(".crc") || fileName.startsWith("."))) {
          historyLogs.add(child);
        }
      }

      if (historyLogs.size() > 0) {
        // Add the sorted history log file names in this path to the
        // inputPaths list
        FileStatus[] sortableNames =
            historyLogs.toArray(new FileStatus[historyLogs.size()]);
        Arrays.sort(sortableNames, new HistoryLogsComparator());

        for (FileStatus historyLog : sortableNames) {
          inputPaths.add(historyLog.getPath());
        }
      }
    } else {
      inputPaths.add(thisPath);
    }
  }

  return inputPaths;
}
 
Example 19
Source File: LogCLIHelpers.java    From hadoop with Apache License 2.0
@Private
@VisibleForTesting
public int dumpAContainersLogs(String appId, String containerId,
    String nodeId, String jobOwner) throws IOException {

  ApplicationId applicationId = ConverterUtils.toApplicationId(appId);
  List<Path> remoteAppLogDirs = AggregatedLogsBlock.getRemoteAppLogDirs(getConf(), applicationId, jobOwner);
  String remoteAppLogDir = StringUtils.join(remoteAppLogDirs, ",");

  RemoteIterator<FileStatus> nodeFiles;
  try {
    nodeFiles = AggregatedLogsBlock.getFileListAtRemoteAppDir(getConf(), remoteAppLogDirs, applicationId, jobOwner);
  } catch (FileNotFoundException fnf) {
    logDirNotExist(remoteAppLogDir.toString());
    return -1;
  }
  boolean foundContainerLogs = false;
  while (nodeFiles.hasNext()) {
    FileStatus thisNodeFile = nodeFiles.next();
    String fileName = thisNodeFile.getPath().getName();
    if (fileName.contains(LogAggregationUtils.getNodeString(nodeId))
        && !fileName.endsWith(LogAggregationUtils.TMP_FILE_SUFFIX)) {
      AggregatedLogFormat.LogReader reader = null;
      try {
        reader =
            new AggregatedLogFormat.LogReader(getConf(),
              thisNodeFile.getPath());
        if (dumpAContainerLogs(containerId, reader, System.out,
            thisNodeFile.getModificationTime()) > -1) {
          foundContainerLogs = true;
        }
      } finally {
        if (reader != null) {
          reader.close();
        }
      }
    }
  }
  if (!foundContainerLogs) {
    containerLogNotFound(containerId);
    return -1;
  }
  return 0;
}