Java Code Examples for org.apache.hadoop.fs.RemoteIterator#next()

The following examples show how to use org.apache.hadoop.fs.RemoteIterator#next(). Each snippet is taken from an open-source project; the project and source file are noted above each example.
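Before the individual examples, here is a minimal sketch of the pattern they all share: obtain a RemoteIterator from a listing call and guard each next() with hasNext(), since next() fails once the iterator is exhausted. The FileSystem setup and the /tmp/example path are illustrative assumptions, not taken from any of the projects below.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class RemoteIteratorSketch {
  public static void printFileNames(FileSystem fs, Path dir) throws IOException {
    // listFiles returns a RemoteIterator; always guard next() with hasNext().
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(dir, false /* non-recursive */);
    while (it.hasNext()) {
      LocatedFileStatus status = it.next();
      System.out.println(status.getPath().getName());
    }
  }

  public static void main(String[] args) throws IOException {
    // "/tmp/example" is a hypothetical path used only for illustration.
    FileSystem fs = FileSystem.get(new Configuration());
    printFileNames(fs, new Path("/tmp/example"));
  }
}

Note that both hasNext() and next() declare IOException, because each call may involve a remote round trip; this is why the examples below either propagate or handle IOException.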
Example 1
Source File: FSAgent.java    From Bats with Apache License 2.0
public List<String> listFiles(String dir) throws IOException
{
  List<String> files = new ArrayList<>();
  Path path = new Path(dir);

  FileStatus fileStatus = fileSystem.getFileStatus(path);
  if (!fileStatus.isDirectory()) {
    throw new FileNotFoundException("Cannot read directory " + dir);
  }
  RemoteIterator<LocatedFileStatus> it = fileSystem.listFiles(path, false);
  while (it.hasNext()) {
    LocatedFileStatus lfs = it.next();
    files.add(lfs.getPath().getName());
  }
  return files;
}
 
Example 2
Source File: TestRetryCacheWithHA.java    From hadoop with Apache License 2.0
@SuppressWarnings("unchecked")
private void listCacheDirectives(
    HashSet<String> poolNames, int active) throws Exception {
  HashSet<String> tmpNames = (HashSet<String>)poolNames.clone();
  RemoteIterator<CacheDirectiveEntry> directives = dfs.listCacheDirectives(null);
  int poolCount = poolNames.size();
  for (int i=0; i<poolCount; i++) {
    CacheDirectiveEntry directive = directives.next();
    String poolName = directive.getInfo().getPool();
    assertTrue("The pool name should be expected", tmpNames.remove(poolName));
    if (i % 2 == 0) {
      int standby = active;
      active = (standby == 0) ? 1 : 0;
      cluster.transitionToStandby(standby);
      cluster.transitionToActive(active);
      cluster.waitActive(active);
    }
  }
  assertTrue("All pools must be found", tmpNames.isEmpty());
}
 
Example 3
Source File: TestFileStatus.java    From hadoop with Apache License 2.0
/** Test the FileStatus obtained by calling listStatus on a file */
@Test
public void testListStatusOnFile() throws IOException {
  FileStatus[] stats = fs.listStatus(file1);
  assertEquals(1, stats.length);
  FileStatus status = stats[0];
  assertFalse(file1 + " should be a file", status.isDirectory());
  assertEquals(blockSize, status.getBlockSize());
  assertEquals(1, status.getReplication());
  assertEquals(fileSize, status.getLen());
  assertEquals(file1.makeQualified(fs.getUri(), 
      fs.getWorkingDirectory()).toString(), 
      status.getPath().toString());
  
  RemoteIterator<FileStatus> itor = fc.listStatus(file1);
  status = itor.next();
  assertEquals(stats[0], status);
  assertFalse(file1 + " should be a file", status.isDirectory());
}
 
Example 4
Source File: ResourceLocalizationService.java    From hadoop with Apache License 2.0
private void deleteLocalDir(FileContext lfs, DeletionService del,
    String localDir) throws IOException {
  RemoteIterator<FileStatus> fileStatus = lfs.listStatus(new Path(localDir));
  if (fileStatus != null) {
    while (fileStatus.hasNext()) {
      FileStatus status = fileStatus.next();
      try {
        if (status.getPath().getName().matches(".*" +
            ContainerLocalizer.USERCACHE + "_DEL_.*")) {
          LOG.info("usercache path : " + status.getPath().toString());
          cleanUpFilesPerUserDir(lfs, del, status.getPath());
        } else if (status.getPath().getName()
            .matches(".*" + NM_PRIVATE_DIR + "_DEL_.*")
            ||
            status.getPath().getName()
                .matches(".*" + ContainerLocalizer.FILECACHE + "_DEL_.*")) {
          del.delete(null, status.getPath(), new Path[] {});
        }
      } catch (IOException ex) {
        // Do nothing; just log a warning
        LOG.warn("Failed to delete this local Directory: " +
            status.getPath().getName());
      }
    }
  }
}
 
Example 5
Source File: HistoryFileManager.java    From hadoop with Apache License 2.0
@VisibleForTesting
protected static List<FileStatus> scanDirectory(Path path, FileContext fc,
    PathFilter pathFilter) throws IOException {
  path = fc.makeQualified(path);
  List<FileStatus> jhStatusList = new ArrayList<FileStatus>();
  try {
    RemoteIterator<FileStatus> fileStatusIter = fc.listStatus(path);
    while (fileStatusIter.hasNext()) {
      FileStatus fileStatus = fileStatusIter.next();
      Path filePath = fileStatus.getPath();
      if (fileStatus.isFile() && pathFilter.accept(filePath)) {
        jhStatusList.add(fileStatus);
      }
    }
  } catch (FileNotFoundException fe) {
    LOG.error("Error while scanning directory " + path, fe);
  }
  return jhStatusList;
}
 
Example 6
Source File: S3PartitionedOutputCommitter.java    From s3committer with Apache License 2.0
@Override
protected List<FileStatus> getTaskOutput(TaskAttemptContext context)
    throws IOException {
  PathFilter filter = HiddenPathFilter.get();

  // get files on the local FS in the attempt path
  Path attemptPath = getTaskAttemptPath(context);
  FileSystem attemptFS = attemptPath.getFileSystem(context.getConfiguration());
  RemoteIterator<LocatedFileStatus> iter = attemptFS
      .listFiles(attemptPath, true /* recursive */ );

  List<FileStatus> stats = Lists.newArrayList();
  while (iter.hasNext()) {
    FileStatus stat = iter.next();
    if (filter.accept(stat.getPath())) {
      stats.add(stat);
    }
  }

  return stats;
}
 
Example 7
Source File: LifecycleIT.java    From spydra with Apache License 2.0
private int getFileCount(URI uri) throws IOException {
  FileSystem fs = gcpUtils.fileSystemForUri(uri);
  RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path(uri), true);
  int count = 0;
  while (it.hasNext()) {
    it.next();
    count++;
  }
  return count;
}
 
Example 8
Source File: TestNodeManagerReboot.java    From hadoop with Apache License 2.0
private int numOfUsercacheDELDirs(String localDir) throws IOException {
  int count = 0;
  RemoteIterator<FileStatus> fileStatus = localFS.listStatus(new Path(localDir));
  while (fileStatus.hasNext()) {
    FileStatus status = fileStatus.next();
    if (status.getPath().getName().matches(".*" +
        ContainerLocalizer.USERCACHE + "_DEL_.*")) {
      count++;
    }
  }
  return count;
}
 
Example 9
Source File: TestLogAggregationService.java    From hadoop with Apache License 2.0
private int numOfLogsAvailable(LogAggregationService logAggregationService,
    ApplicationId appId, boolean sizeLimited, String lastLogFile)
    throws IOException {
  Path appLogDir = logAggregationService.getRemoteAppLogDir(appId, this.user);
  RemoteIterator<FileStatus> nodeFiles = null;
  try {
    Path qualifiedLogDir =
        FileContext.getFileContext(this.conf).makeQualified(appLogDir);
    nodeFiles =
        FileContext.getFileContext(qualifiedLogDir.toUri(), this.conf)
          .listStatus(appLogDir);
  } catch (FileNotFoundException fnf) {
    return -1;
  }
  int count = 0;
  while (nodeFiles.hasNext()) {
    FileStatus status = nodeFiles.next();
    String filename = status.getPath().getName();
    if (filename.contains(LogAggregationUtils.TMP_FILE_SUFFIX)
        || (lastLogFile != null && filename.contains(lastLogFile)
            && sizeLimited)) {
      return -1;
    }
    if (filename.contains(LogAggregationUtils
      .getNodeString(logAggregationService.getNodeId()))) {
      count++;
    }
  }
  return count;
}
 
Example 10
Source File: HdfsFileWatcherPolicy.java    From kafka-connect-fs with Apache License 2.0
private void enqueue(String path) throws IOException {
    Path filePath = new Path(path);
    if (!fs.exists(filePath) || fs.getFileStatus(filePath) == null) {
        log.info("Cannot enqueue file [{}] because it does not exist but got an event from the FS", filePath);
        return;
    }

    log.debug("Enqueuing file to process [{}]", filePath);
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(filePath, false);
    while (it.hasNext()) {
        LocatedFileStatus status = it.next();
        if (!status.isFile() || !fileRegexp.matcher(status.getPath().getName()).find()) continue;
        fileQueue.offer(toMetadata(status));
    }
}
 
Example 11
Source File: HDFSEasy.java    From camel-kafka-connector with Apache License 2.0
public List<LocatedFileStatus> listFiles(Path path) throws IOException {
    RemoteIterator<LocatedFileStatus> i = dfs.listFiles(path, false);

    List<LocatedFileStatus> retList = new ArrayList<>();
    while (i.hasNext()) {
        LocatedFileStatus locatedFileStatus = i.next();
        retList.add(locatedFileStatus);
    }

    return retList;
}
 
Example 12
Source File: FileInputFormat.java    From hadoop with Apache License 2.0
private List<FileStatus> singleThreadedListStatus(JobConf job, Path[] dirs,
    PathFilter inputFilter, boolean recursive) throws IOException {
  List<FileStatus> result = new ArrayList<FileStatus>();
  List<IOException> errors = new ArrayList<IOException>();
  for (Path p: dirs) {
    FileSystem fs = p.getFileSystem(job); 
    FileStatus[] matches = fs.globStatus(p, inputFilter);
    if (matches == null) {
      errors.add(new IOException("Input path does not exist: " + p));
    } else if (matches.length == 0) {
      errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
    } else {
      for (FileStatus globStat: matches) {
        if (globStat.isDirectory()) {
          RemoteIterator<LocatedFileStatus> iter =
              fs.listLocatedStatus(globStat.getPath());
          while (iter.hasNext()) {
            LocatedFileStatus stat = iter.next();
            if (inputFilter.accept(stat.getPath())) {
              if (recursive && stat.isDirectory()) {
                addInputPathRecursively(result, fs, stat.getPath(),
                    inputFilter);
              } else {
                result.add(stat);
              }
            }
          }
        } else {
          result.add(globStat);
        }
      }
    }
  }
  if (!errors.isEmpty()) {
    throw new InvalidInputException(errors);
  }
  return result;
}
 
Example 13
Source File: ProtoParquetWriterWithOffsetTest.java    From garmadon with Apache License 2.0
private List<EventHeaderProtos.Header> checkSingleFileWithFileSystem(
    Collection<EventHeaderProtos.Header> inputHeaders) throws IOException {
    final List<EventHeaderProtos.Header> headers = new LinkedList<>();

    Path newTmpFile = new Path(tmpPath, "file");
    final ProtoParquetWriter<Message> writer = new ProtoParquetWriter<>(newTmpFile,
        EventHeaderProtos.Header.class);
    long offset = 1;
    final BiConsumer<String, String> protoMetadataWriter = mock(BiConsumer.class);

    final ProtoParquetWriterWithOffset consumer = new ProtoParquetWriterWithOffset<>(writer, newTmpFile, finalPath,
        localFs, new FixedOffsetComputer(FINAL_FILE_NAME, 123), UTC_EPOCH, "ignored",
        protoMetadataWriter, 1);

    for (EventHeaderProtos.Header header : inputHeaders) {
        consumer.write(1234567890L, header, new TopicPartitionOffset(TOPIC, 1, offset++));
    }
    consumer.close();

    final RemoteIterator<LocatedFileStatus> filesIterator = localFs.listFiles(finalPath, false);
    final LocatedFileStatus fileStatus = filesIterator.next();
    Assert.assertEquals(FINAL_FILE_NAME, fileStatus.getPath().getName());
    Assert.assertFalse("There should be only one output file", filesIterator.hasNext());

    final ParquetReader<EventHeaderProtos.Header.Builder> reader;
    reader = ProtoParquetReader.<EventHeaderProtos.Header.Builder>builder(fileStatus.getPath()).build();

    EventHeaderProtos.Header.Builder current = reader.read();
    while (current != null) {
        headers.add(current.build());
        current = reader.read();
    }

    return headers;
}
 
Example 14
Source File: FileInputFormat.java    From hadoop with Apache License 2.0
private List<FileStatus> singleThreadedListStatus(JobContext job, Path[] dirs,
    PathFilter inputFilter, boolean recursive) throws IOException {
  List<FileStatus> result = new ArrayList<FileStatus>();
  List<IOException> errors = new ArrayList<IOException>();
  for (int i=0; i < dirs.length; ++i) {
    Path p = dirs[i];
    FileSystem fs = p.getFileSystem(job.getConfiguration()); 
    FileStatus[] matches = fs.globStatus(p, inputFilter);
    if (matches == null) {
      errors.add(new IOException("Input path does not exist: " + p));
    } else if (matches.length == 0) {
      errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
    } else {
      for (FileStatus globStat: matches) {
        if (globStat.isDirectory()) {
          RemoteIterator<LocatedFileStatus> iter =
              fs.listLocatedStatus(globStat.getPath());
          while (iter.hasNext()) {
            LocatedFileStatus stat = iter.next();
            if (inputFilter.accept(stat.getPath())) {
              if (recursive && stat.isDirectory()) {
                addInputPathRecursively(result, fs, stat.getPath(),
                    inputFilter);
              } else {
                result.add(stat);
              }
            }
          }
        } else {
          result.add(globStat);
        }
      }
    }
  }

  if (!errors.isEmpty()) {
    throw new InvalidInputException(errors);
  }
  return result;
}
 
Example 15
Source File: TestListFilesInFileContext.java    From hadoop with Apache License 2.0
/** Test when the input path has symbolic links as its children */
@Test
public void testSymbolicLinks() throws IOException {
  writeFile(fc, FILE1, FILE_LEN);
  writeFile(fc, FILE2, FILE_LEN);
  writeFile(fc, FILE3, FILE_LEN);
  
  Path dir4 = new Path(TEST_DIR, "dir4");
  Path dir5 = new Path(dir4, "dir5");
  Path file4 = new Path(dir4, "file4");
  
  fc.createSymlink(DIR1, dir5, true);
  fc.createSymlink(FILE1, file4, true);
  
  RemoteIterator<LocatedFileStatus> itor = fc.util().listFiles(dir4, true);
  LocatedFileStatus stat = itor.next();
  assertTrue(stat.isFile());
  assertEquals(fc.makeQualified(FILE2), stat.getPath());
  stat = itor.next();
  assertTrue(stat.isFile());
  assertEquals(fc.makeQualified(FILE3), stat.getPath());
  stat = itor.next();
  assertTrue(stat.isFile());
  assertEquals(fc.makeQualified(FILE1), stat.getPath());
  assertFalse(itor.hasNext());
  
  itor = fc.util().listFiles(dir4, false);
  stat = itor.next();
  assertTrue(stat.isFile());
  assertEquals(fc.makeQualified(FILE1), stat.getPath());
  assertFalse(itor.hasNext());
}
 
Example 16
Source File: TestEncryptionZones.java    From hadoop with Apache License 2.0
public void assertNumZones(final int numZones) throws IOException {
  RemoteIterator<EncryptionZone> it = dfsAdmin.listEncryptionZones();
  int count = 0;
  while (it.hasNext()) {
    count++;
    it.next();
  }
  assertEquals("Unexpected number of encryption zones!", numZones, count);
}
 
Example 17
Source File: HDFSEasy.java    From camel-kafka-connector with Apache License 2.0
public int countFiles(Path path) throws IOException {
    RemoteIterator<LocatedFileStatus> i = dfs.listFiles(path, false);
    int files = 0;
    while (i.hasNext()) {
        files++;
        i.next();
    }

    return files;
}
 
Example 18
Source File: TestCacheDirectives.java    From hadoop with Apache License 2.0
@Test(timeout=120000)
public void testWaitForCachedReplicas() throws Exception {
  FileSystemTestHelper helper = new FileSystemTestHelper();
  GenericTestUtils.waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
      return ((namenode.getNamesystem().getCacheCapacity() ==
          (NUM_DATANODES * CACHE_CAPACITY)) &&
            (namenode.getNamesystem().getCacheUsed() == 0));
    }
  }, 500, 60000);

  // Send a cache report referring to a bogus block.  It is important that
  // the NameNode be robust against this.
  NamenodeProtocols nnRpc = namenode.getRpcServer();
  DataNode dn0 = cluster.getDataNodes().get(0);
  String bpid = cluster.getNamesystem().getBlockPoolId();
  LinkedList<Long> bogusBlockIds = new LinkedList<Long> ();
  bogusBlockIds.add(999999L);
  nnRpc.cacheReport(dn0.getDNRegistrationForBP(bpid), bpid, bogusBlockIds);

  Path rootDir = helper.getDefaultWorkingDirectory(dfs);
  // Create the pool
  final String pool = "friendlyPool";
  nnRpc.addCachePool(new CachePoolInfo("friendlyPool"));
  // Create some test files
  final int numFiles = 2;
  final int numBlocksPerFile = 2;
  final List<String> paths = new ArrayList<String>(numFiles);
  for (int i=0; i<numFiles; i++) {
    Path p = new Path(rootDir, "testCachePaths-" + i);
    FileSystemTestHelper.createFile(dfs, p, numBlocksPerFile,
        (int)BLOCK_SIZE);
    paths.add(p.toUri().getPath());
  }
  // Check the initial statistics at the namenode
  waitForCachedBlocks(namenode, 0, 0, "testWaitForCachedReplicas:0");
  // Cache and check each path in sequence
  int expected = 0;
  for (int i=0; i<numFiles; i++) {
    CacheDirectiveInfo directive =
        new CacheDirectiveInfo.Builder().
          setPath(new Path(paths.get(i))).
          setPool(pool).
          build();
    nnRpc.addCacheDirective(directive, EnumSet.noneOf(CacheFlag.class));
    expected += numBlocksPerFile;
    waitForCachedBlocks(namenode, expected, expected,
        "testWaitForCachedReplicas:1");
  }

  // Check that the datanodes have the right cache values
  DatanodeInfo[] live = dfs.getDataNodeStats(DatanodeReportType.LIVE);
  assertEquals("Unexpected number of live nodes", NUM_DATANODES, live.length);
  long totalUsed = 0;
  for (DatanodeInfo dn : live) {
    final long cacheCapacity = dn.getCacheCapacity();
    final long cacheUsed = dn.getCacheUsed();
    final long cacheRemaining = dn.getCacheRemaining();
    assertEquals("Unexpected cache capacity", CACHE_CAPACITY, cacheCapacity);
    assertEquals("Capacity not equal to used + remaining",
        cacheCapacity, cacheUsed + cacheRemaining);
    assertEquals("Remaining not equal to capacity - used",
        cacheCapacity - cacheUsed, cacheRemaining);
    totalUsed += cacheUsed;
  }
  assertEquals(expected*BLOCK_SIZE, totalUsed);

  // Uncache and check each path in sequence
  RemoteIterator<CacheDirectiveEntry> entries =
    new CacheDirectiveIterator(nnRpc, null, Sampler.NEVER);
  for (int i=0; i<numFiles; i++) {
    CacheDirectiveEntry entry = entries.next();
    nnRpc.removeCacheDirective(entry.getInfo().getId());
    expected -= numBlocksPerFile;
    waitForCachedBlocks(namenode, expected, expected,
        "testWaitForCachedReplicas:2");
  }
}
 
Example 19
Source File: LogCLIHelpers.java    From hadoop with Apache License 2.0
@Private
@VisibleForTesting
public int dumpAContainersLogs(String appId, String containerId,
    String nodeId, String jobOwner) throws IOException {

  ApplicationId applicationId = ConverterUtils.toApplicationId(appId);
  List<Path> remoteAppLogDirs = AggregatedLogsBlock.getRemoteAppLogDirs(getConf(), applicationId, jobOwner);
  String remoteAppLogDir = StringUtils.join(remoteAppLogDirs, ",");

  RemoteIterator<FileStatus> nodeFiles;
  try {
    nodeFiles = AggregatedLogsBlock.getFileListAtRemoteAppDir(getConf(), remoteAppLogDirs, applicationId, jobOwner);
  } catch (FileNotFoundException fnf) {
    logDirNotExist(remoteAppLogDir.toString());
    return -1;
  }
  boolean foundContainerLogs = false;
  while (nodeFiles.hasNext()) {
    FileStatus thisNodeFile = nodeFiles.next();
    String fileName = thisNodeFile.getPath().getName();
    if (fileName.contains(LogAggregationUtils.getNodeString(nodeId))
        && !fileName.endsWith(LogAggregationUtils.TMP_FILE_SUFFIX)) {
      AggregatedLogFormat.LogReader reader = null;
      try {
        reader =
            new AggregatedLogFormat.LogReader(getConf(),
              thisNodeFile.getPath());
        if (dumpAContainerLogs(containerId, reader, System.out,
            thisNodeFile.getModificationTime()) > -1) {
          foundContainerLogs = true;
        }
      } finally {
        if (reader != null) {
          reader.close();
        }
      }
    }
  }
  if (!foundContainerLogs) {
    containerLogNotFound(containerId);
    return -1;
  }
  return 0;
}
 
Example 20
Source File: TestListFilesInFileContext.java    From hadoop with Apache License 2.0
/** Test when input path is a directory */
@Test
public void testDirectory() throws IOException {
  fc.mkdir(DIR1, FsPermission.getDefault(), true);

  // test empty directory
  RemoteIterator<LocatedFileStatus> itor = fc.util().listFiles(
      DIR1, true);
  assertFalse(itor.hasNext());
  itor = fc.util().listFiles(DIR1, false);
  assertFalse(itor.hasNext());
  
  // testing directory with 1 file
  writeFile(fc, FILE2, FILE_LEN);
  
  itor = fc.util().listFiles(DIR1, true);
  LocatedFileStatus stat = itor.next();
  assertFalse(itor.hasNext());
  assertTrue(stat.isFile());
  assertEquals(FILE_LEN, stat.getLen());
  assertEquals(fc.makeQualified(FILE2), stat.getPath());
  assertEquals(1, stat.getBlockLocations().length);
  
  itor = fc.util().listFiles(DIR1, false);
  stat = itor.next();
  assertFalse(itor.hasNext());
  assertTrue(stat.isFile());
  assertEquals(FILE_LEN, stat.getLen());
  assertEquals(fc.makeQualified(FILE2), stat.getPath());
  assertEquals(1, stat.getBlockLocations().length);

  // test more complicated directory
  writeFile(fc, FILE1, FILE_LEN);
  writeFile(fc, FILE3, FILE_LEN);

  itor = fc.util().listFiles(TEST_DIR, true);
  stat = itor.next();
  assertTrue(stat.isFile());
  assertEquals(fc.makeQualified(FILE2), stat.getPath());
  stat = itor.next();
  assertTrue(stat.isFile());
  assertEquals(fc.makeQualified(FILE3), stat.getPath());
  stat = itor.next();
  assertTrue(stat.isFile());
  assertEquals(fc.makeQualified(FILE1), stat.getPath());
  assertFalse(itor.hasNext());
  
  itor = fc.util().listFiles(TEST_DIR, false);
  stat = itor.next();
  assertTrue(stat.isFile());
  assertEquals(fc.makeQualified(FILE1), stat.getPath());
  assertFalse(itor.hasNext());
}