Java Code Examples for org.apache.hadoop.fs.LocatedFileStatus#isFile()

The following examples show how to use org.apache.hadoop.fs.LocatedFileStatus#isFile(). Each example is drawn from an open-source project; the source file, project, and license are noted in the header above its code.
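Before the project examples, here is a minimal, self-contained sketch of the pattern most of them share: recursively list a directory with FileSystem#listFiles and use LocatedFileStatus#isFile() to keep only regular files. The class name and the directory path below are placeholders chosen for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListRegularFiles {
    public static void main(String[] args) throws Exception {
        // Placeholder directory; point this at a real HDFS or local path.
        Path dir = new Path("/tmp/data");
        FileSystem fs = dir.getFileSystem(new Configuration());

        // Recursively iterate every entry under the directory.
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(dir, true);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            // isFile() is true only for regular files, so directories (and symlinks) are skipped.
            if (status.isFile()) {
                System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
            }
        }
    }
}
 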
Example 1
Source File: CachingFileSystem.java    From rubix with Apache License 2.0
@Override
public RemoteIterator<LocatedFileStatus> listLocatedStatus(Path path)
        throws IOException
{
    return new RemoteIterator<LocatedFileStatus>()
    {
        private final RemoteIterator<LocatedFileStatus> stats = fs.listLocatedStatus(path);

        @Override
        public boolean hasNext()
                throws IOException
        {
            return stats.hasNext();
        }

        @Override
        public LocatedFileStatus next()
                throws IOException
        {
            LocatedFileStatus status = stats.next();
            // use caching locations explicitly
            BlockLocation[] locations = status.isFile() ? getFileBlockLocations(status.getPath(), 0, status.getLen()) : null;
            return new LocatedFileStatus(status, locations);
        }
    };
}
 
Example 2
Source File: SegmentHelper.java    From indexr with Apache License 2.0
public static void literalAllSegments(FileSystem fileSystem, Path dir, Consumer<LocatedFileStatus> consumer) throws IOException {
    RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(dir, true);
    while (files.hasNext()) {
        LocatedFileStatus fileStatus = files.next();
        if (!fileStatus.isFile()) {
            continue;
        }
        if (fileStatus.getLen() == 0) {
            continue;
        }

        Path path = fileStatus.getPath();
        if (checkSegmentByPath(path)) {
            consumer.accept(fileStatus);
        }
    }
}
 
Example 3
Source File: AbstractFlagConfig.java    From datawave with Apache License 2.0
protected Path getTestFile(FileSystem fs) throws IOException {
    createTestFiles(1, 1);
    Path file = null;
    for (RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path(this.fmc.getBaseHDFSDir()), true); it.hasNext();) {
        LocatedFileStatus status = it.next();
        if (status.isFile()) {
            file = status.getPath();
            break;
        }
    }
    return file;
}
 
Example 4
Source File: HdfsFileWatcherPolicy.java    From kafka-connect-fs with Apache License 2.0
private void enqueue(String path) throws IOException {
    Path filePath = new Path(path);
    if (!fs.exists(filePath) || fs.getFileStatus(filePath) == null) {
        log.info("Cannot enqueue file [{}] because it does not exist but got an event from the FS", filePath);
        return;
    }

    log.debug("Enqueuing file to process [{}]", filePath);
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(filePath, false);
    while (it.hasNext()) {
        LocatedFileStatus status = it.next();
        if (!status.isFile() || !fileRegexp.matcher(status.getPath().getName()).find()) continue;
        fileQueue.offer(toMetadata(status));
    }
}
 
Example 5
Source File: TestBackupLogCleaner.java    From hbase with Apache License 2.0
private List<FileStatus> getListOfWALFiles(Configuration c) throws IOException {
  Path logRoot = new Path(CommonFSUtils.getWALRootDir(c), HConstants.HREGION_LOGDIR_NAME);
  FileSystem fs = logRoot.getFileSystem(c);
  RemoteIterator<LocatedFileStatus> it = fs.listFiles(logRoot, true);
  List<FileStatus> logFiles = new ArrayList<FileStatus>();
  while (it.hasNext()) {
    LocatedFileStatus lfs = it.next();
    if (lfs.isFile() && !AbstractFSWALProvider.isMetaFile(lfs.getPath())) {
      logFiles.add(lfs);
      LOG.info(Objects.toString(lfs));
    }
  }
  return logFiles;
}
 
Example 6
Source File: TestRestoreSnapshotHelper.java    From hbase with Apache License 2.0
private boolean hasHFileLink(Path tableDir) throws IOException {
  if (fs.exists(tableDir)) {
    RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(tableDir, true);
    while (iterator.hasNext()) {
      LocatedFileStatus fileStatus = iterator.next();
      if (fileStatus.isFile() && HFileLink.isHFileLink(fileStatus.getPath())) {
        return true;
      }
    }
  }
  return false;
}
 
Example 7
Source File: CompleteSetupIntegrationTest.java    From searchanalytics-bigdata with MIT License
private void FlumehdfsSinkAndTestData(List<Event> searchEvents)
		throws EventDeliveryException, IOException, FileNotFoundException {

	flumeHDFSSinkService.processEvents(searchEvents);

	// list all files and check data.
	Path dirPath = new Path(hadoopClusterService.getHDFSUri()
			+ "/searchevents");
	// FileStatus[] dirStat = fs.listStatus(dirPath);
	// Path fList[] = FileUtil.stat2Paths(dirStat);

	DistributedFileSystem fs = hadoopClusterService.getFileSystem();
	RemoteIterator<LocatedFileStatus> files = fs.listFiles(dirPath, true);
	while (files.hasNext()) {
		LocatedFileStatus locatedFileStatus = files.next();
		System.out.println("Check:" + locatedFileStatus.getPath());
		if (locatedFileStatus.isFile()) {
			Path path = locatedFileStatus.getPath();
			if (path.getName().startsWith("searchevents")) {
				FSDataInputStream input = fs.open(path);
				BufferedReader reader = new BufferedReader(
						new InputStreamReader(input));
				String body = null;
				while ((body = reader.readLine()) != null) {
					System.out.println("body is:" + body);
				}
				reader.close();
				input.close();
			}
		}
	}
}
 
Example 8
Source File: AbstractSearchJUnit4SpringContextTests.java    From searchanalytics-bigdata with MIT License
protected int printAndCountHdfsFileDirData(String path, String filePrefix,
		boolean print, boolean count) throws IOException {
	int recordsCount = 0;
	DistributedFileSystem fs = hadoopClusterService.getFileSystem();
	RemoteIterator<LocatedFileStatus> files = fs.listFiles(new Path(path),
			true);
	while (files.hasNext()) {
		LocatedFileStatus locatedFileStatus = files.next();
		System.out.println("Check:" + locatedFileStatus.getPath());
		if (locatedFileStatus.isFile()) {
			Path filePath = locatedFileStatus.getPath();
			if (filePath.getName().startsWith(filePrefix)) {
				FSDataInputStream input = fs.open(filePath);
				BufferedReader reader = new BufferedReader(
						new InputStreamReader(input));
				String body = null;
				while ((body = reader.readLine()) != null) {
					if (print) {
						System.out.println("file is: " + filePath.getName() + "body is:" + body);
					}
					if (count) {
						recordsCount++;
					}
				}
				reader.close();
				input.close();
			}
		}
	}
	return recordsCount;
}
 
Example 9
Source File: FlumeHDFSSinkServiceTest.java    From searchanalytics-bigdata with MIT License
@Test
public void testProcessEvents() throws FileNotFoundException, IOException {
	int searchEventsCount = 101;
	List<Event> searchEvents = generateSearchAnalyticsDataService
			.getSearchEvents(searchEventsCount);

	flumeHDFSSinkService.processEvents(searchEvents);

	// list all files and check data.
	Path dirPath = new Path(hadoopClusterService.getHDFSUri()
			+ "/searchevents");
	// FileStatus[] dirStat = fs.listStatus(dirPath);
	// Path fList[] = FileUtil.stat2Paths(dirStat);

	DistributedFileSystem fs = hadoopClusterService.getFileSystem();
	RemoteIterator<LocatedFileStatus> files = fs.listFiles(dirPath, true);
	while (files.hasNext()) {
		LocatedFileStatus locatedFileStatus = files.next();
		System.out.println("Check:" + locatedFileStatus.getPath());
		if (locatedFileStatus.isFile()) {
			Path path = locatedFileStatus.getPath();
			if (path.getName().startsWith("searchevents")) {
				FSDataInputStream input = fs.open(path);
				BufferedReader reader = new BufferedReader(
						new InputStreamReader(input));
				String body = null;
				while ((body = reader.readLine()) != null) {
					System.out.println("body is:" + body);
				}
				reader.close();
				input.close();
			}
		}
	}
}
 
Example 10
Source File: HiveMetadataUtils.java    From dremio-oss with Apache License 2.0
public static HiveReaderProto.FileSystemPartitionUpdateKey getFSBasedUpdateKey(String partitionDir, JobConf job,
                                                                               boolean isRecursive, boolean directoriesOnly,
                                                                               int partitionId) {
  final List<HiveReaderProto.FileSystemCachedEntity> cachedEntities = new ArrayList<>();
  final Path rootLocation = new Path(partitionDir);
  try {
    // TODO: DX-16001 - make async configurable for Hive.
    final HadoopFileSystemWrapper fs = new HadoopFileSystemWrapper(rootLocation, job);

    if (fs.exists(rootLocation)) {
      final FileStatus rootStatus = fs.getFileStatus(rootLocation);
      if (rootStatus.isDirectory()) {
        cachedEntities.add(HiveReaderProto.FileSystemCachedEntity.newBuilder()
          .setPath(EMPTY_STRING)
          .setLastModificationTime(rootStatus.getModificationTime())
          .setIsDir(true)
          .build());

        final RemoteIterator<LocatedFileStatus> statuses = isRecursive ? fs.listFiles(rootLocation, true) : fs.listFiles(rootLocation, false);
        while (statuses.hasNext()) {
          LocatedFileStatus fileStatus = statuses.next();
          final Path filePath = fileStatus.getPath();
          if (fileStatus.isDirectory()) {
            cachedEntities.add(HiveReaderProto.FileSystemCachedEntity.newBuilder()
              .setPath(PathUtils.relativePath(filePath, rootLocation))
              .setLastModificationTime(fileStatus.getModificationTime())
              .setIsDir(true)
              .build());
          } else if (fileStatus.isFile() && !directoriesOnly) {
            cachedEntities.add(HiveReaderProto.FileSystemCachedEntity.newBuilder()
              .setPath(PathUtils.relativePath(filePath, rootLocation))
              .setLastModificationTime(fileStatus.getModificationTime())
              .setIsDir(false)
              .build());
          }
        }
      } else {
        cachedEntities.add(HiveReaderProto.FileSystemCachedEntity.newBuilder()
          .setPath(EMPTY_STRING)
          .setLastModificationTime(rootStatus.getModificationTime())
          .setIsDir(false)
          .build());
      }
      return HiveReaderProto.FileSystemPartitionUpdateKey.newBuilder()
        .setPartitionId(partitionId)
        .setPartitionRootDir(fs.makeQualified(rootLocation).toString())
        .addAllCachedEntities(cachedEntities)
        .build();
    }
    return null;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
 
Example 11
Source File: DistributedFileSystemMetadata.java    From hdfs-metadata with GNU General Public License v3.0
public LinkedList<BlockLocation> getBlockLocations(Path path) throws IOException {
	LOG.info("Collecting block locations...");
	
	LinkedList<BlockLocation> blockLocations = new LinkedList<BlockLocation>();
	RemoteIterator<LocatedFileStatus> statuses = listFiles(path, true);
	int hasNextCode = hasNextCode(statuses);
	while(hasNextCode > 0){
		if(hasNextCode > 1){
			hasNextCode = hasNextCode(statuses);
			continue;
		}
		
		LocatedFileStatus fileStatus = statuses.next();
		
		if(fileStatus.isFile()){
			BlockLocation[] blockLocations_tmp = getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
			
			blockLocations.addAll(Arrays.asList(blockLocations_tmp));
		}
		
		int size = blockLocations.size();
		if(size > 0 && size % 5000 == 0)
			LOG.info("Collected " + size + " locations. Still in progress...");
		
		if(size >= MAX_NUMBER_OF_LOCATIONS){
			LOG.info("Reached max number of locations to collect. The amount will be representative enough.");
			break;
		}
		
		hasNextCode = hasNextCode(statuses);
	}
	LOG.info("Collected " + blockLocations.size() + " locations.");
	
	if(isHdfsBlocksMetadataEnabled()){
		BlockStorageLocation[] blockStorageLocations = getFileBlockStorageLocations(blockLocations);
		
		blockLocations.clear();
		blockLocations.addAll(Arrays.asList(blockStorageLocations));
	}else{
		LOG.error("VolumeId/DiskId cannot be collected since "
				+ "dfs.datanode.hdfs-blocks-metadata.enabled is not enabled.");
	}
	
	return blockLocations;
}
 
Example 12
Source File: TestPlannerUtil.java    From tajo with Apache License 2.0
@Test
public void testGetNonZeroLengthDataFiles() throws Exception {
  String queryFiles = ClassLoader.getSystemResource("queries").toString() + "/TestSelectQuery";
  Path path = new Path(queryFiles);

  TableDesc tableDesc = new TableDesc();
  tableDesc.setName("Test");
  tableDesc.setUri(path.toUri());

  FileSystem fs = path.getFileSystem(util.getConfiguration());

  List<Path> expectedFiles = new ArrayList<>();
  RemoteIterator<LocatedFileStatus> files = fs.listFiles(path, true);
  while (files.hasNext()) {
    LocatedFileStatus file = files.next();
    if (file.isFile() && file.getLen() > 0) {
      expectedFiles.add(file.getPath());
    }
  }
  int fileNum = expectedFiles.size() / 5;

  int numResultFiles = 0;
  for (int i = 0; i <= 5; i++) {
    int start = i * fileNum;

    FragmentProto[] fragments =
        PhysicalPlanUtil.getNonZeroLengthDataFiles(util.getConfiguration(), tableDesc, start, fileNum);
    assertNotNull(fragments);

    numResultFiles += fragments.length;
    int expectedSize = fileNum;
    if (i == 5) {
      //last
      expectedSize = expectedFiles.size() - (fileNum * 5);
    }

    comparePath(expectedFiles, fragments, start, expectedSize);
  }

  assertEquals(expectedFiles.size(), numResultFiles);
}