Java Code Examples for org.apache.hadoop.fs.LocatedFileStatus#isFile()
The following examples show how to use org.apache.hadoop.fs.LocatedFileStatus#isFile().
Each snippet is taken from an open-source project; the source file, project, and license are noted above each example.
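All of the examples share the same basic pattern: list a directory with FileSystem#listFiles (or listLocatedStatus) and use isFile() to skip anything that is not a regular file. Before the examples, here is a minimal, self-contained sketch of that pattern; the directory path is hypothetical, and note that with most FileSystem implementations listFiles already yields only files, so the isFile() guard is a defensive check.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class IsFileSketch {
  public static void main(String[] args) throws IOException {
    // Hypothetical directory; replace with a real HDFS or local path.
    Path dir = new Path("/tmp/data");
    FileSystem fs = dir.getFileSystem(new Configuration());

    RemoteIterator<LocatedFileStatus> it = fs.listFiles(dir, /* recursive */ true);
    while (it.hasNext()) {
      LocatedFileStatus status = it.next();
      // isFile() is true only for regular files; directories and symlinks return false.
      if (status.isFile()) {
        System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
      }
    }
  }
}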
Example 1
Source File: CachingFileSystem.java From rubix with Apache License 2.0
@Override
public RemoteIterator<LocatedFileStatus> listLocatedStatus(Path path) throws IOException {
  return new RemoteIterator<LocatedFileStatus>() {
    private final RemoteIterator<LocatedFileStatus> stats = fs.listLocatedStatus(path);

    @Override
    public boolean hasNext() throws IOException {
      return stats.hasNext();
    }

    @Override
    public LocatedFileStatus next() throws IOException {
      LocatedFileStatus status = stats.next();
      // use caching locations explicitly
      BlockLocation[] locations = status.isFile()
          ? getFileBlockLocations(status.getPath(), 0, status.getLen())
          : null;
      return new LocatedFileStatus(status, locations);
    }
  };
}
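A caller might consume this wrapper as follows (a sketch; cachingFs and the path are hypothetical). The isFile() guard matters here because the wrapper sets block locations to null for directories:

RemoteIterator<LocatedFileStatus> it = cachingFs.listLocatedStatus(new Path("/warehouse/table"));
while (it.hasNext()) {
  LocatedFileStatus status = it.next();
  if (status.isFile()) {
    // Locations were produced by getFileBlockLocations(), i.e. cache-aware hosts.
    System.out.println(status.getPath() + ": " + status.getBlockLocations().length + " block(s)");
  }
}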
Example 2
Source File: SegmentHelper.java From indexr with Apache License 2.0
public static void literalAllSegments(FileSystem fileSystem, Path dir, Consumer<LocatedFileStatus> consumer) throws IOException {
  RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(dir, true);
  while (files.hasNext()) {
    LocatedFileStatus fileStatus = files.next();
    if (!fileStatus.isFile()) {
      continue;
    }
    if (fileStatus.getLen() == 0) {
      continue;
    }
    Path path = fileStatus.getPath();
    if (checkSegmentByPath(path)) {
      consumer.accept(fileStatus);
    }
  }
}
Example 3
Source File: AbstractFlagConfig.java From datawave with Apache License 2.0
protected Path getTestFile(FileSystem fs) throws IOException {
  createTestFiles(1, 1);
  Path file = null;
  for (RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path(this.fmc.getBaseHDFSDir()), true); it.hasNext();) {
    LocatedFileStatus status = it.next();
    if (status.isFile()) {
      file = status.getPath();
      break;
    }
  }
  return file;
}
Example 4
Source File: HdfsFileWatcherPolicy.java From kafka-connect-fs with Apache License 2.0
private void enqueue(String path) throws IOException {
  Path filePath = new Path(path);
  if (!fs.exists(filePath) || fs.getFileStatus(filePath) == null) {
    log.info("Cannot enqueue file [{}] because it does not exist but got an event from the FS", filePath);
    return;
  }

  log.debug("Enqueuing file to process [{}]", filePath);
  RemoteIterator<LocatedFileStatus> it = fs.listFiles(filePath, false);
  while (it.hasNext()) {
    LocatedFileStatus status = it.next();
    if (!status.isFile() || !fileRegexp.matcher(status.getPath().getName()).find()) {
      continue;
    }
    fileQueue.offer(toMetadata(status));
  }
}
Example 5
Source File: TestBackupLogCleaner.java From hbase with Apache License 2.0
private List<FileStatus> getListOfWALFiles(Configuration c) throws IOException {
  Path logRoot = new Path(CommonFSUtils.getWALRootDir(c), HConstants.HREGION_LOGDIR_NAME);
  FileSystem fs = logRoot.getFileSystem(c);
  RemoteIterator<LocatedFileStatus> it = fs.listFiles(logRoot, true);
  List<FileStatus> logFiles = new ArrayList<FileStatus>();
  while (it.hasNext()) {
    LocatedFileStatus lfs = it.next();
    if (lfs.isFile() && !AbstractFSWALProvider.isMetaFile(lfs.getPath())) {
      logFiles.add(lfs);
      LOG.info(Objects.toString(lfs));
    }
  }
  return logFiles;
}
Example 6
Source File: TestRestoreSnapshotHelper.java From hbase with Apache License 2.0
private boolean hasHFileLink(Path tableDir) throws IOException {
  if (fs.exists(tableDir)) {
    RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(tableDir, true);
    while (iterator.hasNext()) {
      LocatedFileStatus fileStatus = iterator.next();
      if (fileStatus.isFile() && HFileLink.isHFileLink(fileStatus.getPath())) {
        return true;
      }
    }
  }
  return false;
}
Example 7
Source File: CompleteSetupIntegrationTest.java From searchanalytics-bigdata with MIT License
private void FlumehdfsSinkAndTestData(List<Event> searchEvents) throws EventDeliveryException, IOException, FileNotFoundException {
  flumeHDFSSinkService.processEvents(searchEvents);

  // list all files and check data.
  Path dirPath = new Path(hadoopClusterService.getHDFSUri() + "/searchevents");
  // FileStatus[] dirStat = fs.listStatus(dirPath);
  // Path fList[] = FileUtil.stat2Paths(dirStat);
  DistributedFileSystem fs = hadoopClusterService.getFileSystem();
  RemoteIterator<LocatedFileStatus> files = fs.listFiles(dirPath, true);
  while (files.hasNext()) {
    LocatedFileStatus locatedFileStatus = files.next();
    System.out.println("Check:" + locatedFileStatus.getPath());
    if (locatedFileStatus.isFile()) {
      Path path = locatedFileStatus.getPath();
      if (path.getName().startsWith("searchevents")) {
        FSDataInputStream input = fs.open(path);
        BufferedReader reader = new BufferedReader(new InputStreamReader(input));
        String body = null;
        while ((body = reader.readLine()) != null) {
          System.out.println("body is:" + body);
        }
        reader.close();
        input.close();
      }
    }
  }
}
Example 8
Source File: AbstractSearchJUnit4SpringContextTests.java From searchanalytics-bigdata with MIT License
protected int printAndCountHdfsFileDirData(String path, String filePrefix, boolean print, boolean count) throws IOException {
  int recordsCount = 0;
  DistributedFileSystem fs = hadoopClusterService.getFileSystem();
  RemoteIterator<LocatedFileStatus> files = fs.listFiles(new Path(path), true);
  while (files.hasNext()) {
    LocatedFileStatus locatedFileStatus = files.next();
    System.out.println("Check:" + locatedFileStatus.getPath());
    if (locatedFileStatus.isFile()) {
      Path filePath = locatedFileStatus.getPath();
      if (filePath.getName().startsWith(filePrefix)) {
        FSDataInputStream input = fs.open(filePath);
        BufferedReader reader = new BufferedReader(new InputStreamReader(input));
        String body = null;
        while ((body = reader.readLine()) != null) {
          if (print) {
            System.out.println("file is: " + filePath.getName() + "body is:" + body);
          }
          if (count) {
            recordsCount++;
          }
        }
        reader.close();
        input.close();
      }
    }
  }
  return recordsCount;
}
Example 9
Source File: FlumeHDFSSinkServiceTest.java From searchanalytics-bigdata with MIT License
@Test
public void testProcessEvents() throws FileNotFoundException, IOException {
  int searchEventsCount = 101;
  List<Event> searchEvents = generateSearchAnalyticsDataService.getSearchEvents(searchEventsCount);
  flumeHDFSSinkService.processEvents(searchEvents);

  // list all files and check data.
  Path dirPath = new Path(hadoopClusterService.getHDFSUri() + "/searchevents");
  // FileStatus[] dirStat = fs.listStatus(dirPath);
  // Path fList[] = FileUtil.stat2Paths(dirStat);
  DistributedFileSystem fs = hadoopClusterService.getFileSystem();
  RemoteIterator<LocatedFileStatus> files = fs.listFiles(dirPath, true);
  while (files.hasNext()) {
    LocatedFileStatus locatedFileStatus = files.next();
    System.out.println("Check:" + locatedFileStatus.getPath());
    if (locatedFileStatus.isFile()) {
      Path path = locatedFileStatus.getPath();
      if (path.getName().startsWith("searchevents")) {
        FSDataInputStream input = fs.open(path);
        BufferedReader reader = new BufferedReader(new InputStreamReader(input));
        String body = null;
        while ((body = reader.readLine()) != null) {
          System.out.println("body is:" + body);
        }
        reader.close();
        input.close();
      }
    }
  }
}
Example 10
Source File: HiveMetadataUtils.java From dremio-oss with Apache License 2.0
public static HiveReaderProto.FileSystemPartitionUpdateKey getFSBasedUpdateKey(String partitionDir, JobConf job,
    boolean isRecursive, boolean directoriesOnly, int partitionId) {
  final List<HiveReaderProto.FileSystemCachedEntity> cachedEntities = new ArrayList<>();
  final Path rootLocation = new Path(partitionDir);
  try {
    // TODO: DX-16001 - make async configurable for Hive.
    final HadoopFileSystemWrapper fs = new HadoopFileSystemWrapper(rootLocation, job);

    if (fs.exists(rootLocation)) {
      final FileStatus rootStatus = fs.getFileStatus(rootLocation);
      if (rootStatus.isDirectory()) {
        cachedEntities.add(HiveReaderProto.FileSystemCachedEntity.newBuilder()
            .setPath(EMPTY_STRING)
            .setLastModificationTime(rootStatus.getModificationTime())
            .setIsDir(true)
            .build());

        final RemoteIterator<LocatedFileStatus> statuses =
            isRecursive ? fs.listFiles(rootLocation, true) : fs.listFiles(rootLocation, false);
        while (statuses.hasNext()) {
          LocatedFileStatus fileStatus = statuses.next();
          final Path filePath = fileStatus.getPath();
          if (fileStatus.isDirectory()) {
            cachedEntities.add(HiveReaderProto.FileSystemCachedEntity.newBuilder()
                .setPath(PathUtils.relativePath(filePath, rootLocation))
                .setLastModificationTime(fileStatus.getModificationTime())
                .setIsDir(true)
                .build());
          } else if (fileStatus.isFile() && !directoriesOnly) {
            cachedEntities.add(HiveReaderProto.FileSystemCachedEntity.newBuilder()
                .setPath(PathUtils.relativePath(filePath, rootLocation))
                .setLastModificationTime(fileStatus.getModificationTime())
                .setIsDir(false)
                .build());
          }
        }
      } else {
        cachedEntities.add(HiveReaderProto.FileSystemCachedEntity.newBuilder()
            .setPath(EMPTY_STRING)
            .setLastModificationTime(rootStatus.getModificationTime())
            .setIsDir(false)
            .build());
      }
      return HiveReaderProto.FileSystemPartitionUpdateKey.newBuilder()
          .setPartitionId(partitionId)
          .setPartitionRootDir(fs.makeQualified(rootLocation).toString())
          .addAllCachedEntities(cachedEntities)
          .build();
    }
    return null;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
Example 11
Source File: DistributedFileSystemMetadata.java From hdfs-metadata with GNU General Public License v3.0
public LinkedList<BlockLocation> getBlockLocations(Path path) throws IOException {
  LOG.info("Collecting block locations...");

  LinkedList<BlockLocation> blockLocations = new LinkedList<BlockLocation>();
  RemoteIterator<LocatedFileStatus> statuses = listFiles(path, true);
  int hasNextCode = hasNextCode(statuses);
  while (hasNextCode > 0) {
    if (hasNextCode > 1) {
      hasNextCode = hasNextCode(statuses);
      continue;
    }

    LocatedFileStatus fileStatus = statuses.next();
    if (fileStatus.isFile()) {
      BlockLocation[] blockLocations_tmp = getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
      blockLocations.addAll(Arrays.asList(blockLocations_tmp));
    }

    int size = blockLocations.size();
    if (size > 0 && size % 5000 == 0) {
      LOG.info("Collected " + size + " locations. Still in progress...");
    }
    if (size >= MAX_NUMBER_OF_LOCATIONS) {
      LOG.info("Reached max number of locations to collect. The amount will be representative enough.");
      break;
    }
    hasNextCode = hasNextCode(statuses);
  }
  LOG.info("Collected " + blockLocations.size() + " locations.");

  if (isHdfsBlocksMetadataEnabled()) {
    BlockStorageLocation[] blockStorageLocations = getFileBlockStorageLocations(blockLocations);
    blockLocations.clear();
    blockLocations.addAll(Arrays.asList(blockStorageLocations));
  } else {
    LOG.error("VolumnId/DiskId can not be collected since "
        + "dfs.datanode.hdfs-blocks-metadata.enabled is not enabled.");
  }

  return blockLocations;
}
Example 12
Source File: TestPlannerUtil.java From tajo with Apache License 2.0
@Test
public void testGetNonZeroLengthDataFiles() throws Exception {
  String queryFiles = ClassLoader.getSystemResource("queries").toString() + "/TestSelectQuery";
  Path path = new Path(queryFiles);

  TableDesc tableDesc = new TableDesc();
  tableDesc.setName("Test");
  tableDesc.setUri(path.toUri());

  FileSystem fs = path.getFileSystem(util.getConfiguration());

  List<Path> expectedFiles = new ArrayList<>();
  RemoteIterator<LocatedFileStatus> files = fs.listFiles(path, true);
  while (files.hasNext()) {
    LocatedFileStatus file = files.next();
    if (file.isFile() && file.getLen() > 0) {
      expectedFiles.add(file.getPath());
    }
  }

  int fileNum = expectedFiles.size() / 5;
  int numResultFiles = 0;
  for (int i = 0; i <= 5; i++) {
    int start = i * fileNum;

    FragmentProto[] fragments =
        PhysicalPlanUtil.getNonZeroLengthDataFiles(util.getConfiguration(), tableDesc, start, fileNum);
    assertNotNull(fragments);
    numResultFiles += fragments.length;

    int expectedSize = fileNum;
    if (i == 5) { // last
      expectedSize = expectedFiles.size() - (fileNum * 5);
    }
    comparePath(expectedFiles, fragments, start, expectedSize);
  }
  assertEquals(expectedFiles.size(), numResultFiles);
}