Java Code Examples for org.apache.hadoop.fs.FileSystem#listFiles()
The following examples show how to use org.apache.hadoop.fs.FileSystem#listFiles().
Each example notes the original project and source file it was taken from, so you can follow it back to the full context.
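Before the project-specific examples, here is a minimal sketch of the pattern they all share: listFiles() returns a RemoteIterator<LocatedFileStatus> rather than an array, so results are consumed with hasNext()/next(), and each status already carries block locations. The configuration and the /tmp/example directory below are placeholders, not taken from any of the projects.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListFilesSketch {
    public static void main(String[] args) throws IOException {
        // Placeholder configuration and directory; adjust for your cluster.
        Configuration conf = new Configuration();
        Path dir = new Path("/tmp/example");

        FileSystem fs = FileSystem.get(conf);
        // Second argument: true = recurse into subdirectories, false = this directory only.
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(dir, true);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            // LocatedFileStatus bundles FileStatus fields with block locations,
            // which is why many of the examples below prefer it over listStatus().
            System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
        }
    }
}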
Example 1
Source File: TestV2LsOperations.java From big-c with Apache License 2.0 | 6 votes |
/**
 * To get this project to compile under Hadoop 1, this code needs to be
 * commented out
 *
 * @param fs filesystem
 * @param dir dir
 * @param subdir subdir
 * @param recursive recurse?
 * @throws IOException IO problems
 */
public static void assertListFilesFinds(FileSystem fs,
                                        Path dir,
                                        Path subdir,
                                        boolean recursive) throws IOException {
  RemoteIterator<LocatedFileStatus> iterator =
      fs.listFiles(dir, recursive);
  boolean found = false;
  int entries = 0;
  StringBuilder builder = new StringBuilder();
  while (iterator.hasNext()) {
    LocatedFileStatus next = iterator.next();
    entries++;
    builder.append(next.toString()).append('\n');
    if (next.getPath().equals(subdir)) {
      found = true;
    }
  }
  assertTrue("Path " + subdir
             + " not found in directory " + dir + " : "
             + " entries=" + entries
             + " content" + builder.toString(),
             found);
}
Example 2
Source File: PcapFinalizer.java From metron with Apache License 2.0 | 6 votes |
/**
 * Returns a lazily-read Iterable over a set of sequence files.
 */
protected SequenceFileIterable readInterimResults(Path interimResultPath, Configuration config,
    FileSystem fs) throws IOException {
  List<Path> files = new ArrayList<>();
  for (RemoteIterator<LocatedFileStatus> it = fs.listFiles(interimResultPath, false); it.hasNext(); ) {
    Path p = it.next().getPath();
    if (p.getName().equals("_SUCCESS")) {
      fs.delete(p, false);
      continue;
    }
    files.add(p);
  }
  if (files.size() == 0) {
    LOG.info("No files to process with specified date range.");
  } else {
    LOG.debug("Interim results path={}", interimResultPath);
    Collections.sort(files, (o1, o2) -> o1.getName().compareTo(o2.getName()));
  }
  return new SequenceFileIterable(files, config);
}
Example 3
Source File: BackupUtils.java From hbase with Apache License 2.0 | 6 votes |
public static BackupInfo loadBackupInfo(Path backupRootPath, String backupId, FileSystem fs)
    throws IOException {
  Path backupPath = new Path(backupRootPath, backupId);

  RemoteIterator<LocatedFileStatus> it = fs.listFiles(backupPath, true);
  while (it.hasNext()) {
    LocatedFileStatus lfs = it.next();
    if (lfs.getPath().getName().equals(BackupManifest.MANIFEST_FILE_NAME)) {
      // Load BackupManifest
      BackupManifest manifest = new BackupManifest(fs, lfs.getPath().getParent());
      BackupInfo info = manifest.toBackupInfo();
      return info;
    }
  }
  return null;
}
Example 4
Source File: SparkUtils.java From deeplearning4j with Apache License 2.0 | 6 votes |
/**
 * List of the files in the given directory (path), as a {@code JavaRDD<String>}
 *
 * @param sc                Spark context
 * @param path              Path to list files in
 * @param recursive         Whether to walk the directory tree recursively (i.e., include subdirectories)
 * @param allowedExtensions If null: all files will be accepted. If non-null: only files with the specified
 *                          extension will be allowed. Exclude the extension separator - i.e., use "txt" not ".txt" here.
 * @param config            Hadoop configuration to use. Must not be null.
 * @return Paths in the directory
 * @throws IOException If error occurs getting directory contents
 */
public static JavaRDD<String> listPaths(@NonNull JavaSparkContext sc, String path, boolean recursive,
                                        Set<String> allowedExtensions, @NonNull Configuration config) throws IOException {
    List<String> paths = new ArrayList<>();
    FileSystem hdfs = FileSystem.get(URI.create(path), config);
    RemoteIterator<LocatedFileStatus> fileIter = hdfs.listFiles(new org.apache.hadoop.fs.Path(path), recursive);

    while (fileIter.hasNext()) {
        String filePath = fileIter.next().getPath().toString();
        if (allowedExtensions == null) {
            paths.add(filePath);
        } else {
            // Filter on the extension of the listed file, not of the input directory
            String ext = FilenameUtils.getExtension(filePath);
            if (allowedExtensions.contains(ext)) {
                paths.add(filePath);
            }
        }
    }
    return sc.parallelize(paths);
}
Example 5
Source File: TestHDFSIntegration.java From incubator-sentry with Apache License 2.0 | 6 votes |
private void verifyAccessToPath(String user, String group, String path, boolean hasPermission)
    throws Exception {
  Path p = new Path(path);
  UserGroupInformation hadoopUser =
      UserGroupInformation.createUserForTesting(user, new String[] {group});
  FileSystem fs = DFSTestUtil.getFileSystemAs(hadoopUser, hadoopConf);
  try {
    fs.listFiles(p, true);
    if (!hasPermission) {
      // Listing should have thrown an exception for a user without permission
      Assert.fail("Expected listing files to fail");
    }
  } catch (Exception e) {
    if (hasPermission) {
      throw e;
    }
  }
}
Example 6
Source File: HDFSSecUtils.java From bdt with Apache License 2.0 | 6 votes |
public String listFiles(String content, String hdfsDirPath) throws IOException {
    String files = "";
    Path path = new Path(hdfsDirPath);
    FileSystem fileSystem = FileSystem.get(conf);

    if ("files".equals(content)) {
        RemoteIterator<LocatedFileStatus> iterator = fileSystem.listFiles(path, false);
        while (iterator.hasNext()) {
            files = files + iterator.next().getPath().getName() + "\n";
        }
    } else {
        FileStatus[] status = fileSystem.listStatus(path);
        for (int i = 0; i < status.length; i++) {
            if (status[i].isDirectory()) {
                files = files + status[i].getPath().getName() + "/\n";
            } else {
                files = files + status[i].getPath().getName() + "\n";
            }
        }
    }
    return files.trim();
}
Example 7
Source File: JobLibLoader.java From SpyGlass with Apache License 2.0 | 6 votes |
public static void loadJars(String libPathStr, Configuration config) {
    try {
        Path libPath = new Path(libPathStr);
        FileSystem fs = FileSystem.get(config);
        RemoteIterator<LocatedFileStatus> itr = fs.listFiles(libPath, true);
        while (itr.hasNext()) {
            LocatedFileStatus f = itr.next();
            if (!f.isDirectory() && f.getPath().getName().endsWith("jar")) {
                logger.info("Loading Jar : " + f.getPath().getName());
                DistributedCache.addFileToClassPath(f.getPath(), config);
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        logger.error(e.toString());
    }
}
Example 8
Source File: GenerateData.java From hadoop with Apache License 2.0 | 6 votes |
static DataStatistics publishPlainDataStatistics(Configuration conf, Path inputDir)
    throws IOException {
  FileSystem fs = inputDir.getFileSystem(conf);

  // obtain input data file statuses
  long dataSize = 0;
  long fileCount = 0;
  RemoteIterator<LocatedFileStatus> iter = fs.listFiles(inputDir, true);
  PathFilter filter = new Utils.OutputFileUtils.OutputFilesFilter();
  while (iter.hasNext()) {
    LocatedFileStatus lStatus = iter.next();
    if (filter.accept(lStatus.getPath())) {
      dataSize += lStatus.getLen();
      ++fileCount;
    }
  }

  // publish the plain data statistics
  LOG.info("Total size of input data : " + StringUtils.humanReadableInt(dataSize));
  LOG.info("Total number of input data files : " + fileCount);

  return new DataStatistics(dataSize, fileCount, false);
}
Example 9
Source File: CommonFSUtils.java From hbase with Apache License 2.0 | 6 votes |
/**
 * Calls fs.listFiles() to get FileStatus and BlockLocations together for reducing rpc call
 *
 * @param fs file system
 * @param dir directory
 * @return LocatedFileStatus list
 */
public static List<LocatedFileStatus> listLocatedStatus(final FileSystem fs, final Path dir)
    throws IOException {
  List<LocatedFileStatus> status = null;
  try {
    RemoteIterator<LocatedFileStatus> locatedFileStatusRemoteIterator = fs
        .listFiles(dir, false);
    while (locatedFileStatusRemoteIterator.hasNext()) {
      if (status == null) {
        status = Lists.newArrayList();
      }
      status.add(locatedFileStatusRemoteIterator.next());
    }
  } catch (FileNotFoundException fnfe) {
    // if directory doesn't exist, return null
    if (LOG.isTraceEnabled()) {
      LOG.trace("{} doesn't exist", dir);
    }
  }
  return status;
}
Example 10
Source File: HoodieTestUtils.java From hudi with Apache License 2.0 | 5 votes |
public static FileStatus[] listAllLogFilesInPath(FileSystem fs, String basePath, String logfileExtension)
    throws IOException {
  RemoteIterator<LocatedFileStatus> itr = fs.listFiles(new Path(basePath), true);
  List<FileStatus> returns = new ArrayList<>();
  while (itr.hasNext()) {
    LocatedFileStatus status = itr.next();
    if (status.getPath().getName().contains(logfileExtension)) {
      returns.add(status);
    }
  }
  return returns.toArray(new FileStatus[returns.size()]);
}
Example 11
Source File: CorruptDataFilesAction.java From hbase with Apache License 2.0 | 5 votes |
@Override
public void perform() throws Exception {
  getLogger().info("Start corrupting data files");

  FileSystem fs = CommonFSUtils.getRootDirFileSystem(getConf());
  Path rootDir = CommonFSUtils.getRootDir(getConf());
  Path defaultDir = rootDir.suffix("/data/default");
  RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(defaultDir, true);
  while (iterator.hasNext()) {
    LocatedFileStatus status = iterator.next();
    if (!HFile.isHFileFormat(fs, status.getPath())) {
      continue;
    }
    if (RandomUtils.nextFloat(0, 100) > chance) {
      continue;
    }
    FSDataOutputStream out = fs.create(status.getPath(), true);
    try {
      out.write(0);
    } finally {
      out.close();
    }
    getLogger().info("Corrupting {}", status.getPath());
  }
  getLogger().info("Done corrupting data files");
}
Example 12
Source File: HdfsIOBenchmark.java From crail with Apache License 2.0 | 5 votes |
void browseDir() throws Exception {
  System.out.println("reading enumerated dir, path " + path);
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);

  // benchmark
  System.out.println("starting benchmark...");
  RemoteIterator<LocatedFileStatus> iter = fs.listFiles(path, false);
  while (iter.hasNext()) {
    LocatedFileStatus status = iter.next();
    System.out.println(status.getPath());
  }
  fs.close();
}
Example 13
Source File: YarnFileStageTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Verifies that the contents and names of the files in the directory {@code workDir} match
 * {@code expectedFiles}.
 *
 * @param targetFileSystem the filesystem of {@code workDir}
 * @param workDir the directory to verify
 * @param expectedFiles the expected names and contents of the files
 * @throws IOException if an error occurs while visiting {@code workDir}
 * @throws InterruptedException if the sleep is interrupted
 */
private static void verifyDirectoryRecursive(
    FileSystem targetFileSystem,
    Path workDir,
    Map<String, String> expectedFiles) throws IOException, InterruptedException {

  final HashMap<String /* (relative) path */, /* contents */ String> targetFiles = new HashMap<>();
  final RemoteIterator<LocatedFileStatus> targetFilesIterator =
      targetFileSystem.listFiles(workDir, true);
  final int workDirPrefixLength =
      workDir.toString().length() + 1; // one more for the concluding "/"
  while (targetFilesIterator.hasNext()) {
    final LocatedFileStatus targetFile = targetFilesIterator.next();

    int retries = 5;
    do {
      try (FSDataInputStream in = targetFileSystem.open(targetFile.getPath())) {
        String absolutePathString = targetFile.getPath().toString();
        String relativePath = absolutePathString.substring(workDirPrefixLength);
        targetFiles.put(relativePath, in.readUTF());

        assertEquals("extraneous data in file " + relativePath, -1, in.read());
        break;
      } catch (FileNotFoundException e) {
        // For S3, read-after-write may be eventually consistent, i.e. when trying
        // to access the object before writing it; see
        // https://docs.aws.amazon.com/AmazonS3/latest/dev/Introduction.html#ConsistencyModel
        // -> try again a bit later
        Thread.sleep(50);
      }
    } while ((retries--) > 0);
  }

  assertThat(targetFiles, equalTo(expectedFiles));
}
Example 14
Source File: RestoreTablesClient.java From hbase with Apache License 2.0 | 5 votes |
private List<Path> getFilesRecursively(String fileBackupDir)
    throws IllegalArgumentException, IOException {
  FileSystem fs = FileSystem.get((new Path(fileBackupDir)).toUri(), new Configuration());
  List<Path> list = new ArrayList<>();
  RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path(fileBackupDir), true);
  while (it.hasNext()) {
    Path p = it.next().getPath();
    if (HFile.isHFileFormat(fs, p)) {
      list.add(p);
    }
  }
  return list;
}
Example 15
Source File: HdfsRm.java From BigData-In-Practice with Apache License 2.0 | 5 votes |
/**
 * List all files under a directory
 */
public static void listFiles(String remotePathStr, boolean recursive) throws IOException {
    Path remotePath = new Path(remotePathStr);
    FileSystem fileSystem = SysUtil.getFileSystem();
    RemoteIterator<LocatedFileStatus> iterator = fileSystem.listFiles(remotePath, recursive);
    System.out.println(String.format("All files under directory <%s>:", remotePathStr));
    while (iterator.hasNext()) {
        System.out.println(iterator.next());
    }
    fileSystem.close();
}
Example 16
Source File: TestBackupBase.java From hbase with Apache License 2.0 | 5 votes |
protected void dumpBackupDir() throws IOException {
  // Dump Backup Dir
  FileSystem fs = FileSystem.get(conf1);
  RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path(BACKUP_ROOT_DIR), true);
  while (it.hasNext()) {
    LOG.debug(Objects.toString(it.next().getPath()));
  }
}
Example 17
Source File: HDFSFileManagerImpl.java From entrada with GNU General Public License v3.0 | 5 votes |
@Override
public List<String> expired(String location, int maxAge, String... filter) {
  if (!exists(location)) {
    log.error("Location {} does not exist, cannot continue", location);
    return Collections.emptyList();
  }

  List<String> files = new ArrayList<>();
  FileSystem fs = null;
  try {
    fs = createFS();
    RemoteIterator<LocatedFileStatus> fileStatusListIterator =
        fs.listFiles(new Path(location), true);
    while (fileStatusListIterator.hasNext()) {
      LocatedFileStatus fileStatus = fileStatusListIterator.next();
      files.add(fileStatus.getPath().toString());
    }
  } catch (Exception e) {
    log.error("Error while getting files", e);
    return Collections.emptyList();
  }

  // return found files; can be a partial list in case of an exception
  return files
      .stream()
      .filter(p -> checkFilter(p, Arrays.asList(filter)))
      .collect(Collectors.toList());
}
Example 18
Source File: ProtoParquetWriterWithOffsetTest.java From garmadon with Apache License 2.0 | 5 votes |
private Set<LocatedFileStatus> listFiles(FileSystem fs, Path p) throws IOException {
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(p, true);
    Set<LocatedFileStatus> s = new HashSet<>();
    while (it.hasNext()) {
        s.add(it.next());
    }
    return s;
}
Example 19
Source File: TestPlannerUtil.java From tajo with Apache License 2.0 | 4 votes |
@Test
public void testGetNonZeroLengthDataFiles() throws Exception {
  String queryFiles = ClassLoader.getSystemResource("queries").toString() + "/TestSelectQuery";
  Path path = new Path(queryFiles);

  TableDesc tableDesc = new TableDesc();
  tableDesc.setName("Test");
  tableDesc.setUri(path.toUri());

  FileSystem fs = path.getFileSystem(util.getConfiguration());

  List<Path> expectedFiles = new ArrayList<>();
  RemoteIterator<LocatedFileStatus> files = fs.listFiles(path, true);
  while (files.hasNext()) {
    LocatedFileStatus file = files.next();
    if (file.isFile() && file.getLen() > 0) {
      expectedFiles.add(file.getPath());
    }
  }

  int fileNum = expectedFiles.size() / 5;

  int numResultFiles = 0;
  for (int i = 0; i <= 5; i++) {
    int start = i * fileNum;

    FragmentProto[] fragments =
        PhysicalPlanUtil.getNonZeroLengthDataFiles(util.getConfiguration(), tableDesc, start, fileNum);
    assertNotNull(fragments);

    numResultFiles += fragments.length;
    int expectedSize = fileNum;
    if (i == 5) {
      // last
      expectedSize = expectedFiles.size() - (fileNum * 5);
    }

    comparePath(expectedFiles, fragments, start, expectedSize);
  }

  assertEquals(expectedFiles.size(), numResultFiles);
}
Example 20
Source File: TestSparkMultiLayerParameterAveraging.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Test
public void testFitViaStringPaths() throws Exception {
    Path tempDir = testDir.newFolder("DL4J-testFitViaStringPaths").toPath();
    File tempDirF = tempDir.toFile();
    tempDirF.deleteOnExit();

    int dataSetObjSize = 5;
    int batchSizePerExecutor = 25;
    DataSetIterator iter = new MnistDataSetIterator(dataSetObjSize, 1000, false);
    int i = 0;
    while (iter.hasNext()) {
        File nextFile = new File(tempDirF, i + ".bin");
        DataSet ds = iter.next();
        ds.save(nextFile);
        i++;
    }

    System.out.println("Saved to: " + tempDirF.getAbsolutePath());

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new RmsProp())
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list()
                    .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50)
                                    .activation(Activation.TANH).build())
                    .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(
                                    LossFunctions.LossFunction.MCXENT).nIn(50).nOut(10)
                                    .activation(Activation.SOFTMAX).build())
                    .build();

    SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, conf,
                    new ParameterAveragingTrainingMaster.Builder(numExecutors(), dataSetObjSize)
                                    .workerPrefetchNumBatches(5).batchSizePerWorker(batchSizePerExecutor)
                                    .averagingFrequency(1).repartionData(Repartition.Always).build());
    sparkNet.setCollectTrainingStats(true);

    // List files:
    Configuration config = new Configuration();
    FileSystem hdfs = FileSystem.get(tempDir.toUri(), config);
    RemoteIterator<LocatedFileStatus> fileIter =
                    hdfs.listFiles(new org.apache.hadoop.fs.Path(tempDir.toString()), false);

    List<String> paths = new ArrayList<>();
    while (fileIter.hasNext()) {
        String path = fileIter.next().getPath().toString();
        paths.add(path);
    }

    INDArray paramsBefore = sparkNet.getNetwork().params().dup();
    JavaRDD<String> pathRdd = sc.parallelize(paths);
    sparkNet.fitPaths(pathRdd);

    INDArray paramsAfter = sparkNet.getNetwork().params().dup();
    assertNotEquals(paramsBefore, paramsAfter);

    SparkTrainingStats stats = sparkNet.getSparkTrainingStats();
    // System.out.println(stats.statsAsString());
    stats.statsAsString();

    sparkNet.getTrainingMaster().deleteTempFiles(sc);
}