org.apache.hadoop.fs.LocatedFileStatus Java Examples

The following examples show how to use org.apache.hadoop.fs.LocatedFileStatus. Each example is taken from an open-source project; the source file and originating project are noted above it.
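Most of the examples below share one pattern: obtain a RemoteIterator<LocatedFileStatus> from FileSystem.listFiles (or listLocatedStatus) and drain it in a while loop. Here is a minimal, self-contained sketch of that pattern; the directory path is a placeholder, not taken from any of the projects below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListFilesSketch {
  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    // "/tmp/data" is a placeholder path; the boolean enables recursive listing
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path("/tmp/data"), true);
    while (it.hasNext()) {
      LocatedFileStatus status = it.next();
      // unlike a plain FileStatus, a LocatedFileStatus also carries block locations
      System.out.println(status.getPath() + " len=" + status.getLen()
          + " blocks=" + status.getBlockLocations().length);
    }
    fs.close();
  }
}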
Example #1
Source File: FSAgent.java    From Bats with Apache License 2.0
public List<String> listFiles(String dir) throws IOException
{
  List<String> files = new ArrayList<>();
  Path path = new Path(dir);

  FileStatus fileStatus = fileSystem.getFileStatus(path);
  if (!fileStatus.isDirectory()) {
    throw new FileNotFoundException("Cannot read directory " + dir);
  }
  RemoteIterator<LocatedFileStatus> it = fileSystem.listFiles(path, false);
  while (it.hasNext()) {
    LocatedFileStatus lfs = it.next();
    files.add(lfs.getPath().getName());
  }
  return files;
}
 
Example #2
Source File: TestV2LsOperations.java    From hadoop with Apache License 2.0
/**
 * Assert that a {@code listFiles} call on {@code dir} finds {@code subdir}.
 * To get this project to compile under Hadoop 1, this code needs to be
 * commented out.
 *
 * @param fs filesystem
 * @param dir directory to list
 * @param subdir path expected in the listing
 * @param recursive recurse?
 * @throws IOException IO problems
 */
public static void assertListFilesFinds(FileSystem fs,
                                        Path dir,
                                        Path subdir,
                                        boolean recursive) throws IOException {
  RemoteIterator<LocatedFileStatus> iterator =
    fs.listFiles(dir, recursive);
  boolean found = false;
  int entries = 0;
  StringBuilder builder = new StringBuilder();
  while (iterator.hasNext()) {
    LocatedFileStatus next = iterator.next();
    entries++;
    builder.append(next.toString()).append('\n');
    if (next.getPath().equals(subdir)) {
      found = true;
    }
  }
  assertTrue("Path " + subdir
             + " not found in directory " + dir + " : "
             + " entries=" + entries
             + " content"
             + builder.toString(),
             found);
}
 
Example #3
Source File: FSAgent.java    From Bats with Apache License 2.0
public List<LocatedFileStatus> listFilesInfo(String dir) throws IOException
{
  List<LocatedFileStatus> files = new ArrayList<>();
  Path path = new Path(dir);

  FileStatus fileStatus = fileSystem.getFileStatus(path);
  if (!fileStatus.isDirectory()) {
    throw new FileNotFoundException("Cannot read directory " + dir);
  }
  RemoteIterator<LocatedFileStatus> it = fileSystem.listFiles(path, false);
  while (it.hasNext()) {
    LocatedFileStatus lfs = it.next();
    files.add(lfs);
  }
  return files;
}
 
Example #4
Source File: CachingDirectoryLister.java    From presto with Apache License 2.0
private static RemoteIterator<LocatedFileStatus> simpleRemoteIterator(List<LocatedFileStatus> files)
{
    return new RemoteIterator<LocatedFileStatus>()
    {
        private final Iterator<LocatedFileStatus> iterator = ImmutableList.copyOf(files).iterator();

        @Override
        public boolean hasNext()
        {
            return iterator.hasNext();
        }

        @Override
        public LocatedFileStatus next()
        {
            return iterator.next();
        }
    };
}
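A note on the adapter above: RemoteIterator is a separate interface from java.util.Iterator chiefly because its hasNext() and next() methods are declared to throw IOException, so live filesystem listings can surface errors lazily; wrapping an already-materialized list this way presumably lets cached results be served through the same interface.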
 
Example #5
Source File: GenerateData.java    From hadoop with Apache License 2.0
static DataStatistics publishPlainDataStatistics(Configuration conf, 
                                                 Path inputDir) 
throws IOException {
  FileSystem fs = inputDir.getFileSystem(conf);

  // obtain input data file statuses
  long dataSize = 0;
  long fileCount = 0;
  RemoteIterator<LocatedFileStatus> iter = fs.listFiles(inputDir, true);
  PathFilter filter = new Utils.OutputFileUtils.OutputFilesFilter();
  while (iter.hasNext()) {
    LocatedFileStatus lStatus = iter.next();
    if (filter.accept(lStatus.getPath())) {
      dataSize += lStatus.getLen();
      ++fileCount;
    }
  }

  // publish the plain data statistics
  LOG.info("Total size of input data : " 
           + StringUtils.humanReadableInt(dataSize));
  LOG.info("Total number of input data files : " + fileCount);
  
  return new DataStatistics(dataSize, fileCount, false);
}
 
Example #6
Source File: TestRemoteNodeFileSystemDual.java    From dremio-oss with Apache License 2.0
@Test
public void basicClientReadWrite() throws Exception {
  Path basePath = new Path(temporaryFolder.newFolder().getAbsolutePath());
  Path path = ((PathCanonicalizer) clientFS).canonicalizePath(new Path(basePath, "testfile.bytes"));
  final byte[] randomBytesMoreThanBuffer = new byte[RemoteNodeFileSystem.REMOTE_WRITE_BUFFER_SIZE_DEFAULT * 3];
  Random r = new Random();
  r.nextBytes(randomBytesMoreThanBuffer);

  try(FSDataOutputStream stream = clientFS.create(path, false)){
    stream.write(randomBytesMoreThanBuffer);
  }


  RemoteIterator<LocatedFileStatus> iter = client.fileSystem.listFiles(basePath, false);
  assertTrue(iter.hasNext());
  LocatedFileStatus status = iter.next();

  try(FSDataInputStream in = clientFS.open(status.getPath())){
    byte[] back = new byte[randomBytesMoreThanBuffer.length];
    int dataRead = in.read(back);
    assertEquals(back.length, dataRead);
    assertTrue(Arrays.equals(randomBytesMoreThanBuffer, back));
  }
  client.fileSystem.delete(status.getPath(), false);
}
 
Example #7
Source File: TestBackgroundHiveSplitLoader.java    From presto with Apache License 2.0
private static BackgroundHiveSplitLoader backgroundHiveSplitLoader(
        List<LocatedFileStatus> files,
        TupleDomain<HiveColumnHandle> compactEffectivePredicate,
        Optional<HiveBucketFilter> hiveBucketFilter,
        Table table,
        Optional<HiveBucketHandle> bucketHandle,
        Optional<ValidWriteIdList> validWriteIds)
{
    return backgroundHiveSplitLoader(
            new TestingHdfsEnvironment(files),
            compactEffectivePredicate,
            hiveBucketFilter,
            table,
            bucketHandle,
            validWriteIds);
}
 
Example #8
Source File: LocatedFileStatusFetcher.java    From big-c with Apache License 2.0
@Override
public Result call() throws Exception {
  Result result = new Result();
  result.fs = fs;

  if (fileStatus.isDirectory()) {
    RemoteIterator<LocatedFileStatus> iter = fs
        .listLocatedStatus(fileStatus.getPath());
    while (iter.hasNext()) {
      LocatedFileStatus stat = iter.next();
      if (inputFilter.accept(stat.getPath())) {
        if (recursive && stat.isDirectory()) {
          result.dirsNeedingRecursiveCalls.add(stat);
        } else {
          result.locatedFileStatuses.add(stat);
        }
      }
    }
  } else {
    result.locatedFileStatuses.add(fileStatus);
  }
  return result;
}
 
Example #9
Source File: CreateHDFSStoreTest.java    From gemfirexd-oss with Apache License 2.0
protected int getExtensioncount(final String hdfsstore, final String extension) throws Exception {
  int counter = 0;
  HDFSStoreImpl hdfsStore = (HDFSStoreImpl) GemFireCacheImpl.getInstance().findHDFSStore(hdfsstore);
  FileSystem fs = hdfsStore.getFileSystem();
  try {
    Path basePath = new Path(hdfsStore.getHomeDir());
    RemoteIterator<LocatedFileStatus> files = fs.listFiles(basePath, true);

    while (files.hasNext()) {
      LocatedFileStatus next = files.next();
      if (next.getPath().getName().endsWith(extension)) {
        counter++;
      }
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
  return counter;
}
 
Example #10
Source File: RaidNode.java    From RDFS with Apache License 2.0
public static List<LocatedFileStatus> listDirectoryRaidLocatedFileStatus(
		Configuration conf, FileSystem srcFs, Path p) throws IOException {
	long minFileSize = conf.getLong(MINIMUM_RAIDABLE_FILESIZE_KEY,
			MINIMUM_RAIDABLE_FILESIZE);
	List<LocatedFileStatus> lfs = new ArrayList<LocatedFileStatus>();
	RemoteIterator<LocatedFileStatus> iter = srcFs.listLocatedStatus(p);
	while (iter.hasNext()) {
		LocatedFileStatus stat = iter.next();
		if (stat.isDir()) {
			return null;
		}
		// We don't raid too small files
		if (stat.getLen() < minFileSize) {
			continue;
		}
		lfs.add(stat);
	}
	if (lfs.size() == 0)
		return null;
	return lfs;
}
 
Example #11
Source File: SparkUtils.java    From deeplearning4j with Apache License 2.0
/**
 * List of the files in the given directory (path), as a {@code JavaRDD<String>}
 *
 * @param sc                Spark context
 * @param path              Path to list files in
 * @param recursive         Whether to walk the directory tree recursively (i.e., include subdirectories)
 * @param allowedExtensions If null: all files will be accepted. If non-null: only files with the specified extension will be allowed.
 *                          Exclude the extension separator - i.e., use "txt" not ".txt" here.
 * @param config            Hadoop configuration to use. Must not be null.
 * @return Paths in the directory
 * @throws IOException If error occurs getting directory contents
 */
public static JavaRDD<String> listPaths(@NonNull JavaSparkContext sc, String path, boolean recursive,
                                        Set<String> allowedExtensions, @NonNull Configuration config) throws IOException {
    List<String> paths = new ArrayList<>();
    FileSystem hdfs = FileSystem.get(URI.create(path), config);
    RemoteIterator<LocatedFileStatus> fileIter = hdfs.listFiles(new org.apache.hadoop.fs.Path(path), recursive);

    while (fileIter.hasNext()) {
        String filePath = fileIter.next().getPath().toString();
        if(allowedExtensions == null){
            paths.add(filePath);
        } else {
            String ext = FilenameUtils.getExtension(filePath);
            if(allowedExtensions.contains(ext)){
                paths.add(filePath);
            }
        }
    }
    return sc.parallelize(paths);
}
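A hypothetical call to the method above might look like this (the application name, directory URI, and extension set are illustrative placeholders):

import java.util.Collections;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

JavaSparkContext sc = new JavaSparkContext(new SparkConf().setAppName("listPathsDemo").setMaster("local[*]"));
// recursively list all .txt files under the directory
JavaRDD<String> txtPaths = SparkUtils.listPaths(sc, "hdfs:///data/corpus", true,
        Collections.singleton("txt"), sc.hadoopConfiguration());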
 
Example #12
Source File: TestBackgroundHiveSplitLoader.java    From presto with Apache License 2.0
@Override
public RemoteIterator<LocatedFileStatus> listLocatedStatus(Path f)
{
    return new RemoteIterator<LocatedFileStatus>()
    {
        private final Iterator<LocatedFileStatus> iterator = files.iterator();

        @Override
        public boolean hasNext()
        {
            return iterator.hasNext();
        }

        @Override
        public LocatedFileStatus next()
        {
            return iterator.next();
        }
    };
}
 
Example #13
Source File: HdfsIOBenchmark.java    From crail with Apache License 2.0
void enumerateDir() throws Exception {
	System.out.println("enumarate dir, path " + path);
	Configuration conf = new Configuration();
	FileSystem fs = FileSystem.get(conf); 

	int repfactor = 4;
	for (int k = 0; k < repfactor; k++) {
		long start = System.currentTimeMillis();
		for (int i = 0; i < size; i++) {
			// single operation == loop
			RemoteIterator<LocatedFileStatus> iter = fs.listFiles(path, false);
			while (iter.hasNext()) {
				iter.next();
			}
		}
		long end = System.currentTimeMillis();
		double executionTime = ((double) (end - start));
		double latency = executionTime * 1000.0 / ((double) size);
		System.out.println("execution time [ms] " + executionTime);
		System.out.println("latency [us] " + latency);
	}
	fs.close();
}
 
Example #14
Source File: DeleteDataFilesAction.java    From hbase with Apache License 2.0
@Override
public void perform() throws Exception {
  getLogger().info("Start deleting data files");
  FileSystem fs = CommonFSUtils.getRootDirFileSystem(getConf());
  Path rootDir = CommonFSUtils.getRootDir(getConf());
  Path defaultDir = rootDir.suffix("/data/default");
  RemoteIterator<LocatedFileStatus> iterator =  fs.listFiles(defaultDir, true);
  while (iterator.hasNext()){
    LocatedFileStatus status = iterator.next();
    if(!HFile.isHFileFormat(fs, status.getPath())){
      continue;
    }
    if(RandomUtils.nextFloat(0, 100) > chance){
      continue;
    }
    fs.delete(status.getPath(), true);
    getLogger().info("Deleting {}", status.getPath());
  }
  getLogger().info("Done deleting data files");
}
 
Example #15
Source File: DatanodeBenThread.java    From RDFS with Apache License 2.0
public int getNumberOfFiles() throws IOException {
  DistributedFileSystem dfs = (DistributedFileSystem)fs;
  RemoteIterator<LocatedFileStatus> iter = dfs.listLocatedStatus(outputPath);
  int fn = 0;
  while (iter.hasNext()) {
    LocatedFileStatus lfs = iter.next();
    if (lfs.isDir()) 
      continue;
    if (lfs.getBlockLocations().length != 1) 
      continue;
    String curHost = rtc.cur_datanode;
    for (String host: lfs.getBlockLocations()[0].getHosts()) {
      if (curHost.equals(host)){
        fn++;
        break;
      }
    }
  }
  LOG.info(" Found " + fn + " files in " + dfs.getUri());
  return fn;
}
 
Example #16
Source File: HDFSFunctions.java    From vxquery with Apache License 2.0
/**
 * Searches the given directory for the file.
 *
 * @param directory
 *            the directory to search
 * @param filename
 *            the name of the file to find
 * @return the path if the file exists in this directory, otherwise null.
 */
public Path searchInDirectory(Path directory, String filename) {
    //Search the files and folder in this Path to find the one matching the filename.
    try {
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(directory, true);
        String[] parts;
        Path path;
        while (it.hasNext()) {
            path = it.next().getPath();
            parts = path.toString().split("/");
            if (parts[parts.length - 1].equals(filename)) {
                return path;
            }
        }
    } catch (IOException e) {
        if (LOGGER.isLoggable(Level.SEVERE)) {
            LOGGER.severe(e.getMessage());
        }
    }
    return null;
}
 
Example #17
Source File: TestDistributedFileSystem.java    From hadoop with Apache License 2.0
@Test(timeout=60000)
public void testListFiles() throws IOException {
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
  
  try {
    DistributedFileSystem fs = cluster.getFileSystem();

    final Path relative = new Path("relative");
    fs.create(new Path(relative, "foo")).close();

    final List<LocatedFileStatus> retVal = new ArrayList<LocatedFileStatus>();
    final RemoteIterator<LocatedFileStatus> iter = fs.listFiles(relative, true);
    while (iter.hasNext()) {
      retVal.add(iter.next());
    }
    System.out.println("retVal = " + retVal);
  } finally {
    cluster.shutdown();
  }
}
 
Example #18
Source File: TestListFilesInFileContext.java    From hadoop with Apache License 2.0
/** Test when input path is a file */
@Test
public void testFile() throws IOException {
  fc.mkdir(TEST_DIR, FsPermission.getDefault(), true);
  writeFile(fc, FILE1, FILE_LEN);

  RemoteIterator<LocatedFileStatus> itor = fc.util().listFiles(
      FILE1, true);
  LocatedFileStatus stat = itor.next();
  assertFalse(itor.hasNext());
  assertTrue(stat.isFile());
  assertEquals(FILE_LEN, stat.getLen());
  assertEquals(fc.makeQualified(FILE1), stat.getPath());
  assertEquals(1, stat.getBlockLocations().length);
  
  itor = fc.util().listFiles(FILE1, false);
  stat = itor.next();
  assertFalse(itor.hasNext());
  assertTrue(stat.isFile());
  assertEquals(FILE_LEN, stat.getLen());
  assertEquals(fc.makeQualified(FILE1), stat.getPath());
  assertEquals(1, stat.getBlockLocations().length);
}
 
Example #19
Source File: TestINodeFile.java    From big-c with Apache License 2.0
private static void checkEquals(RemoteIterator<LocatedFileStatus> i1,
    RemoteIterator<LocatedFileStatus> i2) throws IOException {
  while (i1.hasNext()) {
    assertTrue(i2.hasNext());
    
    // Compare all the fields but the path name, which is relative
    // to the original path from listFiles.
    LocatedFileStatus l1 = i1.next();
    LocatedFileStatus l2 = i2.next();
    assertEquals(l1.getAccessTime(), l2.getAccessTime());
    assertEquals(l1.getBlockSize(), l2.getBlockSize());
    assertEquals(l1.getGroup(), l2.getGroup());
    assertEquals(l1.getLen(), l2.getLen());
    assertEquals(l1.getModificationTime(), l2.getModificationTime());
    assertEquals(l1.getOwner(), l2.getOwner());
    assertEquals(l1.getPermission(), l2.getPermission());
    assertEquals(l1.getReplication(), l2.getReplication());
  }
  assertFalse(i2.hasNext());
}
 
Example #20
Source File: StashFileSystem.java    From emodb with Apache License 2.0
@Override
public List<SplitPath> getInputSplits(Configuration config, Path path, int splitSize)
        throws IOException {
    ImmutableList.Builder<SplitPath> splits = ImmutableList.builder();

    RemoteIterator<LocatedFileStatus> files = listFiles(path, false);
    if (!files.hasNext()) {
        // No splits. Rather than return an empty list, return a single empty split.
        String table = getTableName(_rootPath, path);
        return ImmutableList.of(new SplitPath(getSplitPath(_rootPath, table, getEmptySplitFileName()), 1));
    }

    while (files.hasNext()) {
        LocatedFileStatus file = files.next();
        splits.add(new SplitPath(file.getPath(), file.getLen()));
    }

    return splits.build();
}
 
Example #21
Source File: FileInputFormat.java    From big-c with Apache License 2.0
private List<FileStatus> singleThreadedListStatus(JobContext job, Path[] dirs,
    PathFilter inputFilter, boolean recursive) throws IOException {
  List<FileStatus> result = new ArrayList<FileStatus>();
  List<IOException> errors = new ArrayList<IOException>();
  for (int i=0; i < dirs.length; ++i) {
    Path p = dirs[i];
    FileSystem fs = p.getFileSystem(job.getConfiguration()); 
    FileStatus[] matches = fs.globStatus(p, inputFilter);
    if (matches == null) {
      errors.add(new IOException("Input path does not exist: " + p));
    } else if (matches.length == 0) {
      errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
    } else {
      for (FileStatus globStat: matches) {
        if (globStat.isDirectory()) {
          RemoteIterator<LocatedFileStatus> iter =
              fs.listLocatedStatus(globStat.getPath());
          while (iter.hasNext()) {
            LocatedFileStatus stat = iter.next();
            if (inputFilter.accept(stat.getPath())) {
              if (recursive && stat.isDirectory()) {
                addInputPathRecursively(result, fs, stat.getPath(),
                    inputFilter);
              } else {
                result.add(stat);
              }
            }
          }
        } else {
          result.add(globStat);
        }
      }
    }
  }

  if (!errors.isEmpty()) {
    throw new InvalidInputException(errors);
  }
  return result;
}
 
Example #22
Source File: YarnApplicationFileUploader.java    From flink with Apache License 2.0
private Map<String, FileStatus> getAllFilesInProvidedLibDirs(final List<Path> providedLibDirs) {
	final Map<String, FileStatus> allFiles = new HashMap<>();
	checkNotNull(providedLibDirs).forEach(
		FunctionUtils.uncheckedConsumer(
			path -> {
				if (!fileSystem.exists(path) || !fileSystem.isDirectory(path)) {
					LOG.warn("Provided lib dir {} does not exist or is not a directory. Ignoring.", path);
				} else {
					final RemoteIterator<LocatedFileStatus> iterable = fileSystem.listFiles(path, true);
					while (iterable.hasNext()) {
						final LocatedFileStatus locatedFileStatus = iterable.next();

						final String name = path.getParent().toUri()
								.relativize(locatedFileStatus.getPath().toUri())
								.toString();

						final FileStatus prevMapping = allFiles.put(name, locatedFileStatus);
						if (prevMapping != null) {
							throw new IOException(
								"Two files with the same filename exist in the shared libs: " +
									prevMapping.getPath() + " - " + locatedFileStatus.getPath() +
									". Please deduplicate.");
						}
					}

					if (LOG.isDebugEnabled()) {
						LOG.debug("The following files were found in the shared lib dir: {}",
								allFiles.values().stream()
										.map(fileStatus -> fileStatus.getPath().toString())
										.collect(Collectors.joining(", ")));
					}
				}
			})
	);
	return Collections.unmodifiableMap(allFiles);
}
 
Example #23
Source File: HdfsIOBenchmark.java    From incubator-crail with Apache License 2.0
void browseDir() throws Exception {
	System.out.println("reading enumarate dir, path " + path);
	Configuration conf = new Configuration();
	FileSystem fs = FileSystem.get(conf); 
	
	//benchmark
	System.out.println("starting benchmark...");
	RemoteIterator<LocatedFileStatus> iter = fs.listFiles(path, false);
	while (iter.hasNext()) {
		LocatedFileStatus status = iter.next();
		System.out.println(status.getPath());
	}		
	fs.close();
}
 
Example #24
Source File: HdfsFileWatcherPolicy.java    From kafka-connect-fs with Apache License 2.0
private void enqueue(String path) throws IOException {
    Path filePath = new Path(path);
    if (!fs.exists(filePath) || fs.getFileStatus(filePath) == null) {
        log.info("Cannot enqueue file [{}] because it does not exist but got an event from the FS", filePath);
        return;
    }

    log.debug("Enqueuing file to process [{}]", filePath);
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(filePath, false);
    while (it.hasNext()) {
        LocatedFileStatus status = it.next();
        if (!status.isFile() || !fileRegexp.matcher(status.getPath().getName()).find()) continue;
        fileQueue.offer(toMetadata(status));
    }
}
 
Example #25
Source File: MapReduceFSFetcherHadoop2.java    From dr-elephant with Apache License 2.0
private void addJobsForHistoryDir(String historyDir, List<AnalyticJob> jobs, long startTime, long endTime)
    throws Exception {
  if (_fs.exists(new Path(historyDir))) {
    RemoteIterator<LocatedFileStatus> it = _fs.listFiles(new Path(historyDir), true);
    while (it.hasNext()) {
      String histFilename = it.next().getPath().getName();
      if (histFilename.endsWith(".jhist")) {
        try {
          JobIndexInfo indexInfo = FileNameIndexUtils.getIndexInfo(histFilename);
          String appId = Utils.getApplicationIdFromJobId(indexInfo.getJobId().toString());
          // Add the job only if required.
          if (indexInfo.getFinishTime() >= startTime && indexInfo.getFinishTime() <= endTime) {
            jobs.add(new AnalyticJob().setAppId(appId).setStartTime(indexInfo.getSubmitTime()).
                setFinishTime(indexInfo.getFinishTime()).setName(indexInfo.getJobName()).
                setUser(indexInfo.getUser()).setQueueName(indexInfo.getQueueName()).
                setAppType(_fetcherConfigurationData.getAppType()));
          }
        } catch (IOException e) {
          // Fall back to parsing the filename by ourselves.
          String[] jobDetails = histFilename.split("-");
          jobs.add(new AnalyticJob().setAppId(Utils.getApplicationIdFromJobId(jobDetails[0])).
              setAppType(_fetcherConfigurationData.getAppType()));
        }
      }
    }
  }
}
 
Example #26
Source File: FileSegmentPool.java    From indexr with Apache License 2.0
public void refreshLocalities() {
    try {
        // HashMap carries a multi-thread risk here. Change to ConcurrentHashMap if it becomes a problem.
        Map<String, List<String>> newHostMap = new HashMap<>(segmentFdMap.size());

        RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(segmentRootPath, true);
        while (files.hasNext()) {
            LocatedFileStatus fileStatus = files.next();
            if (fileStatus.getLen() == 0) {
                continue;
            }
            String name = getSegmentName(fileStatus);
            if (name == null) {
                continue;
            }
            BlockLocation[] locations = fileStatus.getBlockLocations();
            if (locations.length != 1) {
                logger.error("A segment should only consisted by one block, now {}. Ignored: {}", locations.length, name);
                continue;
            }
            List<String> hosts = Arrays.asList(locations[0].getHosts());
            newHostMap.put(name, hosts);
        }

        hostMap = newHostMap;
    } catch (IOException e) {
        if (e instanceof ClosedByInterruptException) {
            logger.warn("Refresh [{}] segment locality failed by ClosedByInterruptException.", tableName);
            // Interrupted by a normal close; nothing to do.
            return;
        }
        String msg = e.getMessage();
        if (msg != null && Strings.equals(msg.trim(), "Filesystem closed")) {
            logger.warn("Refresh [{}] segment locality failed by Filesystem closed.", tableName);
            // Interrupted by a normal close; nothing to do.
            return;
        }
        logger.warn("Refresh [{}] segment locality failed.", tableName, e);
    }
}