Java Code Examples for org.apache.hadoop.fs.LocatedFileStatus#getBlockLocations()

The following examples show how to use org.apache.hadoop.fs.LocatedFileStatus#getBlockLocations() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestOzoneFileInterfaces.java    From hadoop-ozone with Apache License 2.0 6 votes vote down vote up
@Test
public void testOzoneManagerLocatedFileStatus() throws IOException {
  // Write a small random payload to a freshly named file.
  String contents = RandomStringUtils.randomAlphanumeric(20);
  Path file = createPath("/" + RandomStringUtils.randomAlphanumeric(5));
  try (FSDataOutputStream out = fs.create(file)) {
    out.writeBytes(contents);
  }

  // getFileStatus() should hand back a located status carrying at least
  // one block, and every block should expose at least one name and host.
  FileStatus plainStatus = fs.getFileStatus(file);
  assertTrue(plainStatus instanceof LocatedFileStatus);
  BlockLocation[] locations =
      ((LocatedFileStatus) plainStatus).getBlockLocations();
  assertTrue(locations.length >= 1);

  for (BlockLocation location : locations) {
    assertTrue(location.getNames().length >= 1);
    assertTrue(location.getHosts().length >= 1);
  }
}
 
Example 2
Source File: FileInputFormat.java    From RDFS with Apache License 2.0 6 votes vote down vote up
/**
 * Sanity-checks that each located file's block lengths sum exactly to its
 * reported file length. Skipped entirely when the
 * "mapred.fileinputformat.verifysplits" flag is set to false.
 *
 * @throws IOException if any file's blocks do not account for its length
 */
private void verifyLocatedFileStatus(
    JobConf conf, List<LocatedFileStatus> stats)
    throws IOException {
  // Verification is on by default but can be disabled via configuration.
  if (!conf.getBoolean("mapred.fileinputformat.verifysplits", true)) {
    return;
  }
  for (LocatedFileStatus status : stats) {
    long expectedLen = status.getLen();
    long summedLen = 0;
    for (BlockLocation location : status.getBlockLocations()) {
      summedLen += location.getLength();
    }
    if (summedLen != expectedLen) {
      throw new IOException("Error while getting located status, " +
        status.getPath() + " has length " + expectedLen +
        " but blocks total is " + summedLen);
    }
  }
}
 
Example 3
Source File: DatanodeBenThread.java    From RDFS with Apache License 2.0 6 votes vote down vote up
/**
 * Counts single-block files under {@code outputPath} that have a replica
 * on the datanode currently under test ({@code rtc.cur_datanode}).
 * Directories and multi-block files are skipped.
 *
 * @return number of matching files
 * @throws IOException if listing the directory fails
 */
public int getNumberOfFiles() throws IOException {
  DistributedFileSystem dfs = (DistributedFileSystem) fs;
  RemoteIterator<LocatedFileStatus> it = dfs.listLocatedStatus(outputPath);
  int count = 0;
  while (it.hasNext()) {
    LocatedFileStatus status = it.next();
    // Only plain files consisting of exactly one block are of interest.
    if (status.isDir() || status.getBlockLocations().length != 1) {
      continue;
    }
    String targetHost = rtc.cur_datanode;
    for (String host : status.getBlockLocations()[0].getHosts()) {
      if (targetHost.equals(host)) {
        count++;
        break;
      }
    }
  }
  LOG.info(" Found " + count + " files in " + dfs.getUri());
  return count;
}
 
Example 4
Source File: TestOzoneFileInterfaces.java    From hadoop-ozone with Apache License 2.0 5 votes vote down vote up
@Test
@Ignore("HDDS-3506")
public void testOzoneManagerLocatedFileStatusBlockOffsetsWithMultiBlockFile()
    throws Exception {
  // naive assumption: MiniOzoneCluster will not have larger than ~1GB
  // block size when running this test (the size is narrowed to int below,
  // and the 2*blockSize+837 payload must fit in a Java String).
  int blockSize = (int) fs.getConf().getStorageSize(
      OzoneConfigKeys.OZONE_SCM_BLOCK_SIZE,
      OzoneConfigKeys.OZONE_SCM_BLOCK_SIZE_DEFAULT,
      StorageUnit.BYTES
  );
  // Payload spans two full blocks plus a 837-byte tail, so the file is
  // expected to be split into exactly three blocks.
  String data = RandomStringUtils.randomAlphanumeric(2 * blockSize + 837);
  String filePath = RandomStringUtils.randomAlphanumeric(5);
  Path path = createPath("/" + filePath);
  try (FSDataOutputStream stream = fs.create(path)) {
    stream.writeBytes(data);
  }
  FileStatus status = fs.getFileStatus(path);
  assertTrue(status instanceof LocatedFileStatus);
  LocatedFileStatus locatedFileStatus = (LocatedFileStatus) status;
  BlockLocation[] blockLocations = locatedFileStatus.getBlockLocations();

  // Blocks must be laid out back-to-back: offsets at 0, blockSize and
  // 2*blockSize; the first two are full blocks, the last holds the tail.
  assertEquals(0, blockLocations[0].getOffset());
  assertEquals(blockSize, blockLocations[1].getOffset());
  assertEquals(2*blockSize, blockLocations[2].getOffset());
  assertEquals(blockSize, blockLocations[0].getLength());
  assertEquals(blockSize, blockLocations[1].getLength());
  assertEquals(837, blockLocations[2].getLength());
}
 
Example 5
Source File: FileSegmentPool.java    From indexr with Apache License 2.0 5 votes vote down vote up
/**
 * Rebuilds the segment-name -> host-list locality map by walking all files
 * under {@code segmentRootPath}. The new map is built off to the side and
 * swapped into {@code hostMap} in one assignment, so readers never observe
 * a half-built map. Failures caused by a normally-closed filesystem are
 * logged at WARN and ignored; other IO failures are logged with the cause.
 */
public void refreshLocalities() {
    try {
        // Plain HashMap carries multi-thread risk here. Change to
        // ConcurrentHashMap if a problem is ever observed.
        Map<String, List<String>> newHostMap = new HashMap<>(segmentFdMap.size());

        RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(segmentRootPath, true);
        while (files.hasNext()) {
            LocatedFileStatus fileStatus = files.next();
            // Skip empty files and paths that do not name a segment.
            if (fileStatus.getLen() == 0) {
                continue;
            }
            String name = getSegmentName(fileStatus);
            if (name == null) {
                continue;
            }
            BlockLocation[] locations = fileStatus.getBlockLocations();
            // A segment is expected to occupy exactly one block; anything
            // else is a layout error and the segment is skipped.
            if (locations.length != 1) {
                logger.error("A segment should only consisted by one block, now {}. Ignored: {}", locations.length, name);
                continue;
            }
            List<String> hosts = Arrays.asList(locations[0].getHosts());
            newHostMap.put(name, hosts);
        }

        // Single reference assignment publishes the fresh map atomically.
        hostMap = newHostMap;
    } catch (IOException e) {
        if (e instanceof ClosedByInterruptException) {
            logger.warn("Refresh [{}] segment locality failed by ClosedByInterruptException.", tableName);
            // Normally close interrupt.
            return;
        }
        // Hadoop's DFSClient signals a closed filesystem only via this
        // message text, so the string match is deliberate.
        String msg = e.getMessage();
        if (msg != null && Strings.equals(msg.trim(), "Filesystem closed")) {
            logger.warn("Refresh [{}] segment locality failed by Filesystem closed.", tableName);
            // Normally close interrupt.
            return;
        }
        logger.warn("Refresh [{}] segment locality failed.", tableName, e);
    }
}
 
Example 6
Source File: FileInputFormat.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Generate the list of files and make them into FileSplits.
 *
 * <p>Splittable non-empty files are chopped into chunks of
 * {@code computeSplitSize(blockSize, minSize, maxSize)} bytes, each split
 * annotated with the hosts of the block containing its start offset.
 * Non-splittable files become a single split; empty files get a split
 * with an empty host array.
 *
 * @param job the job context providing the input paths and configuration
 * @return one FileSplit per chunk of input data
 * @throws IOException if listing the input files fails
 */ 
public List<InputSplit> getSplits(JobContext job
                                  ) throws IOException {
  long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
  long maxSize = getMaxSplitSize(job);

  // generate splits
  List<InputSplit> splits = new ArrayList<InputSplit>();
  for (LocatedFileStatus file: listLocatedStatus(job)) {
    Path path = file.getPath();
    long length = file.getLen();
    BlockLocation[] blkLocations = file.getBlockLocations();

    if ((length != 0) && isSplitable(job, path)) { 
      long blockSize = file.getBlockSize();
      long splitSize = computeSplitSize(blockSize, minSize, maxSize);

      // Emit full-size splits while the remainder exceeds SPLIT_SLOP
      // times the split size; this avoids a tiny trailing split.
      long bytesRemaining = length;
      while (((double) bytesRemaining)/splitSize > SPLIT_SLOP) {
        // Locate the block holding the split's start for host affinity.
        int blkIndex = getBlockIndex(blkLocations, length-bytesRemaining);
        splits.add(new FileSplit(path, length-bytesRemaining, splitSize, 
                                 blkLocations[blkIndex].getHosts()));
        bytesRemaining -= splitSize;
      }
      
      // The remainder (at most SPLIT_SLOP * splitSize bytes) becomes one
      // final split located on the file's last block.
      if (bytesRemaining != 0) {
        splits.add(new FileSplit(path, 0, length-bytesRemaining, bytesRemaining, 
                   blkLocations[blkLocations.length-1].getHosts()));
      }
    } else if (length != 0) {
      // Non-splittable file: one split covering the whole file.
      splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts()));
    } else { 
      //Create empty hosts array for zero length files
      splits.add(new FileSplit(path, 0, length, new String[0]));
    }
  }
  LOG.debug("Total # of splits: " + splits.size());
  return splits;
}
 
Example 7
Source File: PlacementMonitor.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Flattens every block of every raid-located file under {@code dirPath}
 * into a single list of BlockInfo entries.
 *
 * @return one BlockInfo per block, tagged with its owning file's path
 * @throws IOException if listing the directory fails
 */
List<BlockInfo> getDirBlockInfos(FileSystem fs, Path dirPath)
    throws IOException {
  List<BlockInfo> blockInfos = new ArrayList<BlockInfo>();
  for (LocatedFileStatus status :
      RaidNode.listDirectoryRaidLocatedFileStatus(conf, fs, dirPath)) {
    Path filePath = status.getPath();
    for (BlockLocation location : status.getBlockLocations()) {
      blockInfos.add(new BlockInfo(location, filePath));
    }
  }
  return blockInfos;
}
 
Example 8
Source File: PlacementMonitor.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the blocks of {@code path} whose starting offset falls inside
 * the half-open byte range [start, start + length).
 *
 * @return matching blocks; empty when the file's status is unavailable
 * @throws IOException if fetching the located status fails
 */
List<BlockInfo> getBlockInfos(
  FileSystem fs, Path path, long start, long length)
    throws IOException {
  List<BlockInfo> blocks = new ArrayList<BlockInfo>();
  LocatedFileStatus status = getLocatedFileStatus(fs, path);
  if (status == null) {
    return blocks;
  }
  long limit = start + length;
  for (BlockLocation location : status.getBlockLocations()) {
    long offset = location.getOffset();
    // Membership is decided by the block's start offset only.
    if (offset >= start && offset < limit) {
      blocks.add(new BlockInfo(location, path));
    }
  }
  return blocks;
}