Java Code Examples for org.apache.hadoop.fs.BlockLocation#getLength()

The following examples show how to use org.apache.hadoop.fs.BlockLocation#getLength(). They are taken from open-source projects; the source file, project, and license are noted above each example.
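Before the project examples, here is a minimal self-contained sketch of the basic call pattern: ask the FileSystem for a file's block locations, then read each block's length. The path below is hypothetical and error handling is elided.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class BlockLengthExample {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    FileStatus status = fs.getFileStatus(new Path("/tmp/example.dat")); // hypothetical path
    BlockLocation[] blocks = fs.getFileBlockLocations(status, 0, status.getLen());
    long totalBytes = 0;
    for (BlockLocation block : blocks) {
      totalBytes += block.getLength(); // size of this block in bytes
    }
    System.out.println("blocks=" + blocks.length + ", totalBytes=" + totalBytes);
  }
}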
Example 1
Source File: Metadata.java    From Bats with Apache License 2.0
/**
 * Get the host affinity for a row group.
 *
 * @param fileStatus the parquet file
 * @param fs         the file system to query for block locations
 * @param start      the start of the row group
 * @param length     the length of the row group
 * @return host affinity for the row group
 */
private Map<String, Float> getHostAffinity(FileStatus fileStatus, FileSystem fs, long start, long length)
    throws IOException {
  BlockLocation[] blockLocations = fs.getFileBlockLocations(fileStatus, start, length);
  Map<String, Float> hostAffinityMap = Maps.newHashMap();
  for (BlockLocation blockLocation : blockLocations) {
    for (String host : blockLocation.getHosts()) {
      Float currentAffinity = hostAffinityMap.get(host);
      float blockStart = blockLocation.getOffset();
      float blockEnd = blockStart + blockLocation.getLength();
      float rowGroupEnd = start + length;
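      // Affinity is the fraction of the row group held by this block: the
      // block length minus any overhang before the row group's start and
      // past its end, divided by the row group length.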
      Float newAffinity = (blockLocation.getLength() - (blockStart < start ? start - blockStart : 0) -
          (blockEnd > rowGroupEnd ? blockEnd - rowGroupEnd : 0)) / length;
      if (currentAffinity != null) {
        hostAffinityMap.put(host, currentAffinity + newAffinity);
      } else {
        hostAffinityMap.put(host, newAffinity);
      }
    }
  }
  return hostAffinityMap;
}
 
Example 2
Source File: FileInputFormat.java    From RDFS with Apache License 2.0
private void verifyLocatedFileStatus(
    JobConf conf, List<LocatedFileStatus> stats)
    throws IOException {
  if (!conf.getBoolean("mapred.fileinputformat.verifysplits", true)) {
    return;
  }
  for (LocatedFileStatus stat: stats) {
    long fileLen = stat.getLen();
    long blockLenTotal = 0;
    for (BlockLocation loc: stat.getBlockLocations()) {
      blockLenTotal += loc.getLength();
    }
    if (blockLenTotal != fileLen) {
      throw new IOException("Error while getting located status, " +
        stat.getPath() + " has length " + fileLen + " but blocks total is " +
        blockLenTotal);
    }
  }
}
 
Example 3
Source File: Data.java    From Hi-WAY with Apache License 2.0
public long countAvailableLocalData(Container container) throws IOException {
	BlockLocation[] blockLocations = null;

	Path hdfsLocation = getHdfsPath();
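	// Retry until block locations are reported for the file.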
	while (blockLocations == null) {
		FileStatus fileStatus = hdfs.getFileStatus(hdfsLocation);
		blockLocations = hdfs.getFileBlockLocations(hdfsLocation, 0, fileStatus.getLen());
	}

	long sum = 0;
	for (BlockLocation blockLocation : blockLocations) {
		for (String host : blockLocation.getHosts()) {
			if (container.getNodeId().getHost().equals(host)) {
				sum += blockLocation.getLength();
				break;
			}
		}
	}
	return sum;
}
 
Example 4
Source File: FileInputFormat.java    From RDFS with Apache License 2.0
protected int getBlockIndex(BlockLocation[] blkLocations,
                            long offset) {
  for (int i = 0; i < blkLocations.length; i++) {
    // is the offset inside this block?
    if ((blkLocations[i].getOffset() <= offset) &&
        (offset < blkLocations[i].getOffset() + blkLocations[i].getLength())) {
      return i;
    }
  }
  BlockLocation last = blkLocations[blkLocations.length - 1];
  long fileLength = last.getOffset() + last.getLength() - 1;
  throw new IllegalArgumentException("Offset " + offset +
                                     " is outside of file (0.." +
                                     fileLength + ")");
}
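
In Hadoop's FileInputFormat, getBlockIndex is invoked while carving a file into splits, to find the block containing each split's starting offset. A hedged sketch of that call pattern, assumed to run inside a FileInputFormat subclass with simplified split bookkeeping:

long splitSize = 128L * 1024 * 1024;  // assumed target split size
long bytesRemaining = file.getLen();  // 'file' is a FileStatus for the input
BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, file.getLen());
while (bytesRemaining > 0) {
  // Locate the block that contains the next split's first byte.
  int blkIndex = getBlockIndex(blkLocations, file.getLen() - bytesRemaining);
  String[] splitHosts = blkLocations[blkIndex].getHosts();
  // ... emit a split of up to splitSize bytes placed on splitHosts ...
  bytesRemaining -= Math.min(splitSize, bytesRemaining);
}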
 
Example 5
Source File: RaidShell.java    From RDFS with Apache License 2.0
private boolean isBlockCorrupt(BlockLocation fileBlock)
    throws IOException {
  if (fileBlock == null)
    // empty block
    return false;
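  // Corrupt if flagged by HDFS, or if the block has bytes but no replica locations.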
  return fileBlock.isCorrupt() || 
      (fileBlock.getNames().length == 0 && fileBlock.getLength() > 0);
}
 
Example 6
Source File: FSUtils.java    From hbase with Apache License 2.0
/**
 * Update blocksDistribution with blockLocations
 * @param blocksDistribution the hdfs blocks distribution
 * @param blockLocations an array containing block location
 */
static public void addToHDFSBlocksDistribution(
    HDFSBlocksDistribution blocksDistribution, BlockLocation[] blockLocations)
    throws IOException {
  for (BlockLocation bl : blockLocations) {
    String[] hosts = bl.getHosts();
    long len = bl.getLength();
    StorageType[] storageTypes = bl.getStorageTypes();
    blocksDistribution.addHostsAndBlockWeight(hosts, len, storageTypes);
  }
}
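
A hedged usage sketch for the method above: aggregating one file's blocks into a distribution (the path is hypothetical, and the snippet assumes an open FileSystem handle):

FileStatus status = fs.getFileStatus(new Path("/hbase/data/example"));  // hypothetical path
BlockLocation[] locations = fs.getFileBlockLocations(status, 0, status.getLen());
HDFSBlocksDistribution distribution = new HDFSBlocksDistribution();
FSUtils.addToHDFSBlocksDistribution(distribution, locations);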
 
Example 7
Source File: DataLocalizer.java    From mr4c with Apache License 2.0
public List<String> localize(Collection<DataFileSource> sources) throws IOException {
	List<BlockLocation> allBlocks = new ArrayList<BlockLocation>();
	long totalSize = 0;
	for ( DataFileSource src : sources ) {
		BlockLocation[] blocks = src.getBlockLocation();
		allBlocks.addAll(Arrays.asList(blocks));
		for ( BlockLocation block : blocks ) {
			totalSize += block.getLength();
		}
	}

	return Arrays.asList(m_calc.calcSplitHosts(allBlocks.toArray(new BlockLocation[allBlocks.size()]), 0, totalSize, m_topo));
}
 
Example 8
Source File: HadoopIgfsSecondaryFileSystemDelegateImpl.java    From ignite with Apache License 2.0
/**
 * Convert a Hadoop affinity block location into an IGFS affinity block location.
 *
 * @param block Hadoop affinity block location.
 * @return IGFS affinity block location.
 */
private IgfsBlockLocation convertBlockLocation(BlockLocation block) {
    try {
        String[] names = block.getNames();
        String[] hosts = block.getHosts();

        return new IgfsBlockLocationImpl(
            block.getOffset(), block.getLength(),
            Arrays.asList(names), Arrays.asList(hosts));
    } catch (IOException e) {
        throw handleSecondaryFsError(e, "Failed to convert block location: " + block);
    }
}
 
Example 9
Source File: FileInputFormat.java    From big-c with Apache License 2.0
protected int getBlockIndex(BlockLocation[] blkLocations,
                            long offset) {
  for (int i = 0; i < blkLocations.length; i++) {
    // is the offset inside this block?
    if ((blkLocations[i].getOffset() <= offset) &&
        (offset < blkLocations[i].getOffset() + blkLocations[i].getLength())) {
      return i;
    }
  }
  BlockLocation last = blkLocations[blkLocations.length - 1];
  long fileLength = last.getOffset() + last.getLength() - 1;
  throw new IllegalArgumentException("Offset " + offset +
                                     " is outside of file (0.." +
                                     fileLength + ")");
}
 
Example 10
Source File: TestAffinityCalculator.java    From dremio-oss with Apache License 2.0
@Test
public void testBuildRangeMap() {
  BlockLocation[] blocks = buildBlockLocations(new String[4], 256*1024*1024);
  long tA = System.nanoTime();
  ImmutableRangeMap.Builder<Long, BlockLocation> blockMapBuilder = new ImmutableRangeMap.Builder<>();
  for (BlockLocation block : blocks) {
    long start = block.getOffset();
    long end = start + block.getLength();
    Range<Long> range = Range.closedOpen(start, end);
    blockMapBuilder = blockMapBuilder.put(range, block);
  }
  ImmutableRangeMap<Long,BlockLocation> map = blockMapBuilder.build();
  long tB = System.nanoTime();
  System.out.println(String.format("Took %f ms to build range map", (tB - tA) / 1e6));
}
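
The buildBlockLocations helper is defined elsewhere in the test class and is not shown here; a plausible minimal version, assumed rather than taken from the Dremio sources, lays out one equal-sized block per host entry, back to back:

private BlockLocation[] buildBlockLocations(String[] hosts, long blockSize) {
  BlockLocation[] blockLocations = new BlockLocation[hosts.length];
  for (int i = 0; i < blockLocations.length; i++) {
    // Block i covers the byte range [i * blockSize, (i + 1) * blockSize).
    blockLocations[i] = new BlockLocation(hosts, hosts, i * blockSize, blockSize);
  }
  return blockLocations;
}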
 
Example 11
Source File: FileInputFormat.java    From hadoop-gpu with Apache License 2.0
protected int getBlockIndex(BlockLocation[] blkLocations,
                            long offset) {
  for (int i = 0; i < blkLocations.length; i++) {
    // is the offset inside this block?
    if ((blkLocations[i].getOffset() <= offset) &&
        (offset < blkLocations[i].getOffset() + blkLocations[i].getLength())) {
      return i;
    }
  }
  BlockLocation last = blkLocations[blkLocations.length - 1];
  long fileLength = last.getOffset() + last.getLength() - 1;
  throw new IllegalArgumentException("Offset " + offset +
                                     " is outside of file (0.." +
                                     fileLength + ")");
}
 
Example 12
Source File: FileInputFormat.java    From hadoop with Apache License 2.0
protected int getBlockIndex(BlockLocation[] blkLocations,
                            long offset) {
  for (int i = 0; i < blkLocations.length; i++) {
    // is the offset inside this block?
    if ((blkLocations[i].getOffset() <= offset) &&
        (offset < blkLocations[i].getOffset() + blkLocations[i].getLength())) {
      return i;
    }
  }
  BlockLocation last = blkLocations[blkLocations.length - 1];
  long fileLength = last.getOffset() + last.getLength() - 1;
  throw new IllegalArgumentException("Offset " + offset +
                                     " is outside of file (0.." +
                                     fileLength + ")");
}
 
Example 13
Source File: HdfsLocalityReporter.java    From lucene-solr with Apache License 2.0
/**
 * Provide statistics on HDFS block locality, both in terms of bytes and block counts.
 */
@Override
public void initializeMetrics(SolrMetricsContext parentContext, String scope) {
  solrMetricsContext = parentContext.getChildContext(this);
  MetricsMap metricsMap = new MetricsMap((detailed, map) -> {
    long totalBytes = 0;
    long localBytes = 0;
    int totalCount = 0;
    int localCount = 0;

    for (Iterator<HdfsDirectory> iterator = cache.keySet().iterator(); iterator.hasNext();) {
      HdfsDirectory hdfsDirectory = iterator.next();

      if (hdfsDirectory.isClosed()) {
        iterator.remove();
      } else {
        try {
          refreshDirectory(hdfsDirectory);
          Map<FileStatus,BlockLocation[]> blockMap = cache.get(hdfsDirectory);

          // For every block in every file in this directory, count it
          for (BlockLocation[] locations : blockMap.values()) {
            for (BlockLocation bl : locations) {
              totalBytes += bl.getLength();
              totalCount++;

              if (Arrays.asList(bl.getHosts()).contains(hostname)) {
                localBytes += bl.getLength();
                localCount++;
              }
            }
          }
        } catch (IOException e) {
          log.warn("Could not retrieve locality information for {} due to exception: {}",
              hdfsDirectory.getHdfsDirPath(), e);
        }
      }
    }
    map.put(LOCALITY_BYTES_TOTAL, totalBytes);
    map.put(LOCALITY_BYTES_LOCAL, localBytes);
    if (localBytes == 0) {
      map.put(LOCALITY_BYTES_RATIO, 0);
    } else {
      map.put(LOCALITY_BYTES_RATIO, localBytes / (double) totalBytes);
    }
    map.put(LOCALITY_BLOCKS_TOTAL, totalCount);
    map.put(LOCALITY_BLOCKS_LOCAL, localCount);
    if (localCount == 0) {
      map.put(LOCALITY_BLOCKS_RATIO, 0);
    } else {
      map.put(LOCALITY_BLOCKS_RATIO, localCount / (double) totalCount);
    }
  });
  solrMetricsContext.gauge(metricsMap, true, "hdfsLocality", getCategory().toString(), scope);
}
 
Example 14
Source File: InputStriper.java    From big-c with Apache License 2.0
/**
 * @param inputDir Pool used to resolve block locations.
 * @param bytes Target byte count
 * @param nLocs Number of block locations per split.
 * @return A set of files satisfying the byte count, with locations weighted
 *         to the dominating proportion of input bytes.
 */
CombineFileSplit splitFor(FilePool inputDir, long bytes, int nLocs)
    throws IOException {
  final ArrayList<Path> paths = new ArrayList<Path>();
  final ArrayList<Long> start = new ArrayList<Long>();
  final ArrayList<Long> length = new ArrayList<Long>();
  final HashMap<String,Double> sb = new HashMap<String,Double>();
  do {
    paths.add(current.getPath());
    start.add(currentStart);
    final long fromFile = Math.min(bytes, current.getLen() - currentStart);
    length.add(fromFile);
    for (BlockLocation loc :
        inputDir.locationsFor(current, currentStart, fromFile)) {
      final double tedium = loc.getLength() / (1.0 * bytes);
      for (String l : loc.getHosts()) {
        Double j = sb.get(l);
        if (null == j) {
          sb.put(l, tedium);
        } else {
          sb.put(l, j.doubleValue() + tedium);
        }
      }
    }
    currentStart += fromFile;
    bytes -= fromFile;
    // Switch to a new file if
    //  - the current file is uncompressed and completely used
    //  - the current file is compressed
    
    CompressionCodecFactory compressionCodecs = 
      new CompressionCodecFactory(conf);
    CompressionCodec codec = compressionCodecs.getCodec(current.getPath());
    if (current.getLen() - currentStart == 0
        || codec != null) {
      current = files.get(++idx % files.size());
      currentStart = 0;
    }
  } while (bytes > 0);
  final ArrayList<Entry<String,Double>> sort =
    new ArrayList<Entry<String,Double>>(sb.entrySet());
  Collections.sort(sort, hostRank);
  final String[] hosts = new String[Math.min(nLocs, sort.size())];
  for (int i = 0; i < nLocs && i < sort.size(); ++i) {
    hosts[i] = sort.get(i).getKey();
  }
  return new CombineFileSplit(paths.toArray(new Path[0]),
      toLongArray(start), toLongArray(length), hosts);
}
 
Example 15
Source File: InputStriper.java    From RDFS with Apache License 2.0
/**
 * @param inputDir Pool used to resolve block locations.
 * @param bytes Target byte count
 * @param nLocs Number of block locations per split.
 * @return A set of files satisfying the byte count, with locations weighted
 *         to the dominating proportion of input bytes.
 */
CombineFileSplit splitFor(FilePool inputDir, long bytes, int nLocs)
    throws IOException {
  final ArrayList<Path> paths = new ArrayList<Path>();
  final ArrayList<Long> start = new ArrayList<Long>();
  final ArrayList<Long> length = new ArrayList<Long>();
  final HashMap<String,Double> sb = new HashMap<String,Double>();
  do {
    paths.add(current.getPath());
    start.add(currentStart);
    final long fromFile = Math.min(bytes, current.getLen() - currentStart);
    length.add(fromFile);
    for (BlockLocation loc :
        inputDir.locationsFor(current, currentStart, fromFile)) {
      final double tedium = loc.getLength() / (1.0 * bytes);
      for (String l : loc.getHosts()) {
        Double j = sb.get(l);
        if (null == j) {
          sb.put(l, tedium);
        } else {
          sb.put(l, j.doubleValue() + tedium);
        }
      }
    }
    currentStart += fromFile;
    bytes -= fromFile;
    if (current.getLen() - currentStart == 0) {
      current = files.get(++idx % files.size());
      currentStart = 0;
    }
  } while (bytes > 0);
  final ArrayList<Entry<String,Double>> sort =
    new ArrayList<Entry<String,Double>>(sb.entrySet());
  Collections.sort(sort, hostRank);
  final String[] hosts = new String[Math.min(nLocs, sort.size())];
  for (int i = 0; i < nLocs && i < sort.size(); ++i) {
    hosts[i] = sort.get(i).getKey();
  }
  return new CombineFileSplit(paths.toArray(new Path[0]),
      toLongArray(start), toLongArray(length), hosts);
}
 
Example 16
Source File: FileFragment.java    From tajo with Apache License 2.0
public FileFragment(String tableName, Path uri, BlockLocation blockLocation)
    throws IOException {
  this(tableName, uri, blockLocation.getOffset(), blockLocation.getLength(), blockLocation.getHosts(), null);
}
 
Example 17
Source File: ParquetInputFormat.java    From parquet-mr with Apache License 2.0
private long getHDFSBlockEndingPosition(int hdfsBlockIndex) {
  BlockLocation hdfsBlock = hdfsBlocks[hdfsBlockIndex];
  return hdfsBlock.getOffset() + hdfsBlock.getLength() - 1;
}