org.apache.hadoop.fs.BlockStorageLocation Java Examples

The following examples show how to use org.apache.hadoop.fs.BlockStorageLocation. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BlockStorageLocationUtil.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Builds one {@link BlockStorageLocation} per converted {@link BlockLocation}
 * by pairing it with the {@link VolumeId}s recorded for the
 * {@link LocatedBlock} at the same index.
 *
 * @param blocks located blocks; element i is matched with converted location i
 * @param blockVolumeIds volume ids keyed by located block; a list must be
 *          present for every block that is visited, otherwise the lookup
 *          result is dereferenced as null
 * @return array in which element i combines converted location i with the
 *         volume ids looked up for {@code blocks.get(i)}
 * @throws IOException declared by this method; presumably raised by the
 *           DFSUtil conversion (confirm against DFSUtil)
 */
static BlockStorageLocation[] convertToVolumeBlockLocations(
    List<LocatedBlock> blocks,
    Map<LocatedBlock, List<VolumeId>> blockVolumeIds) throws IOException {
  BlockLocation[] converted = DFSUtil.locatedBlocks2Locations(blocks);
  // The result has exactly one slot per converted location, so fill an array
  // of that size directly.
  BlockStorageLocation[] result = new BlockStorageLocation[converted.length];
  int idx = 0;
  for (BlockLocation location : converted) {
    LocatedBlock block = blocks.get(idx);
    List<VolumeId> ids = blockVolumeIds.get(block);
    VolumeId[] idArray = ids.toArray(new VolumeId[0]);
    result[idx] = new BlockStorageLocation(location, idArray);
    idx++;
  }
  return result;
}
 
Example #2
Source File: BlockStorageLocationUtil.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Combines a list of {@link LocatedBlock}s with their per-block
 * {@link VolumeId} lists into an array of {@link BlockStorageLocation}s.
 *
 * @param blocks located blocks to convert; order is preserved in the result
 * @param blockVolumeIds lookup from located block to the volume ids of its
 *          replicas; every visited block needs a non-null entry
 * @return one {@link BlockStorageLocation} per converted block location
 * @throws IOException declared by this method; presumably originates from the
 *           DFSUtil conversion (confirm against DFSUtil)
 */
static BlockStorageLocation[] convertToVolumeBlockLocations(
    List<LocatedBlock> blocks,
    Map<LocatedBlock, List<VolumeId>> blockVolumeIds) throws IOException {
  final BlockLocation[] plainLocations =
      DFSUtil.locatedBlocks2Locations(blocks);
  final int total = plainLocations.length;
  List<BlockStorageLocation> combined =
      new ArrayList<BlockStorageLocation>(total);
  int pos = 0;
  while (pos < total) {
    // Index pos addresses both the converted location and its source block.
    List<VolumeId> idsForBlock = blockVolumeIds.get(blocks.get(pos));
    combined.add(new BlockStorageLocation(plainLocations[pos],
        idsForBlock.toArray(new VolumeId[0])));
    pos++;
  }
  return combined.toArray(new BlockStorageLocation[] {});
}
 
Example #3
Source File: Main.java    From hdfs-metadata with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Prints a human-readable description of one block location to standard
 * output: offset, length, each replica (host, disk, name, topology path) and
 * the cached hosts.
 *
 * @param blockLocation block to describe; disk ids are only available when it
 *          is a {@link BlockStorageLocation}
 * @param dataDirs optional table indexed by disk id used to print a location
 *          for the disk; may be null
 * @throws IOException declared by this method; presumably from the
 *           {@link BlockLocation} accessors (confirm against the Hadoop version in use)
 */
private void printBlockMetadata(BlockLocation blockLocation, String[] dataDirs) throws IOException {

		System.out.println("	Offset: " + blockLocation.getOffset());
		System.out.println("	Length: " + blockLocation.getLength());

		final String[] cached = blockLocation.getCachedHosts();
		final boolean anyCached = cached.length > 0;
		if (!anyCached) {
			System.out.println("	No cached hosts");
		}

		System.out.println("	Replicas:");
		// Volume ids exist only on the storage-aware subclass.
		VolumeId[] volumeIds = null;
		if (blockLocation instanceof BlockStorageLocation) {
			volumeIds = ((BlockStorageLocation) blockLocation).getVolumeIds();
		}
		final String[] replicaHosts = blockLocation.getHosts();
		final String[] replicaNames = blockLocation.getNames();
		final String[] replicaPaths = blockLocation.getTopologyPaths();

		// The topology path array decides how many replicas are printed.
		for (int replica = 0; replica < replicaPaths.length; replica++) {
			int diskId = -1;
			if (volumeIds != null) {
				diskId = DistributedFileSystemMetadata.getDiskId(volumeIds[replica]);
			}

			System.out.println("		Replica (" + replica + "):");
			System.out.println("			Host: " + replicaHosts[replica]);

			if (diskId == -1) {
				System.out.println("			DiskId: unknown");
			} else {
				boolean dirKnown = dataDirs != null && diskId < dataDirs.length;
				if (dirKnown) {
					System.out.println("			Location: " + dataDirs[diskId] + " (DiskId: " + diskId + ")");
				} else {
					System.out.println("			DiskId: " + diskId);
				}
			}

			System.out.println("			Name: " + replicaNames[replica]);
			System.out.println("			TopologyPaths: " + replicaPaths[replica]);
		}

		if (anyCached) {
			System.out.println("	Cached hosts:");
			for (int c = 0; c < cached.length; c++) {
				System.out.println("		Host: " + cached[c]);
			}
		}
	}
 
Example #4
Source File: DistributedFileSystemMetadataTest.java    From hdfs-metadata with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Checks the per-host, per-disk tallies produced by
 * {@code DistributedFileSystemMetadata.computeHostsDiskIdsCount} for four
 * storage-aware locations plus one plain {@link BlockLocation}; the
 * assertions expect the plain location's replicas to be counted under -1.
 */
@Test
public void computeHostsDiskIdsCount() throws IOException{
	// Storage-aware fixtures: hosts and volume ids are paired by position.
	BlockLocation onHosts1And2 = new BlockStorageLocation(
			new BlockLocation(null, new String[]{"host1", "host2"}, 0, 0),
			new VolumeId[]{new TVolumeId("3"), new TVolumeId("4")});
	BlockLocation onHosts2And3 = new BlockStorageLocation(
			new BlockLocation(null, new String[]{"host2", "host3"}, 0, 0),
			new VolumeId[]{new TVolumeId("4"), new TVolumeId("5")});
	BlockLocation onHosts10And2 = new BlockStorageLocation(
			new BlockLocation(null, new String[]{"host10", "host2"}, 0, 0),
			new VolumeId[]{new TVolumeId("3"), new TVolumeId("4")});
	BlockLocation onHosts10And3 = new BlockStorageLocation(
			new BlockLocation(null, new String[]{"host10", "host3"}, 0, 0),
			new VolumeId[]{new TVolumeId("8"), new TVolumeId("5")});
	// Plain location without volume ids; host3 is listed twice on purpose.
	BlockLocation withoutVolumes =
			new BlockLocation(null, new String[]{"host10", "host3", "host3"}, 0, 0);

	List<BlockLocation> input = new LinkedList<>();
	input.add(onHosts1And2);
	input.add(onHosts2And3);
	input.add(onHosts10And2);
	input.add(onHosts10And3);
	input.add(withoutVolumes);

	HashMap<String, HashMap<Integer, Integer>> countsByHost =
			DistributedFileSystemMetadata.computeHostsDiskIdsCount(input);

	HashMap<Integer, Integer> host1Counts = countsByHost.get("host1");
	Assert.assertEquals(1, host1Counts.get(3).intValue());

	HashMap<Integer, Integer> host2Counts = countsByHost.get("host2");
	Assert.assertEquals(3, host2Counts.get(4).intValue());

	HashMap<Integer, Integer> host3Counts = countsByHost.get("host3");
	Assert.assertEquals(2, host3Counts.get(5).intValue());
	Assert.assertEquals(2, host3Counts.get(-1).intValue());

	HashMap<Integer, Integer> host10Counts = countsByHost.get("host10");
	Assert.assertEquals(1, host10Counts.get(3).intValue());
	Assert.assertEquals(1, host10Counts.get(8).intValue());
	Assert.assertEquals(1, host10Counts.get(-1).intValue());
}
 
Example #5
Source File: DFSClient.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Resolves volume information for a list of {@link HdfsBlockLocation}s and
 * returns it as {@link BlockStorageLocation}s.
 * Called by {@link DistributedFileSystem#getFileBlockStorageLocations(List)}
 * for blocks obtained from
 * {@link DistributedFileSystem#getFileBlockLocations(org.apache.hadoop.fs.FileStatus, long, long)}
 * .
 *
 * The work happens in four steps: unwrap each location into its
 * {@link LocatedBlock}, group the blocks by datanode, query the datanodes for
 * block metadata, then regroup the answers per block and attach them to the
 * block locations as {@link VolumeId}s.
 *
 * @param blockLocations
 *          target blocks on which to query volume location information; every
 *          element must be an {@link HdfsBlockLocation}
 * @return volumeBlockLocations original block array augmented with additional
 *         volume location information for each replica.
 * @throws UnsupportedOperationException
 *           if the client configuration flag for HDFS blocks metadata is off
 * @throws ClassCastException
 *           if an element is not an {@link HdfsBlockLocation}
 */
public BlockStorageLocation[] getBlockStorageLocations(
    List<BlockLocation> blockLocations) throws IOException,
    UnsupportedOperationException, InvalidBlockTokenException {
  if (!getConf().getHdfsBlocksMetadataEnabled) {
    throw new UnsupportedOperationException("Datanode-side support for " +
        "getVolumeBlockLocations() must also be enabled in the client " +
        "configuration.");
  }

  // Step 1: unwrap every location into the LocatedBlock it carries.
  List<LocatedBlock> blocks = new ArrayList<LocatedBlock>();
  for (BlockLocation loc : blockLocations) {
    if (loc instanceof HdfsBlockLocation) {
      blocks.add(((HdfsBlockLocation) loc).getLocatedBlock());
    } else {
      throw new ClassCastException("DFSClient#getVolumeBlockLocations " +
          "expected to be passed HdfsBlockLocations");
    }
  }

  // Step 2: index the blocks by the datanodes holding their replicas. The
  // linked map keeps datanodes in first-seen order.
  Map<DatanodeInfo, List<LocatedBlock>> datanodeBlocks =
      new LinkedHashMap<DatanodeInfo, List<LocatedBlock>>();
  for (LocatedBlock block : blocks) {
    for (DatanodeInfo node : block.getLocations()) {
      List<LocatedBlock> onNode = datanodeBlocks.get(node);
      if (onNode == null) {
        onNode = new ArrayList<LocatedBlock>();
        datanodeBlocks.put(node, onNode);
      }
      onNode.add(block);
    }
  }

  // Step 3: ask the datanodes for the block metadata, inside a trace span
  // that is closed whether or not the query succeeds.
  Map<DatanodeInfo, HdfsBlocksMetadata> metadatas;
  TraceScope scope =
      Trace.startSpan("getBlockStorageLocations", traceSampler);
  try {
    metadatas = BlockStorageLocationUtil.queryDatanodesForHdfsBlocksMetadata(
        conf, datanodeBlocks,
        getConf().getFileBlockStorageLocationsNumThreads,
        getConf().getFileBlockStorageLocationsTimeoutMs,
        getConf().connectToDnViaHostname);
    if (LOG.isTraceEnabled()) {
      String rendered =
          Joiner.on("\n").withKeyValueSeparator("=").join(metadatas);
      LOG.trace("metadata returned: " + rendered);
    }
  } finally {
    scope.close();
  }

  // Step 4: turn the per-datanode answers back into per-block volume ids and
  // merge them with the block locations.
  Map<LocatedBlock, List<VolumeId>> blockVolumeIds =
      BlockStorageLocationUtil.associateVolumeIdsWithBlocks(blocks, metadatas);
  return BlockStorageLocationUtil.convertToVolumeBlockLocations(blocks,
      blockVolumeIds);
}
 
Example #6
Source File: DistributedFileSystemMetadata.java    From hdfs-metadata with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Walks all files under {@code path} recursively and collects their block
 * locations, stopping once MAX_NUMBER_OF_LOCATIONS is reached. When HDFS
 * blocks metadata is enabled, the collected locations are replaced by their
 * {@link BlockStorageLocation} counterparts before returning.
 *
 * @param path root of the recursive file listing
 * @return the collected locations
 * @throws IOException declared by this method; presumably from the listing
 *           and location lookups it performs
 */
public LinkedList<BlockLocation> getBlockLocations(Path path) throws IOException {
	LOG.info("Collecting block locations...");

	LinkedList<BlockLocation> collected = new LinkedList<BlockLocation>();
	RemoteIterator<LocatedFileStatus> fileIterator = listFiles(path, true);

	// hasNextCode: 0 ends the walk, 1 means an element can be fetched, and
	// larger codes are skipped without consuming an element (presumably a
	// recoverable iteration problem — confirm against hasNextCode).
	for (int code = hasNextCode(fileIterator); code > 0; code = hasNextCode(fileIterator)) {
		if (code > 1) {
			continue;
		}

		LocatedFileStatus status = fileIterator.next();
		if (status.isFile()) {
			BlockLocation[] ofFile = getFileBlockLocations(status, 0, status.getLen());
			collected.addAll(Arrays.asList(ofFile));
		}

		int count = collected.size();
		boolean atProgressMark = count > 0 && count % 5000 == 0;
		if (atProgressMark) {
			LOG.info("Collected " + count + " locations. Still in progress...");
		}

		if (count >= MAX_NUMBER_OF_LOCATIONS) {
			LOG.info("Reached max number of locations to collect. The amount will be representative enough.");
			break;
		}
	}
	LOG.info("Collected " + collected.size() + " locations.");

	if (!isHdfsBlocksMetadataEnabled()) {
		LOG.error("VolumnId/DiskId can not be collected since "
				+ "dfs.datanode.hdfs-blocks-metadata.enabled is not enabled.");
		return collected;
	}

	// Swap the plain locations for their volume-aware versions, reusing the
	// same list instance.
	BlockStorageLocation[] withVolumes = getFileBlockStorageLocations(collected);
	collected.clear();
	collected.addAll(Arrays.asList(withVolumes));
	return collected;
}
 
Example #7
Source File: DFSClient.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Looks up, for each given {@link HdfsBlockLocation}, the volume on which
 * every replica is stored and returns the result as
 * {@link BlockStorageLocation}s. Used by
 * {@link DistributedFileSystem#getFileBlockStorageLocations(List)} for blocks
 * returned by
 * {@link DistributedFileSystem#getFileBlockLocations(org.apache.hadoop.fs.FileStatus, long, long)}
 * .
 *
 * The datanodes holding the replicas are queried for block metadata; the
 * answers are exposed on the returned locations as {@link VolumeId}s.
 *
 * @param blockLocations
 *          target blocks on which to query volume location information; all
 *          elements must be {@link HdfsBlockLocation}s
 * @return volumeBlockLocations original block array augmented with additional
 *         volume location information for each replica.
 * @throws UnsupportedOperationException
 *           if HDFS blocks metadata is not enabled in the client configuration
 * @throws ClassCastException
 *           if any element is not an {@link HdfsBlockLocation}
 */
public BlockStorageLocation[] getBlockStorageLocations(
    List<BlockLocation> blockLocations) throws IOException,
    UnsupportedOperationException, InvalidBlockTokenException {
  final boolean metadataEnabled = getConf().getHdfsBlocksMetadataEnabled;
  if (!metadataEnabled) {
    throw new UnsupportedOperationException("Datanode-side support for " +
        "getVolumeBlockLocations() must also be enabled in the client " +
        "configuration.");
  }

  // Extract the LocatedBlock behind each location, rejecting foreign types.
  List<LocatedBlock> blocks = new ArrayList<LocatedBlock>();
  for (BlockLocation candidate : blockLocations) {
    if (!(candidate instanceof HdfsBlockLocation)) {
      throw new ClassCastException("DFSClient#getVolumeBlockLocations " +
          "expected to be passed HdfsBlockLocations");
    }
    LocatedBlock located = ((HdfsBlockLocation) candidate).getLocatedBlock();
    blocks.add(located);
  }

  // Build datanode -> blocks-with-a-replica-there, in first-seen order.
  Map<DatanodeInfo, List<LocatedBlock>> datanodeBlocks =
      new LinkedHashMap<DatanodeInfo, List<LocatedBlock>>();
  for (LocatedBlock located : blocks) {
    DatanodeInfo[] holders = located.getLocations();
    for (DatanodeInfo holder : holders) {
      List<LocatedBlock> hosted = datanodeBlocks.get(holder);
      if (hosted == null) {
        hosted = new ArrayList<LocatedBlock>();
        datanodeBlocks.put(holder, hosted);
      }
      hosted.add(located);
    }
  }

  // Query the datanodes under a trace span; the span is always closed.
  TraceScope scope =
      Trace.startSpan("getBlockStorageLocations", traceSampler);
  Map<DatanodeInfo, HdfsBlocksMetadata> metadatas;
  try {
    metadatas = BlockStorageLocationUtil.queryDatanodesForHdfsBlocksMetadata(
        conf,
        datanodeBlocks,
        getConf().getFileBlockStorageLocationsNumThreads,
        getConf().getFileBlockStorageLocationsTimeoutMs,
        getConf().connectToDnViaHostname);
    if (LOG.isTraceEnabled()) {
      LOG.trace("metadata returned: "
          + Joiner.on("\n").withKeyValueSeparator("=").join(metadatas));
    }
  } finally {
    scope.close();
  }

  // Regroup the answers by block, then attach them to the block locations.
  Map<LocatedBlock, List<VolumeId>> blockVolumeIds =
      BlockStorageLocationUtil.associateVolumeIdsWithBlocks(blocks, metadatas);
  return BlockStorageLocationUtil.convertToVolumeBlockLocations(
      blocks, blockVolumeIds);
}
 
Example #8
Source File: DistributedFileSystem.java    From hadoop with Apache License 2.0 3 votes vote down vote up
/**
 * Queries storage location information for a list of blocks by delegating to
 * the underlying DFS client. The list is normally assembled from one or more
 * calls to
 * {@link DistributedFileSystem#getFileBlockLocations(Path, long, long)}
 * covering ranges of a file.
 *
 * Each returned {@link BlockStorageLocation} extends a {@link BlockLocation}
 * with one {@link VolumeId} per replica, naming the datanode volume holding
 * that replica. A replica's VolumeId may be null: volume information can be
 * unavailable when the datanode is down or the requested block is not found.
 *
 * This API is unstable, and datanode-side support is disabled by default. It
 * can be enabled by setting "dfs.datanode.hdfs-blocks-metadata.enabled" to
 * true.
 *
 * @param blocks
 *          List of target BlockLocations to query volume location information
 * @return volumeBlockLocations Augmented array of
 *         {@link BlockStorageLocation}s containing additional volume location
 *         information for each replica of each block.
 */
@InterfaceStability.Unstable
public BlockStorageLocation[] getFileBlockStorageLocations(
    List<BlockLocation> blocks) throws IOException,
    UnsupportedOperationException, InvalidBlockTokenException {
  // Pure delegation: the DFS client performs the datanode queries.
  final BlockStorageLocation[] volumeBlockLocations =
      dfs.getBlockStorageLocations(blocks);
  return volumeBlockLocations;
}
 
Example #9
Source File: DistributedFileSystem.java    From big-c with Apache License 2.0 3 votes vote down vote up
/**
 * Returns, for every block in {@code blocks}, a {@link BlockStorageLocation}
 * carrying a {@link VolumeId} per replica in addition to the usual
 * {@link BlockLocation} data. The lookup itself is handed to the DFS client.
 *
 * The input list is typically built from calls to
 * {@link DistributedFileSystem#getFileBlockLocations(Path, long, long)} for
 * ranges of a file. The VolumeId identifies the datanode volume on which a
 * replica resides; it may be null because volume information can be
 * unavailable if the corresponding datanode is down or if the requested block
 * is not found.
 *
 * This API is unstable, and datanode-side support is disabled by default. It
 * can be enabled by setting "dfs.datanode.hdfs-blocks-metadata.enabled" to
 * true.
 *
 * @param blocks
 *          List of target BlockLocations to query volume location information
 * @return volumeBlockLocations Augmented array of
 *         {@link BlockStorageLocation}s containing additional volume location
 *         information for each replica of each block.
 */
@InterfaceStability.Unstable
public BlockStorageLocation[] getFileBlockStorageLocations(
    List<BlockLocation> blocks) throws IOException,
    UnsupportedOperationException, InvalidBlockTokenException {
  BlockStorageLocation[] augmented = dfs.getBlockStorageLocations(blocks);
  return augmented;
}