Java Code Examples for org.apache.hadoop.fs.BlockLocation#getHosts()

The following examples show how to use org.apache.hadoop.fs.BlockLocation#getHosts(). Each example is taken from an open-source project; the source file, project, and license are listed above it.
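Before the project examples, here is a minimal, self-contained sketch of the typical call pattern: resolve a file's block locations with FileSystem#getFileBlockLocations, then read the datanode hostnames of each block with getHosts(). The class name and file path below are placeholders, not taken from any project.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetHostsDemo {
  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    Path path = new Path("/data/example.txt"); // hypothetical path
    FileStatus status = fs.getFileStatus(path);
    // Resolve the block locations covering the whole file.
    BlockLocation[] locations = fs.getFileBlockLocations(status, 0, status.getLen());
    for (BlockLocation location : locations) {
      // getHosts() returns the hostnames of the datanodes storing this block.
      for (String host : location.getHosts()) {
        System.out.println(host);
      }
    }
  }
}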
Example 1
Source File: Metadata.java    From Bats with Apache License 2.0
/**
 * Get the host affinity for a row group.
 *
 * @param fileStatus the parquet file
 * @param fs         the file system used to resolve block locations
 * @param start      the start of the row group
 * @param length     the length of the row group
 * @return host affinity for the row group
 */
private Map<String, Float> getHostAffinity(FileStatus fileStatus, FileSystem fs, long start, long length)
    throws IOException {
  BlockLocation[] blockLocations = fs.getFileBlockLocations(fileStatus, start, length);
  Map<String, Float> hostAffinityMap = Maps.newHashMap();
  for (BlockLocation blockLocation : blockLocations) {
    for (String host : blockLocation.getHosts()) {
      Float currentAffinity = hostAffinityMap.get(host);
      float blockStart = blockLocation.getOffset();
      float blockEnd = blockStart + blockLocation.getLength();
      float rowGroupEnd = start + length;
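      // Affinity is the fraction of the row group's bytes that fall within this
      // block, so trim the block's extent to the row group before dividing by length.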
      Float newAffinity = (blockLocation.getLength() - (blockStart < start ? start - blockStart : 0) -
          (blockEnd > rowGroupEnd ? blockEnd - rowGroupEnd : 0)) / length;
      if (currentAffinity != null) {
        hostAffinityMap.put(host, currentAffinity + newAffinity);
      } else {
        hostAffinityMap.put(host, newAffinity);
      }
    }
  }
  return hostAffinityMap;
}
 
Example 2
Source File: BlurBlockPlacementPolicyDefaultTest.java    From incubator-retired-blur with Apache License 2.0
private void waitForReplication(FileSystem fileSystem, Path p, int replicas) throws IOException, InterruptedException {
  FileStatus fileStatus = fileSystem.getFileStatus(p);
  boolean fail = true;
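  // Poll once per second until every block reports the expected number of replica hosts.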
  while (fail) {
    fail = false;
    BlockLocation[] blockLocations = fileSystem.getFileBlockLocations(p, 0, fileStatus.getLen());
    for (BlockLocation blockLocation : blockLocations) {
      System.out.println(blockLocation);
      String[] hosts = blockLocation.getHosts();
      if (hosts.length != replicas) {
        fail = true;
      }
    }
    Thread.sleep(1000);
  }
}
 
Example 3
Source File: Data.java    From Hi-WAY with Apache License 2.0
public long countAvailableLocalData(Container container) throws IOException {
	BlockLocation[] blockLocations = null;

	Path hdfsLocation = getHdfsPath();
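	// Retry until the block locations of the file have been resolved.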
	while (blockLocations == null) {
		FileStatus fileStatus = hdfs.getFileStatus(hdfsLocation);
		blockLocations = hdfs.getFileBlockLocations(hdfsLocation, 0, fileStatus.getLen());
	}

	long sum = 0;
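	// Add up the bytes of every block that has a replica on the container's node.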
	for (BlockLocation blockLocation : blockLocations) {
		for (String host : blockLocation.getHosts()) {
			if (container.getNodeId().getHost().equals(host)) {
				sum += blockLocation.getLength();
				break;
			}
		}
	}
	return sum;
}
 
Example 4
Source File: HdfsClient.java    From bigdata-tutorial with Apache License 2.0
/**
 * Get the locations of the file's blocks.
 */
public void getFileBlockLocation(String pathuri) {
	try {
		Path filePath = new Path(pathuri);
		FileStatus fileStatus = fs.getFileStatus(filePath);
		if (fileStatus.isDirectory()) {
			System.out.println("**** getFileBlockLocations only for file");
			return;
		}
		System.out.println(">>>> file block location:");
		BlockLocation[] blkLocations = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
		for (BlockLocation currentLocation : blkLocations) {
			String[] hosts = currentLocation.getHosts();
			for (String host : hosts) {
				System.out.println(">>>> host: " + host);
			}
		}

		// Get the last modification time
		long modifyTime = fileStatus.getModificationTime();
		Date d = new Date(modifyTime);
		System.out.println(">>>> ModificationTime = " + d);
	} catch (Exception e) {
		e.printStackTrace();
	}
}
 
Example 5
Source File: TestSwiftFileSystemBlockLocation.java    From sahara-extra with Apache License 2.0 (identical copies of this test also appear in the hadoop and big-c projects)
private void assertLocationValid(BlockLocation location) throws
                                                         IOException {
  LOG.info(location);
  String[] hosts = location.getHosts();
  String[] names = location.getNames();
  assertNotEqual("No hosts supplied for " + location, 0, hosts.length);
  //for every host, there's a name.
  assertEquals("Unequal names and hosts in " + location,
               hosts.length, names.length);
  assertEquals(SwiftProtocolConstants.BLOCK_LOCATION,
               location.getNames()[0]);
  assertEquals(SwiftProtocolConstants.TOPOLOGY_PATH,
               location.getTopologyPaths()[0]);
}
 
Example 6
Source File: BlurBlockPlacementPolicyDefaultTest.java    From incubator-retired-blur with Apache License 2.0
private void assertBlocksExistOnShardServer(FileSystem fileSystem, Path p, String shardServer) throws IOException {
  FileStatus fileStatus = fileSystem.getFileStatus(p);
  BlockLocation[] blockLocations = fileSystem.getFileBlockLocations(p, 0, fileStatus.getLen());
  for (BlockLocation blockLocation : blockLocations) {
    System.out.println(blockLocation);
    String[] hosts = blockLocation.getHosts();
    assertTrue(Arrays.asList(hosts).contains(shardServer));
  }
}
 
Example 7
Source File: FSUtils.java    From hbase with Apache License 2.0
/**
 * Update blocksDistribution with blockLocations
 * @param blocksDistribution the hdfs blocks distribution
 * @param blockLocations an array of block locations
 */
public static void addToHDFSBlocksDistribution(
    HDFSBlocksDistribution blocksDistribution, BlockLocation[] blockLocations)
    throws IOException {
  for (BlockLocation bl : blockLocations) {
    String[] hosts = bl.getHosts();
    long len = bl.getLength();
    StorageType[] storageTypes = bl.getStorageTypes();
    blocksDistribution.addHostsAndBlockWeight(hosts, len, storageTypes);
  }
}
 
Example 8
Source File: HadoopIgfsSecondaryFileSystemDelegateImpl.java    From ignite with Apache License 2.0
/**
 * Convert IGFS affinity block location into Hadoop affinity block location.
 *
 * @param block IGFS affinity block location.
 * @return Hadoop affinity block location.
 */
private IgfsBlockLocation convertBlockLocation(BlockLocation block) {
    try {
        String[] names = block.getNames();
        String[] hosts = block.getHosts();

        return new IgfsBlockLocationImpl(
            block.getOffset(), block.getLength(),
            Arrays.asList(names), Arrays.asList(hosts));
    } catch (IOException e) {
        throw handleSecondaryFsError(e, "Failed to convert block location: " + block);
    }
}
 
Example 9
Source File: Main.java    From hdfs-metadata with GNU General Public License v3.0
private void printBlockMetadata(BlockLocation blockLocation, String[] dataDirs) throws IOException {

		System.out.println("	Offset: " + blockLocation.getOffset());
		System.out.println("	Length: " + blockLocation.getLength());

		String[] cachedHosts = blockLocation.getCachedHosts();
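		// Cached hosts are datanodes holding the block in an HDFS cache; the array may be empty.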
		if (cachedHosts.length == 0) {
			System.out.println("	No cached hosts");
		}

		System.out.println("	Replicas:");
		VolumeId[] volumeIds = blockLocation instanceof BlockStorageLocation ?
				(((BlockStorageLocation) blockLocation).getVolumeIds()) : null;
		String[] hosts = blockLocation.getHosts();
		String[] names = blockLocation.getNames();
		String[] topologyPaths = blockLocation.getTopologyPaths();
		for (int i = 0; i < topologyPaths.length; i++) {
			int diskId = volumeIds != null ? DistributedFileSystemMetadata.getDiskId(volumeIds[i]) : -1;
			
			System.out.println("		Replica (" + i + "):");
			System.out.println("			Host: " + hosts[i]);
			
			if(diskId == -1)
				System.out.println("			DiskId: unknown");
			else if(dataDirs != null && diskId < dataDirs.length)
				System.out.println("			Location: " + dataDirs[diskId] + " (DiskId: " + diskId + ")");
			else
				System.out.println("			DiskId: " + diskId);
			
			System.out.println("			Name: " + names[i]);
			System.out.println("			TopologyPaths: " + topologyPaths[i]);
		}

		if (cachedHosts.length > 0) {
			System.out.println("	Cached hosts:");
			for (String cachedHost : cachedHosts) {
				System.out.println("		Host: " + cachedHost);
			}
		}
	}
 
Example 10
Source File: InternalHiveSplitFactory.java    From presto with Apache License 2.0
private static String[] getBlockHosts(BlockLocation blockLocation)
{
    try {
        return blockLocation.getHosts();
    }
    catch (IOException e) {
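        // getHosts() declares a checked IOException; rethrow it unchecked.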
        throw new UncheckedIOException(e);
    }
}
 
Example 11
Source File: InputStriper.java    From big-c with Apache License 2.0 (an identical copy also appears in the hadoop project)
/**
 * @param inputDir Pool used to resolve block locations.
 * @param bytes Target byte count
 * @param nLocs Number of block locations per split.
 * @return A set of files satisfying the byte count, with locations weighted
 *         to the dominating proportion of input bytes.
 */
CombineFileSplit splitFor(FilePool inputDir, long bytes, int nLocs)
    throws IOException {
  final ArrayList<Path> paths = new ArrayList<Path>();
  final ArrayList<Long> start = new ArrayList<Long>();
  final ArrayList<Long> length = new ArrayList<Long>();
  final HashMap<String,Double> sb = new HashMap<String,Double>();
  do {
    paths.add(current.getPath());
    start.add(currentStart);
    final long fromFile = Math.min(bytes, current.getLen() - currentStart);
    length.add(fromFile);
    for (BlockLocation loc :
        inputDir.locationsFor(current, currentStart, fromFile)) {
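      // Weight each of the block's hosts by the fraction of the requested bytes the block covers.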
      final double tedium = loc.getLength() / (1.0 * bytes);
      for (String l : loc.getHosts()) {
        Double j = sb.get(l);
        if (null == j) {
          sb.put(l, tedium);
        } else {
          sb.put(l, j.doubleValue() + tedium);
        }
      }
    }
    currentStart += fromFile;
    bytes -= fromFile;
    // Switch to a new file if
    //  - the current file is uncompressed and completely used
    //  - the current file is compressed
    
    CompressionCodecFactory compressionCodecs = 
      new CompressionCodecFactory(conf);
    CompressionCodec codec = compressionCodecs.getCodec(current.getPath());
    if (current.getLen() - currentStart == 0
        || codec != null) {
      current = files.get(++idx % files.size());
      currentStart = 0;
    }
  } while (bytes > 0);
  final ArrayList<Entry<String,Double>> sort =
    new ArrayList<Entry<String,Double>>(sb.entrySet());
  Collections.sort(sort, hostRank);
  final String[] hosts = new String[Math.min(nLocs, sort.size())];
  for (int i = 0; i < nLocs && i < sort.size(); ++i) {
    hosts[i] = sort.get(i).getKey();
  }
  return new CombineFileSplit(paths.toArray(new Path[0]),
      toLongArray(start), toLongArray(length), hosts);
}
 
Example 12
Source File: RubixInputSplit.java    From Cubert with Apache License 2.0
@Override
public String[] getLocations() throws IOException,
        InterruptedException
{
    if (hostnames == null)
    {
        /* Obtain the FileSystem object and get the FileStatus objects for the split */
        FileSystem fileSystem = FileSystem.get(conf);
        FileStatus fileStatus = fileSystem.getFileStatus(filename);
        /*
         * Obtain the Block locations for the split. This also provides the offset and
         * length information for each block
         */
        final BlockLocation[] blockLocations =
                fileSystem.getFileBlockLocations(fileStatus, offset, length);
        /**
         * Collect all hosts in a map and populate the number of bytes to be read from
         * each host
         */
        Long l;
        Map<String, Long> hostMap = new HashMap<String, Long>();
        for (BlockLocation bl : blockLocations)
        {
            final long start = bl.getOffset() < offset ? offset : bl.getOffset();
            final long end =
                    (offset + length) < (bl.getOffset() + bl.getLength()) ? offset
                            + length : bl.getOffset() + bl.getLength();
            final long nRelevantBytes = end - start;
            for (String host : bl.getHosts())
            {
                hostMap.put(host, ((l = hostMap.get(host)) == null ? 0 : l)
                        + nRelevantBytes);
            }
        }
        /* Sort them in decreasing order of maximum number of relevant bytes */
        final Set<Map.Entry<String, Long>> entries = hostMap.entrySet();
        final Map.Entry<String, Long>[] hostLengthPairs =
                entries.toArray(new Map.Entry[entries.size()]);

        Arrays.sort(hostLengthPairs, new Comparator<Map.Entry<String, Long>>()
        {
            @Override
            public int compare(Map.Entry<String, Long> e1, Map.Entry<String, Long> e2)
            {
                // Use Long.compare: casting the long difference to int can overflow and misorder entries.
                return Long.compare(e2.getValue(), e1.getValue());
            }
        });

        /* Populate the hostnames object */
        final int nHost = Math.min(hostLengthPairs.length, MAX_LOCATIONS);
        hostnames = new String[nHost];
        for (int i = 0; i < nHost; ++i)
        {
            hostnames[i] = hostLengthPairs[i].getKey();
        }
    }
    return hostnames;
}
 
Example 13
Source File: FileFragment.java    From tajo with Apache License 2.0
public FileFragment(String tableName, Path uri, BlockLocation blockLocation)
    throws IOException {
  this(tableName, uri, blockLocation.getOffset(), blockLocation.getLength(), blockLocation.getHosts(), null);
}
 
Example 14
Source File: DremioORCRecordUtils.java    From dremio-oss with Apache License 2.0
private void computeLocality(FileSystem fs, Path path, DiskRangeList range) {
  if (this.remoteRead) {
    return;
  }

  boolean currentReadIsRemote = false;
  try {
    String localHost = InetAddress.getLocalHost().getCanonicalHostName();
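    // A read counts as local only if blocks replicated on this host cover every byte of the range.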
    while (range != null) {
      int len = (int) (range.getEnd() - range.getOffset());
      long off = range.getOffset();
      BlockLocation[] blockLocations = fs.getFileBlockLocations(path, off, len);
      List<Range<Long>> intersectingRanges = new ArrayList<>();
      Range<Long> rowGroupRange = Range.openClosed(off, off+len);
      for (BlockLocation loc : blockLocations) {
        for (String host : loc.getHosts()) {
          if (host.equals(localHost)) {
            intersectingRanges.add(Range.closedOpen(loc.getOffset(), loc.getOffset() + loc.getLength()).intersection(rowGroupRange));
          }
        }
      }
      long totalIntersect = 0;
      for (Range<Long> intersectingRange : intersectingRanges) {
        totalIntersect += (intersectingRange.upperEndpoint() - intersectingRange.lowerEndpoint());
      }
      if (totalIntersect < len) {
        currentReadIsRemote = true;
        break;
      }
      range = range.next;
    }
  } catch (Throwable e) {
    // ignoring any exception in this code path as it is used to collect
    // remote readers metric in profile for debugging
    logger.debug("computeLocality failed with message: {} for path {}", e.getMessage(), path, e);
  }

  if (currentReadIsRemote) {
    this.remoteRead = true;
  }
}
 
Example 15
Source File: DatanodeBenThread.java    From RDFS with Apache License 2.0
public void write() throws Exception {
  long endTime = System.currentTimeMillis() + rtc.max_time;
  long currentId = 0;
  FSDataOutputStream out = null;
  DistributedFileSystem dfs = (DistributedFileSystem) fs;
  while (System.currentTimeMillis() < endTime
      && currentId < rtc.max_files) {
    if (running_type == RUNNING_TYPE.PREPARE) {
      //The number of files reach the minimum limit, exit
      if (getNumberOfFiles() > rtc.min_file) 
        break;
    }
    Path fileName = new Path(outputPath, file_prefix + currentId);
    try { 
      out = dfs.create(fileName,
                       FsPermission.getDefault(),
                       false,
                       dfs.getConf().getInt("io.file.buffer.size", 4096),
                       (short)replication,
                       dfs.getDefaultBlockSize(),
                       dfs.getConf().getInt("io.bytes.per.checksum", 512),
                       null,
                       rtc.victims);
      long size = 0;
      while (true) {
        rb.nextBytes(buffer);
        tb.getTokens(rtc.buffer_size);
        out.write(buffer, 0, rtc.buffer_size);
        size += rtc.buffer_size;
        if (System.currentTimeMillis() > endTime 
            || size + rtc.buffer_size > max_size) {
          // Roll the file
          out.close();
          out = null;
          currentId++;
          files_processed++;
          processed_size += size;
          write_size += size;
          Path fullName = fs.makeQualified(fileName);
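          // Look up which datanodes hold the first block of the file just written.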
          BlockLocation bl = dfs.getClient().getBlockLocations(
              fullName.toUri().getPath(), 0L, 1L)[0];
          String hosts = "";
          for (String host: bl.getHosts()) {
            hosts += host + " ";
          }
          LOG.info("[close (" + size + "B)] " + hosts + " file " + fullName);
          break;
        }
      }
    } catch (Exception e) {
      LOG.error("Error in writing file:" + fileName, e);
      this.errors.add(e);
    } finally {
      IOUtils.closeStream(out);
    }
  }
}
 
Example 16
Source File: InputStriper.java    From RDFS with Apache License 2.0
/**
 * @param inputDir Pool used to resolve block locations.
 * @param bytes Target byte count
 * @param nLocs Number of block locations per split.
 * @return A set of files satisfying the byte count, with locations weighted
 *         to the dominating proportion of input bytes.
 */
CombineFileSplit splitFor(FilePool inputDir, long bytes, int nLocs)
    throws IOException {
  final ArrayList<Path> paths = new ArrayList<Path>();
  final ArrayList<Long> start = new ArrayList<Long>();
  final ArrayList<Long> length = new ArrayList<Long>();
  final HashMap<String,Double> sb = new HashMap<String,Double>();
  do {
    paths.add(current.getPath());
    start.add(currentStart);
    final long fromFile = Math.min(bytes, current.getLen() - currentStart);
    length.add(fromFile);
    for (BlockLocation loc :
        inputDir.locationsFor(current, currentStart, fromFile)) {
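      // Weight each of the block's hosts by the fraction of the requested bytes the block covers.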
      final double tedium = loc.getLength() / (1.0 * bytes);
      for (String l : loc.getHosts()) {
        Double j = sb.get(l);
        if (null == j) {
          sb.put(l, tedium);
        } else {
          sb.put(l, j.doubleValue() + tedium);
        }
      }
    }
    currentStart += fromFile;
    bytes -= fromFile;
    if (current.getLen() - currentStart == 0) {
      current = files.get(++idx % files.size());
      currentStart = 0;
    }
  } while (bytes > 0);
  final ArrayList<Entry<String,Double>> sort =
    new ArrayList<Entry<String,Double>>(sb.entrySet());
  Collections.sort(sort, hostRank);
  final String[] hosts = new String[Math.min(nLocs, sort.size())];
  for (int i = 0; i < nLocs && i < sort.size(); ++i) {
    hosts[i] = sort.get(i).getKey();
  }
  return new CombineFileSplit(paths.toArray(new Path[0]),
      toLongArray(start), toLongArray(length), hosts);
}