Java Code Examples for org.apache.hadoop.hdfs.protocol.LocatedBlock#getStartOffset()

The following examples show how to use org.apache.hadoop.hdfs.protocol.LocatedBlock#getStartOffset(). Each example is taken from an open-source project; the source file and license are noted above each snippet.
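Before the examples, here is a minimal orientation sketch (not drawn from any of the projects below) of what getStartOffset() reports: each LocatedBlock gives the offset of its first byte within the file, so start offset plus block size is the exclusive end of the block's byte range. The method name printBlockRanges and the namenode proxy and path parameters are assumptions for illustration:

// Minimal sketch: print the byte range each block of a file covers.
// `namenode` (an initialized ClientProtocol proxy) and `src` are assumed inputs.
static void printBlockRanges(ClientProtocol namenode, String src)
    throws IOException {
  LocatedBlocks blocks = namenode.getBlockLocations(src, 0, Long.MAX_VALUE);
  for (LocatedBlock blk : blocks.getLocatedBlocks()) {
    long start = blk.getStartOffset();      // first byte of the block in the file
    long end = start + blk.getBlockSize();  // exclusive end of the block's range
    System.out.println(blk.getBlock() + " covers [" + start + ", " + end + ")");
  }
}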
Example 1
Source File: TestDFSClientRetries.java    From hadoop with Apache License 2.0
private LocatedBlocks makeBadBlockList(LocatedBlocks goodBlockList) {
  LocatedBlock goodLocatedBlock = goodBlockList.get(0);
  LocatedBlock badLocatedBlock = new LocatedBlock(
    goodLocatedBlock.getBlock(),
    new DatanodeInfo[] {
      DFSTestUtil.getDatanodeInfo("1.2.3.4", "bogus", 1234)
    },
    goodLocatedBlock.getStartOffset(),
    false);

  List<LocatedBlock> badBlocks = new ArrayList<LocatedBlock>();
  badBlocks.add(badLocatedBlock);
  return new LocatedBlocks(goodBlockList.getFileLength(), false,
                           badBlocks, null, true,
                           null);
}
 
Example 2
Source File: DFSLocatedBlocks.java    From RDFS with Apache License 2.0
/**
 * Determine whether the given block is the under-construction block
 * of the file. If the file is not under construction, this always
 * returns false.
 *
 * The result is a best guess based on incomplete information. The
 * bottom line is that when the read position equals the file length,
 * the selected block must return true. This is required so that the
 * available-size updating logic is always triggered when reading to
 * the end of an under-construction file.
 *
 * @param block the block to check
 * @return true if the block is believed to be the file's
 *         under-construction block
 */
public boolean isUnderConstructionBlock(Block block) {
  if (!isUnderConstruction()) {
    return false;
  }
  LocatedBlock lastBlock = this.get(this.locatedBlockCount() - 1);

  // There is a potential inconsistency when counting the size of the
  // last block, but fileLength is unlikely to underestimate the
  // size unless the last block's size is 0.
  if ((this.fileLength <= lastBlock.getStartOffset()
      + lastBlock.getBlockSize())
      && lastBlock.getBlock().equals(block)) {
    return true;
  }
  return false;
}
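For context, a typical caller would consult this check when a read reaches the reported file length, to decide whether the last block may still be growing. The fragment below is a hedged usage sketch; dfsLocatedBlocks, currentBlock, and refreshBlockSize are hypothetical names, not part of the example above:

// Hypothetical usage sketch: if the current block is the file's
// under-construction block, its namenode-reported length may be stale.
if (dfsLocatedBlocks.isUnderConstructionBlock(currentBlock.getBlock())) {
  refreshBlockSize(currentBlock);  // hypothetical helper: re-fetch the length
}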
 
Example 3
Source File: TestDFSLocatedBlocks.java    From RDFS with Apache License 2.0
@Test
public void testBlockContainingOffset() {
  for (long blockSizeDelta = -1; blockSizeDelta <= 0; ++blockSizeDelta) {
    DFSLocatedBlocks locatedBlocks =
        randomDFSLocatedBlocks(1000, blockSizeDelta);
    LOG.info("Located blocks: " + locatedBlocks);
    List<LocatedBlock> allBlocks = locatedBlocks.getLocatedBlocks();
    for (LocatedBlock b : allBlocks) {
      long startOffset = b.getStartOffset();
      long endOffset = startOffset + b.getBlockSize();
      assertTrue(
          locatedBlocks.getBlockContainingOffset(startOffset - 1) != b);
      assertTrue(locatedBlocks.getBlockContainingOffset(startOffset) == b);
      assertTrue(locatedBlocks.getBlockContainingOffset(endOffset - 1) == b);
      assertTrue(locatedBlocks.getBlockContainingOffset(endOffset) != b);

      if (blockSizeDelta < 0) {
        // We have left gaps between blocks. Check that the byte immediately
        // before and the byte immediately after the block are not in any
        // block.
        assertTrue("b=" + b,
            locatedBlocks.getBlockContainingOffset(startOffset - 1) == null);
        assertTrue("b=" + b,
            locatedBlocks.getBlockContainingOffset(endOffset) == null);
      }
    }
  }
}
 
Example 4
Source File: TestInterDatanodeProtocol.java    From RDFS with Apache License 2.0
public static LocatedBlockWithMetaInfo getLastLocatedBlock(
    ClientProtocol namenode, String src
) throws IOException {
  //get block info for the last block
  LocatedBlocksWithMetaInfo locations = namenode.openAndFetchMetaInfo(src, 0, Long.MAX_VALUE);
  List<LocatedBlock> blocks = locations.getLocatedBlocks();
  DataNode.LOG.info("blocks.size()=" + blocks.size());
  assertTrue(blocks.size() > 0);

  LocatedBlock blk = blocks.get(blocks.size() - 1);
  return new LocatedBlockWithMetaInfo(blk.getBlock(), blk.getLocations(),
      blk.getStartOffset(), 
      locations.getDataProtocolVersion(), locations.getNamespaceID(),
      locations.getMethodFingerPrint());
}
 
Example 5
Source File: TestPlacementMonitor.java    From RDFS with Apache License 2.0
private BlockInfo createBlockInfo(Path file, LocatedBlock b) {
  DatanodeInfo[] locations = b.getLocations();
  String[] hosts = new String[locations.length];
  String[] names = new String[locations.length];
  for (int i = 0; i < locations.length; ++i) {
    DatanodeInfo d = locations[i];
    hosts[i] = d.getHost();
    names[i] = d.getName();
  }
  
  BlockLocation loc = new BlockLocation(
      names, hosts, b.getStartOffset(), b.getBlockSize());
  return new BlockInfo(loc, file);
}
 
Example 6
Source File: DFSInputStream.java    From hadoop with Apache License 2.0
private DNAddrPair chooseDataNode(LocatedBlock block,
    Collection<DatanodeInfo> ignoredNodes) throws IOException {
  while (true) {
    try {
      return getBestNodeDNAddrPair(block, ignoredNodes);
    } catch (IOException ie) {
      String errMsg = getBestNodeDNAddrPairErrorString(block.getLocations(),
        deadNodes, ignoredNodes);
      String blockInfo = block.getBlock() + " file=" + src;
      if (failures >= dfsClient.getMaxBlockAcquireFailures()) {
        String description = "Could not obtain block: " + blockInfo;
        DFSClient.LOG.warn(description + errMsg
            + ". Throwing a BlockMissingException");
        throw new BlockMissingException(src, description,
            block.getStartOffset());
      }

      DatanodeInfo[] nodes = block.getLocations();
      if (nodes == null || nodes.length == 0) {
        DFSClient.LOG.info("No node available for " + blockInfo);
      }
      DFSClient.LOG.info("Could not obtain " + block.getBlock()
          + " from any node: " + ie + errMsg
          + ". Will get new block locations from namenode and retry...");
      try {
        // Introducing a random factor to the wait time before another retry.
        // The wait time is dependent on # of failures and a random factor.
        // At the first time of getting a BlockMissingException, the wait time
        // is a random number between 0..3000 ms. If the first retry
        // still fails, we will wait 3000 ms grace period before the 2nd retry.
        // Also at the second retry, the waiting window is expanded to 6000 ms
        // alleviating the request rate from the server. Similarly the 3rd retry
        // will wait 6000ms grace period before retry and the waiting window is
        // expanded to 9000ms. 
        final int timeWindow = dfsClient.getConf().timeWindow;
        double waitTime = timeWindow * failures +       // grace period for the last round of attempt
          timeWindow * (failures + 1) * DFSUtil.getRandom().nextDouble(); // expanding time window for each failure
        DFSClient.LOG.warn("DFS chooseDataNode: got # " + (failures + 1) + " IOException, will wait for " + waitTime + " msec.");
        Thread.sleep((long)waitTime);
      } catch (InterruptedException iex) {
      }
      deadNodes.clear(); //2nd option is to remove only nodes[blockId]
      openInfo();
      block = getBlockAt(block.getStartOffset());
      failures++;
      continue;
    }
  }
}
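To make the backoff arithmetic in the comment above concrete, here is a small standalone sketch of the same formula (illustrative only; this method does not exist in DFSInputStream):

// With timeWindow = 3000 ms: failures = 0 waits in [0, 3000) ms;
// failures = 1 waits in [3000, 9000) ms; failures = 2 waits in [6000, 15000) ms.
static long backoffMillis(int failures, int timeWindow, java.util.Random rnd) {
  double waitTime = timeWindow * failures                     // grace period
      + timeWindow * (failures + 1) * rnd.nextDouble();       // expanding window
  return (long) waitTime;
}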
 
Example 7
Source File: TestDFSLocatedBlocks.java    From RDFS with Apache License 2.0
private static long getLastBlockEnd(List<LocatedBlock> blocks) {
  LocatedBlock lastBlk = blocks.get(blocks.size() - 1);
  return lastBlk.getStartOffset() + lastBlk.getBlockSize();
}
 
Example 8
Source File: DFSInputStream.java    From RDFS with Apache License 2.0
/**
 * Get blocks in the specified range. The locations of all blocks
 * overlapping with the given segment of the file are retrieved. Fetch them
 * from the namenode if not cached.
 *
 * @param offset the offset of the segment to read
 * @param length the length of the segment to read
 * @return a consecutive segment of located blocks
 * @throws IOException
 */
private List<LocatedBlock> getBlockRange(final long offset,
    final long length) throws IOException {
  List<LocatedBlock> blockRange = new ArrayList<LocatedBlock>();
  // Zero length. Not sure this ever happens in practice.
  if (length == 0)
    return blockRange;

  // A defensive measure to ensure that we never loop here eternally.
  // With a 256 M block size, 10000 blocks will correspond to 2.5 TB.
  // No one should read this much data at once in practice.
  int maxLoops = 10000;

  // Copy locatedBlocks to a local data structure. This ensures that 
  // a concurrent invocation of openInfo() works OK, the reason being
  // that openInfo may completely replace locatedBlocks.
  DFSLocatedBlocks locatedBlocks = this.locatedBlocks;

  if (locatedBlocks == null) {
    // Make this an IO exception because this is input/output code error.
    throw new IOException("locatedBlocks is null");
  }

  long remaining = length;
  long curOff = offset;
  while (remaining > 0) {
    // a defensive check to bail out of this loop at all costs
    if (--maxLoops < 0) {
      String msg = "Failed to getBlockRange at offset " + offset +
                   ", length=" + length +
                   ", curOff=" + curOff +
                   ", remaining=" + remaining +
                   ". Aborting...";
      DFSClient.LOG.warn(msg);
      throw new IOException(msg); 
    }

    LocatedBlock blk = locatedBlocks.getBlockContainingOffset(curOff);
    if (blk == null) {
      LocatedBlocks newBlocks;
      newBlocks = getLocatedBlocks(src, curOff, remaining);
      if (newBlocks == null) {
        throw new IOException("Could not get block locations for curOff=" +
            curOff + ", remaining=" + remaining + " (offset=" + offset +
            ")");
      }
      locatedBlocks.insertRange(newBlocks.getLocatedBlocks());
      continue;
    }

    blockRange.add(blk);
    long bytesRead = blk.getStartOffset() + blk.getBlockSize() - curOff;
    remaining -= bytesRead;
    curOff += bytesRead;
  }

  DFSClient.checkBlockRange(blockRange, offset, length);

  return blockRange;
}
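The bytesRead arithmetic at the end of the loop deserves a worked example: blk.getStartOffset() + blk.getBlockSize() is the exclusive end of the current block, so subtracting curOff gives the number of remaining bytes that this block covers. With illustrative numbers (128 MB blocks, offset = 100 MB, length = 200 MB):

// Illustrative trace (not from the source): blocks of 128 MB,
// offset = 100 MB, length = 200 MB, so remaining starts at 200 MB.
//   Block 0 [0, 128)   MB: bytesRead = 128 - 100 = 28;  remaining -> 172, curOff -> 128
//   Block 1 [128, 256) MB: bytesRead = 256 - 128 = 128; remaining -> 44,  curOff -> 256
//   Block 2 [256, 384) MB: bytesRead = 128; remaining -> -84 <= 0, loop ends.
// blockRange now holds blocks 0..2, which together cover the requested [100, 300) MB.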
 
Example 9
Source File: DFSInputStream.java    From RDFS with Apache License 2.0
private DNAddrPair chooseDataNode(LocatedBlock block)
  throws IOException {
  while (true) {
    DatanodeInfo[] nodes = block.getLocations();
    String blockInfo = block.getBlock() + " file=" + src;
    if (block.isCorrupt())
      throw new BlockMissingException(src, "Block: " +
          blockInfo + " is corrupt ", block.getStartOffset());
    /*if (nodes.length == 1) {
      long lastContact = System.currentTimeMillis() - nodes[0].getLastUpdate();
      if (lastContact > 9000)
        throw new BlockMissingException(src, "Could not obtain block: " +
            blockInfo, block.getStartOffset());
    }*/
    DatanodeInfo chosenNode = null;
    try {
      chosenNode = dfsClient.bestNode(nodes, deadNodes);
      InetSocketAddress targetAddr =
                        NetUtils.createSocketAddr(chosenNode.getName());
      return new DNAddrPair(chosenNode, targetAddr);
    } catch (IOException ie) {
      int failureTimes = DFSClient.dfsInputStreamfailures.get();
      if (failureTimes >= dfsClient.maxBlockAcquireFailures
          || failureTimes >= block.getLocations().length) {
        throw new BlockMissingException(src, "Could not obtain block: " + 
            blockInfo, block.getStartOffset());
      }

      if (nodes == null || nodes.length == 0) {
        DFSClient.LOG.info("No node available for block: " + blockInfo);
      }
      DFSClient.LOG.info("Could not obtain block " + block.getBlock() +
               " from node:  " +
               (chosenNode == null ? "" : chosenNode.getHostName()) + ie +
               ". Will get new block locations from namenode and retry...");       
      try {
        // Introducing a random factor to the wait time before another retry.
        // The wait time is dependent on # of failures and a random factor.
        // At the first time of getting a BlockMissingException, the wait time
        // is a random number between 0..3000 ms. If the first retry
        // still fails, we will wait 3000 ms grace period before the 2nd retry.
        // Also at the second retry, the waiting window is expanded to 6000 ms
        // alleviating the request rate from the server. Similarly the 3rd retry
        // will wait 6000ms grace period before retry and the waiting window is
        // expanded to 9000ms.
        // waitTime = grace period for the last round of attempt + 
        // expanding time window for each failure
        double waitTime = timeWindow * failureTimes + 
          timeWindow * (failureTimes + 1) * DFSClient.r.nextDouble(); 
        DFSClient.LOG.warn("DFS chooseDataNode: got # " + (failureTimes + 1) + 
            " IOException, will wait for " + waitTime + " msec.", ie);
        Thread.sleep((long)waitTime);
      } catch (InterruptedException iex) {
      }
      deadNodes.clear(); //2nd option is to remove only nodes[blockId]
      openInfo();
      block = getBlockAt(block.getStartOffset(), false, true);
      DFSClient.dfsInputStreamfailures.set(failureTimes+1);
      continue;
    }
  }
}
 
Example 10
Source File: DFSLocatedBlocks.java    From RDFS with Apache License 2.0
public LocatedBlock getBlockContainingOffset(long offset) {
  readLock();
  try {
    int blockIdx = super.binarySearchBlockStartOffsets(offset);
    List<LocatedBlock> locatedBlocks = super.getLocatedBlocks();
    if (blockIdx >= 0)
      return locatedBlocks.get(blockIdx);  // exact match

    blockIdx = LocatedBlocks.getInsertIndex(blockIdx);
    // Here, blockIdx is the "insertion point" of the queried offset in
    // the array (the index of the first element greater than the offset),
    // which by definition means that
    //
    // locatedBlocks.get(blockIdx - 1).getStartOffset() < offset &&
    // offset < locatedBlocks.get(blockIdx).getStartOffset().
    //
    // In particular, if blockIdx == 0, then
    // offset < locatedBlocks.get(0).getStartOffset().

    if (blockIdx == 0)
      return null;  // The offset is not found in known blocks.

    LocatedBlock blk = locatedBlocks.get(blockIdx - 1);
    long blkStartOffset = blk.getStartOffset();
    if (offset < blkStartOffset) {
      // By definition of insertion point, 
      // locatedBlocks.get(blockIdx - 1).getStartOffset() < offset.
      throw new AssertionError("Invalid insertion point: " +
          blockIdx + " for offset " + offset + " (located blocks: " +
          locatedBlocks + ")");
    }

    long blkLen = blk.getBlockSize();
    if (offset < blkStartOffset + blkLen
        || (offset == blkStartOffset + blkLen && isUnderConstruction() &&
        blockIdx == locatedBlocks.size())) {
      return blk;
    }

    // Block not found in the location cache, the caller should ask the
    // namenode instead.
    return null;  

  } finally {
    readUnlock();
  }
}
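The insertion-point convention above matches java.util.Arrays.binarySearch, which a tiny standalone snippet (not from the source) makes concrete:

// Standalone demo of the insertion-point convention used above.
long[] startOffsets = {0L, 100L, 200L};  // block start offsets
int idx = java.util.Arrays.binarySearch(startOffsets, 150L);
// idx == -3; a negative result encodes the insertion point as -(idx) - 1 = 2,
// the index of the first start offset greater than 150. The candidate
// containing block is therefore the one at index 2 - 1 = 1 (start offset 100).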
 
Example 11
Source File: DFSClient.java    From RDFS with Apache License 2.0
/**
 * Checks that the given block range covers the given file segment and
 * consists of contiguous blocks. This function assumes that the length
 * of the queried segment is non-zero, and a non-empty block list is
 * expected.
 * @param blockRange the set of blocks obtained for the given file segment
 * @param offset the start offset of the file segment
 * @param length the length of the file segment. Assumed to be positive.
 */
static void checkBlockRange(List<LocatedBlock> blockRange,
    long offset, long length) throws IOException {
  boolean isValid = false;

  if (!blockRange.isEmpty()) {
    int numBlocks = blockRange.size();
    LocatedBlock firstBlock = blockRange.get(0);
    LocatedBlock lastBlock = blockRange.get(numBlocks - 1);
    long segmentEnd = offset + length;

    // Check that the queried segment is between the beginning of the first
    // block and the end of the last block in the block range.
    if (firstBlock.getStartOffset() <= offset &&
        (segmentEnd <=
         lastBlock.getStartOffset() + lastBlock.getBlockSize())) {
      isValid = true;  // There is a chance the block list is valid
      LocatedBlock prevBlock = firstBlock;
      for (int i = 1; i < numBlocks; ++i) {
        // In this loop, prevBlock is always the block #(i - 1) and curBlock
        // is the block #i.
        long prevBlkEnd = prevBlock.getStartOffset() +
            prevBlock.getBlockSize();
        LocatedBlock curBlock = blockRange.get(i);
        long curBlkOffset = curBlock.getStartOffset();
        if (prevBlkEnd != curBlkOffset ||  // Blocks are not contiguous
            prevBlkEnd <= offset ||        // Previous block is redundant
            segmentEnd <= curBlkOffset) {  // Current block is redundant
          isValid = false;
          break;
        }
        prevBlock = curBlock;
      }
    }
  }

  if (!isValid) {
    throw new IOException("Got incorrect block range for " +
        "offset=" + offset + ", length=" + length + ": " +
        blockRange);
  }
}