Java Code Examples for org.apache.hadoop.util.DataChecksum#getBytesPerChecksum()

The following examples show how to use org.apache.hadoop.util.DataChecksum#getBytesPerChecksum(). Each example is taken from an open source project; the project and source file are noted above the code.
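
Before the project examples, here is a minimal standalone sketch of the call itself. It assumes a Hadoop release that exposes DataChecksum.newDataChecksum(Type, int) (Hadoop 2.x and later); the 512-byte chunk size is only an illustrative value, though it matches the usual dfs.bytes-per-checksum default.

import org.apache.hadoop.util.DataChecksum;

public class BytesPerChecksumDemo {
  public static void main(String[] args) {
    // Build a CRC32C checksum that covers 512-byte data chunks.
    DataChecksum checksum =
        DataChecksum.newDataChecksum(DataChecksum.Type.CRC32C, 512);

    int bytesPerChecksum = checksum.getBytesPerChecksum(); // 512
    int checksumSize = checksum.getChecksumSize();         // 4 bytes per CRC32C value

    // A derived quantity that shows up in several examples below:
    // how many checksum bytes are stored for a 128 MB block.
    long blockSize = 128L * 1024 * 1024;
    long numChunks = (blockSize + bytesPerChecksum - 1) / bytesPerChecksum;
    System.out.println("bytesPerChecksum=" + bytesPerChecksum
        + ", checksumSize=" + checksumSize
        + ", checksum bytes per block=" + numChunks * checksumSize);
  }
}
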
Example 1
Source File: FanOutOneBlockAsyncDFSOutput.java    From hbase with Apache License 2.0
FanOutOneBlockAsyncDFSOutput(Configuration conf, DistributedFileSystem dfs,
    DFSClient client, ClientProtocol namenode, String clientName, String src, long fileId,
    LocatedBlock locatedBlock, Encryptor encryptor, List<Channel> datanodeList,
    DataChecksum summer, ByteBufAllocator alloc) {
  this.conf = conf;
  this.dfs = dfs;
  this.client = client;
  this.namenode = namenode;
  this.fileId = fileId;
  this.clientName = clientName;
  this.src = src;
  this.block = locatedBlock.getBlock();
  this.locations = locatedBlock.getLocations();
  this.encryptor = encryptor;
  this.datanodeList = datanodeList;
  this.summer = summer;
  this.maxDataLen = MAX_DATA_LEN - (MAX_DATA_LEN % summer.getBytesPerChecksum());
  this.alloc = alloc;
  this.buf = alloc.directBuffer(sendBufSizePRedictor.initialSize());
  this.state = State.STREAMING;
  setupReceiver(conf.getInt(DFS_CLIENT_SOCKET_TIMEOUT_KEY, READ_TIMEOUT));
}
 
Example 2
Source File: RemoteBlockReader.java    From hadoop with Apache License 2.0
private RemoteBlockReader(String file, String bpid, long blockId,
    DataInputStream in, DataChecksum checksum, boolean verifyChecksum,
    long startOffset, long firstChunkOffset, long bytesToRead, Peer peer,
    DatanodeID datanodeID, PeerCache peerCache) {
  // Path is used only for printing block and file information in debug
  super(new Path("/" + Block.BLOCK_FILE_PREFIX + blockId +
                  ":" + bpid + ":of:"+ file)/*too non path-like?*/,
        1, verifyChecksum,
        checksum.getChecksumSize() > 0? checksum : null, 
        checksum.getBytesPerChecksum(),
        checksum.getChecksumSize());

  this.isLocal = DFSClient.isLocalAddress(NetUtils.
      createSocketAddr(datanodeID.getXferAddr()));
  
  this.peer = peer;
  this.datanodeID = datanodeID;
  this.in = in;
  this.checksum = checksum;
  this.startOffset = Math.max( startOffset, 0 );
  this.blockId = blockId;

  // The total number of bytes that we need to transfer from the DN is
  // the amount that the user wants (bytesToRead), plus the padding at
  // the beginning in order to chunk-align. Note that the DN may elect
  // to send more than this amount if the read starts/ends mid-chunk.
  this.bytesNeededToFinish = bytesToRead + (startOffset - firstChunkOffset);

  this.firstChunkOffset = firstChunkOffset;
  lastChunkOffset = firstChunkOffset;
  lastChunkLen = -1;

  bytesPerChecksum = this.checksum.getBytesPerChecksum();
  checksumSize = this.checksum.getChecksumSize();
  this.peerCache = peerCache;
}
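
The bytesNeededToFinish computation above relies on chunk alignment: the datanode starts sending at the chunk boundary at or before the requested offset, so the reader must account for the padding between firstChunkOffset and startOffset. A small self-contained sketch of that arithmetic, using made-up offsets rather than anything taken from a live read:

public class ChunkAlignmentMath {
  public static void main(String[] args) {
    // Hypothetical values for illustration only.
    int bytesPerChecksum = 512;   // checksum.getBytesPerChecksum()
    long startOffset = 1300;      // offset the caller asked for
    long bytesToRead = 4000;      // bytes the caller wants

    // The datanode begins at the chunk boundary containing startOffset.
    long firstChunkOffset = (startOffset / bytesPerChecksum) * bytesPerChecksum; // 1024

    // Bytes the reader must consume: requested bytes plus the leading padding.
    long bytesNeededToFinish = bytesToRead + (startOffset - firstChunkOffset);   // 4276

    System.out.println("firstChunkOffset=" + firstChunkOffset
        + ", bytesNeededToFinish=" + bytesNeededToFinish);
  }
}
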
 
Example 3
Source File: BlockReader.java    From RDFS with Apache License 2.0
protected BlockReader(Path file, int numRetries, DataChecksum checksum, boolean verifyChecksum) {
  super(file,
      numRetries,
      verifyChecksum,
      checksum.getChecksumSize() > 0? checksum : null, 
      checksum.getBytesPerChecksum(),
      checksum.getChecksumSize());       
}
 
Example 4
Source File: DataXceiver.java    From big-c with Apache License 2.0
private MD5Hash calcPartialBlockChecksum(ExtendedBlock block,
    long requestLength, DataChecksum checksum, DataInputStream checksumIn)
    throws IOException {
  final int bytesPerCRC = checksum.getBytesPerChecksum();
  final int csize = checksum.getChecksumSize();
  final byte[] buffer = new byte[4*1024];
  MessageDigest digester = MD5Hash.getDigester();

  long remaining = requestLength / bytesPerCRC * csize;
  for (int toDigest = 0; remaining > 0; remaining -= toDigest) {
    toDigest = checksumIn.read(buffer, 0,
        (int) Math.min(remaining, buffer.length));
    if (toDigest < 0) {
      break;
    }
    digester.update(buffer, 0, toDigest);
  }
  
  int partialLength = (int) (requestLength % bytesPerCRC);
  if (partialLength > 0) {
    byte[] buf = new byte[partialLength];
    final InputStream blockIn = datanode.data.getBlockInputStream(block,
        requestLength - partialLength);
    try {
      // Get the CRC of the partialLength.
      IOUtils.readFully(blockIn, buf, 0, partialLength);
    } finally {
      IOUtils.closeStream(blockIn);
    }
    checksum.update(buf, 0, partialLength);
    byte[] partialCrc = new byte[csize];
    checksum.writeValue(partialCrc, 0, true);
    digester.update(partialCrc);
  }
  return new MD5Hash(digester.digest());
}
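
The method above MD5-digests the stored CRCs for every full chunk covered by requestLength, then recomputes one CRC over the trailing partial chunk. The byte counts it works with are plain arithmetic; a hedged sketch with invented numbers:

public class PartialChecksumMath {
  public static void main(String[] args) {
    // Hypothetical values for illustration only.
    int bytesPerCRC = 512;      // checksum.getBytesPerChecksum()
    int csize = 4;              // checksum.getChecksumSize(), e.g. CRC32
    long requestLength = 1800;  // portion of the block the client asked about

    // Stored checksum bytes covering the full chunks (3 full chunks -> 12 bytes).
    long fullChunkChecksumBytes = requestLength / bytesPerCRC * csize;

    // Trailing bytes that need one freshly computed CRC (1800 % 512 = 264).
    int partialLength = (int) (requestLength % bytesPerCRC);

    System.out.println("checksum bytes read from meta file: " + fullChunkChecksumBytes
        + ", partial chunk length: " + partialLength);
  }
}
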
 
Example 5
Source File: RemoteBlockReader.java    From big-c with Apache License 2.0
private RemoteBlockReader(String file, String bpid, long blockId,
    DataInputStream in, DataChecksum checksum, boolean verifyChecksum,
    long startOffset, long firstChunkOffset, long bytesToRead, Peer peer,
    DatanodeID datanodeID, PeerCache peerCache) {
  // Path is used only for printing block and file information in debug
  super(new Path("/" + Block.BLOCK_FILE_PREFIX + blockId +
                  ":" + bpid + ":of:"+ file)/*too non path-like?*/,
        1, verifyChecksum,
        checksum.getChecksumSize() > 0? checksum : null, 
        checksum.getBytesPerChecksum(),
        checksum.getChecksumSize());

  this.isLocal = DFSClient.isLocalAddress(NetUtils.
      createSocketAddr(datanodeID.getXferAddr()));
  
  this.peer = peer;
  this.datanodeID = datanodeID;
  this.in = in;
  this.checksum = checksum;
  this.startOffset = Math.max( startOffset, 0 );
  this.blockId = blockId;

  // The total number of bytes that we need to transfer from the DN is
  // the amount that the user wants (bytesToRead), plus the padding at
  // the beginning in order to chunk-align. Note that the DN may elect
  // to send more than this amount if the read starts/ends mid-chunk.
  this.bytesNeededToFinish = bytesToRead + (startOffset - firstChunkOffset);

  this.firstChunkOffset = firstChunkOffset;
  lastChunkOffset = firstChunkOffset;
  lastChunkLen = -1;

  bytesPerChecksum = this.checksum.getBytesPerChecksum();
  checksumSize = this.checksum.getChecksumSize();
  this.peerCache = peerCache;
}
 
Example 6
Source File: DFSOutputStream.java    From big-c with Apache License 2.0
private DFSOutputStream(DFSClient dfsClient, String src, Progressable progress,
    HdfsFileStatus stat, DataChecksum checksum) throws IOException {
  super(getChecksum4Compute(checksum, stat));
  this.dfsClient = dfsClient;
  this.src = src;
  this.fileId = stat.getFileId();
  this.blockSize = stat.getBlockSize();
  this.blockReplication = stat.getReplication();
  this.fileEncryptionInfo = stat.getFileEncryptionInfo();
  this.progress = progress;
  this.cachingStrategy = new AtomicReference<CachingStrategy>(
      dfsClient.getDefaultWriteCachingStrategy());
  if ((progress != null) && DFSClient.LOG.isDebugEnabled()) {
    DFSClient.LOG.debug(
        "Set non-null progress callback on DFSOutputStream " + src);
  }
  
  this.bytesPerChecksum = checksum.getBytesPerChecksum();
  if (bytesPerChecksum <= 0) {
    throw new HadoopIllegalArgumentException(
        "Invalid value: bytesPerChecksum = " + bytesPerChecksum + " <= 0");
  }
  if (blockSize % bytesPerChecksum != 0) {
    throw new HadoopIllegalArgumentException("Invalid values: "
        + DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY + " (=" + bytesPerChecksum
        + ") must divide block size (=" + blockSize + ").");
  }
  this.checksum4WriteBlock = checksum;

  this.dfsclientSlowLogThresholdMs =
    dfsClient.getConf().dfsclientSlowIoWarningThresholdMs;
  this.byteArrayManager = dfsClient.getClientContext().getByteArrayManager();
}
 
Example 7
Source File: DataXceiver.java    From hadoop with Apache License 2.0
private MD5Hash calcPartialBlockChecksum(ExtendedBlock block,
    long requestLength, DataChecksum checksum, DataInputStream checksumIn)
    throws IOException {
  final int bytesPerCRC = checksum.getBytesPerChecksum();
  final int csize = checksum.getChecksumSize();
  final byte[] buffer = new byte[4*1024];
  MessageDigest digester = MD5Hash.getDigester();

  long remaining = requestLength / bytesPerCRC * csize;
  for (int toDigest = 0; remaining > 0; remaining -= toDigest) {
    toDigest = checksumIn.read(buffer, 0,
        (int) Math.min(remaining, buffer.length));
    if (toDigest < 0) {
      break;
    }
    digester.update(buffer, 0, toDigest);
  }
  
  int partialLength = (int) (requestLength % bytesPerCRC);
  if (partialLength > 0) {
    byte[] buf = new byte[partialLength];
    final InputStream blockIn = datanode.data.getBlockInputStream(block,
        requestLength - partialLength);
    try {
      // Get the CRC of the partialLength.
      IOUtils.readFully(blockIn, buf, 0, partialLength);
    } finally {
      IOUtils.closeStream(blockIn);
    }
    checksum.update(buf, 0, partialLength);
    byte[] partialCrc = new byte[csize];
    checksum.writeValue(partialCrc, 0, true);
    digester.update(partialCrc);
  }
  return new MD5Hash(digester.digest());
}
 
Example 8
Source File: DFSOutputStream.java    From hadoop with Apache License 2.0
private DFSOutputStream(DFSClient dfsClient, String src, Progressable progress,
    HdfsFileStatus stat, DataChecksum checksum) throws IOException {
  super(getChecksum4Compute(checksum, stat));
  this.dfsClient = dfsClient;
  this.src = src;
  this.fileId = stat.getFileId();
  this.blockSize = stat.getBlockSize();
  this.blockReplication = stat.getReplication();
  this.fileEncryptionInfo = stat.getFileEncryptionInfo();
  this.progress = progress;
  this.cachingStrategy = new AtomicReference<CachingStrategy>(
      dfsClient.getDefaultWriteCachingStrategy());
  if ((progress != null) && DFSClient.LOG.isDebugEnabled()) {
    DFSClient.LOG.debug(
        "Set non-null progress callback on DFSOutputStream " + src);
  }
  
  this.bytesPerChecksum = checksum.getBytesPerChecksum();
  if (bytesPerChecksum <= 0) {
    throw new HadoopIllegalArgumentException(
        "Invalid value: bytesPerChecksum = " + bytesPerChecksum + " <= 0");
  }
  if (blockSize % bytesPerChecksum != 0) {
    throw new HadoopIllegalArgumentException("Invalid values: "
        + DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY + " (=" + bytesPerChecksum
        + ") must divide block size (=" + blockSize + ").");
  }
  this.checksum4WriteBlock = checksum;

  this.dfsclientSlowLogThresholdMs =
    dfsClient.getConf().dfsclientSlowIoWarningThresholdMs;
  this.byteArrayManager = dfsClient.getClientContext().getByteArrayManager();
}
 
Example 9
Source File: DataXceiver.java    From hadoop with Apache License 2.0
@Override
public void blockChecksum(final ExtendedBlock block,
    final Token<BlockTokenIdentifier> blockToken) throws IOException {
  final DataOutputStream out = new DataOutputStream(
      getOutputStream());
  checkAccess(out, true, block, blockToken,
      Op.BLOCK_CHECKSUM, BlockTokenSecretManager.AccessMode.READ);
  // client side now can specify a range of the block for checksum
  long requestLength = block.getNumBytes();
  Preconditions.checkArgument(requestLength >= 0);
  long visibleLength = datanode.data.getReplicaVisibleLength(block);
  boolean partialBlk = requestLength < visibleLength;

  updateCurrentThreadName("Reading metadata for block " + block);
  final LengthInputStream metadataIn = datanode.data
      .getMetaDataInputStream(block);
  
  final DataInputStream checksumIn = new DataInputStream(
      new BufferedInputStream(metadataIn, HdfsConstants.IO_FILE_BUFFER_SIZE));
  updateCurrentThreadName("Getting checksum for block " + block);
  try {
    //read metadata file
    final BlockMetadataHeader header = BlockMetadataHeader
        .readHeader(checksumIn);
    final DataChecksum checksum = header.getChecksum();
    final int csize = checksum.getChecksumSize();
    final int bytesPerCRC = checksum.getBytesPerChecksum();
    final long crcPerBlock = csize <= 0 ? 0 : 
      (metadataIn.getLength() - BlockMetadataHeader.getHeaderSize()) / csize;

    final MD5Hash md5 = partialBlk && crcPerBlock > 0 ? 
        calcPartialBlockChecksum(block, requestLength, checksum, checksumIn)
          : MD5Hash.digest(checksumIn);
    if (LOG.isDebugEnabled()) {
      LOG.debug("block=" + block + ", bytesPerCRC=" + bytesPerCRC
          + ", crcPerBlock=" + crcPerBlock + ", md5=" + md5);
    }

    //write reply
    BlockOpResponseProto.newBuilder()
      .setStatus(SUCCESS)
      .setChecksumResponse(OpBlockChecksumResponseProto.newBuilder()             
        .setBytesPerCrc(bytesPerCRC)
        .setCrcPerBlock(crcPerBlock)
        .setMd5(ByteString.copyFrom(md5.getDigest()))
        .setCrcType(PBHelper.convert(checksum.getChecksumType())))
      .build()
      .writeDelimitedTo(out);
    out.flush();
  } catch (IOException ioe) {
    LOG.info("blockChecksum " + block + " received exception " + ioe);
    incrDatanodeNetworkErrors();
    throw ioe;
  } finally {
    IOUtils.closeStream(out);
    IOUtils.closeStream(checksumIn);
    IOUtils.closeStream(metadataIn);
  }

  //update metrics
  datanode.metrics.addBlockChecksumOp(elapsed());
}
 
Example 10
Source File: FSOutputSummer.java    From hadoop with Apache License 2.0
protected FSOutputSummer(DataChecksum sum) {
  this.sum = sum;
  this.buf = new byte[sum.getBytesPerChecksum() * BUFFER_NUM_CHUNKS];
  this.checksum = new byte[getChecksumSize() * BUFFER_NUM_CHUNKS];
  this.count = 0;
}
 
Example 11
Source File: FSDataset.java    From RDFS with Apache License 2.0
/** {@inheritDoc} */
public void validateBlockMetadata(int namespaceId, Block b) throws IOException {
  DatanodeBlockInfo info;
  lock.readLock().lock();
  try {
    info = volumeMap.get(namespaceId, b);
  } finally {
    lock.readLock().unlock();
  }
  if (info == null) {
    throw new IOException("Block " + b + " does not exist in volumeMap.");
  }
  FSVolume v = info.getVolume();
  File tmp = v.getTmpFile(namespaceId, b);
  File f = info.getFile();
  long fileSize;
  if (f == null) {
    f = tmp;
    if (f == null) {
      throw new IOException("Block " + b + " does not exist on disk.");
    }
    if (!f.exists()) {
      throw new IOException("Block " + b + 
                            " block file " + f +
                            " does not exist on disk.");
    }
    fileSize = f.length();
  } else {
    if (info.isFinalized()) {
      info.verifyFinalizedSize();
      fileSize = info.getFinalizedSize();
    } else {
      fileSize = f.length();
    }
  }
  if (b.getNumBytes() > fileSize) {
    throw new IOException("Block " + b + 
                          " length is " + b.getNumBytes()  +
                          " does not match block file length " +
                          f.length());
  }
  File meta = getMetaFile(f, b);
  if (meta == null) {
    throw new IOException("Block " + b + 
                          " metafile does not exist.");
  }
  if (!meta.exists()) {
    throw new IOException("Block " + b + 
                          " metafile " + meta +
                          " does not exist on disk.");
  }
  long metaFileSize = meta.length();
  if (metaFileSize == 0 && fileSize > 0) {
    throw new IOException("Block " + b + " metafile " + meta + " is empty.");
  }
  long stamp = parseGenerationStamp(f, meta);
  if (stamp != b.getGenerationStamp()) {
    throw new IOException("Block " + b + 
                          " genstamp is " + b.getGenerationStamp()  +
                          " does not match meta file stamp " +
                          stamp);
  }
  if (metaFileSize == 0) {
    // no need to check metadata size for 0 size file
    return;
  }
  // verify that checksum file has an integral number of checkum values.
  DataChecksum dcs = BlockMetadataHeader.readHeader(meta).getChecksum();
  int checksumsize = dcs.getChecksumSize();
  long actual = metaFileSize - BlockMetadataHeader.getHeaderSize();
  long numChunksInMeta = actual/checksumsize;
  if (actual % checksumsize != 0) {
    throw new IOException("Block " + b +
                          " has a checksum file of size " + metaFileSize +
                          " but it does not align with checksum size of " +
                          checksumsize);
  }
  int bpc = dcs.getBytesPerChecksum();
  long minDataSize = (numChunksInMeta - 1) * bpc;
  long maxDataSize = numChunksInMeta * bpc;
  if (fileSize > maxDataSize || fileSize <= minDataSize) {
    throw new IOException("Block " + b +
                          " is of size " + f.length() +
                          " but has " + (numChunksInMeta + 1) +
                          " checksums and each checksum size is " +
                          checksumsize + " bytes.");
  }
  // We could crc-check the entire block here, but it will be a costly 
  // operation. Instead we rely on the above check (file length mismatch)
  // to detect corrupt blocks.
}
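
The size check near the end of validateBlockMetadata bounds the data file length by the number of checksum values in the meta file: with numChunksInMeta checksums, the data must be longer than numChunksInMeta - 1 full chunks and no longer than numChunksInMeta full chunks. A standalone sketch of those bounds, again with invented sizes:

public class MetaSizeBounds {
  public static void main(String[] args) {
    // Hypothetical values for illustration only.
    int bytesPerChecksum = 512;   // dcs.getBytesPerChecksum()
    int checksumSize = 4;         // dcs.getChecksumSize()
    long metaPayload = 40;        // meta file length minus header; must divide evenly by 4

    long numChunksInMeta = metaPayload / checksumSize;           // 10 checksums
    long minDataSize = (numChunksInMeta - 1) * bytesPerChecksum; // 4608, exclusive lower bound
    long maxDataSize = numChunksInMeta * bytesPerChecksum;       // 5120, inclusive upper bound

    long fileSize = 4700;  // any value in (4608, 5120] is consistent with 10 checksums
    boolean consistent = fileSize > minDataSize && fileSize <= maxDataSize;
    System.out.println("consistent=" + consistent);
  }
}
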
 
Example 12
Source File: RemoteBlockReader.java    From big-c with Apache License 2.0
/**
 * Create a new BlockReader specifically to satisfy a read.
 * This method also sends the OP_READ_BLOCK request.
 *
 * @param file  File location
 * @param block  The block object
 * @param blockToken  The block token for security
 * @param startOffset  The read offset, relative to block head
 * @param len  The number of bytes to read
 * @param bufferSize  The IO buffer size (not the client buffer size)
 * @param verifyChecksum  Whether to verify checksum
 * @param clientName  Client name
 * @return New BlockReader instance, or null on error.
 */
public static RemoteBlockReader newBlockReader(String file,
                                   ExtendedBlock block, 
                                   Token<BlockTokenIdentifier> blockToken,
                                   long startOffset, long len,
                                   int bufferSize, boolean verifyChecksum,
                                   String clientName, Peer peer,
                                   DatanodeID datanodeID,
                                   PeerCache peerCache,
                                   CachingStrategy cachingStrategy)
                                     throws IOException {
  // in and out will be closed when sock is closed (by the caller)
  final DataOutputStream out =
      new DataOutputStream(new BufferedOutputStream(peer.getOutputStream()));
  new Sender(out).readBlock(block, blockToken, clientName, startOffset, len,
      verifyChecksum, cachingStrategy);
  
  //
  // Get bytes in block, set streams
  //

  DataInputStream in = new DataInputStream(
      new BufferedInputStream(peer.getInputStream(), bufferSize));
  
  BlockOpResponseProto status = BlockOpResponseProto.parseFrom(
      PBHelper.vintPrefixed(in));
  RemoteBlockReader2.checkSuccess(status, peer, block, file);
  ReadOpChecksumInfoProto checksumInfo =
    status.getReadOpChecksumInfo();
  DataChecksum checksum = DataTransferProtoUtil.fromProto(
      checksumInfo.getChecksum());
  //Warning when we get CHECKSUM_NULL?
  
  // Read the first chunk offset.
  long firstChunkOffset = checksumInfo.getChunkOffset();
  
  if ( firstChunkOffset < 0 || firstChunkOffset > startOffset ||
      firstChunkOffset <= (startOffset - checksum.getBytesPerChecksum())) {
    throw new IOException("BlockReader: error in first chunk offset (" +
                          firstChunkOffset + ") startOffset is " + 
                          startOffset + " for file " + file);
  }

  return new RemoteBlockReader(file, block.getBlockPoolId(), block.getBlockId(),
      in, checksum, verifyChecksum, startOffset, firstChunkOffset, len,
      peer, datanodeID, peerCache);
}
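
The firstChunkOffset sanity check in the method above encodes the same alignment rule: the offset reported by the datanode must be the start of the chunk containing startOffset, i.e. non-negative, not past startOffset, and within one chunk of it. A sketch of that predicate with invented values (written here in positive form, as "valid" rather than the error condition):

public class FirstChunkOffsetCheck {
  public static void main(String[] args) {
    // Hypothetical values for illustration only.
    int bytesPerChecksum = 512;   // checksum.getBytesPerChecksum()
    long startOffset = 1300;
    long firstChunkOffset = 1024; // expected: chunk boundary at or before startOffset

    boolean valid = firstChunkOffset >= 0
        && firstChunkOffset <= startOffset
        && firstChunkOffset > (startOffset - bytesPerChecksum);
    System.out.println("valid=" + valid); // true for 1024 with a 512-byte chunk size
  }
}
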
 
Example 13
Source File: RemoteBlockReader2.java    From big-c with Apache License 2.0
/**
 * Create a new BlockReader specifically to satisfy a read.
 * This method also sends the OP_READ_BLOCK request.
 *
 * @param file  File location
 * @param block  The block object
 * @param blockToken  The block token for security
 * @param startOffset  The read offset, relative to block head
 * @param len  The number of bytes to read
 * @param verifyChecksum  Whether to verify checksum
 * @param clientName  Client name
 * @param peer  The Peer to use
 * @param datanodeID  The DatanodeID this peer is connected to
 * @return New BlockReader instance, or null on error.
 */
public static BlockReader newBlockReader(String file,
                                   ExtendedBlock block,
                                   Token<BlockTokenIdentifier> blockToken,
                                   long startOffset, long len,
                                   boolean verifyChecksum,
                                   String clientName,
                                   Peer peer, DatanodeID datanodeID,
                                   PeerCache peerCache,
                                   CachingStrategy cachingStrategy) throws IOException {
  // in and out will be closed when sock is closed (by the caller)
  final DataOutputStream out = new DataOutputStream(new BufferedOutputStream(
        peer.getOutputStream()));
  new Sender(out).readBlock(block, blockToken, clientName, startOffset, len,
      verifyChecksum, cachingStrategy);

  //
  // Get bytes in block
  //
  DataInputStream in = new DataInputStream(peer.getInputStream());

  BlockOpResponseProto status = BlockOpResponseProto.parseFrom(
      PBHelper.vintPrefixed(in));
  checkSuccess(status, peer, block, file);
  ReadOpChecksumInfoProto checksumInfo =
    status.getReadOpChecksumInfo();
  DataChecksum checksum = DataTransferProtoUtil.fromProto(
      checksumInfo.getChecksum());
  //Warning when we get CHECKSUM_NULL?

  // Read the first chunk offset.
  long firstChunkOffset = checksumInfo.getChunkOffset();

  if ( firstChunkOffset < 0 || firstChunkOffset > startOffset ||
      firstChunkOffset <= (startOffset - checksum.getBytesPerChecksum())) {
    throw new IOException("BlockReader: error in first chunk offset (" +
                          firstChunkOffset + ") startOffset is " +
                          startOffset + " for file " + file);
  }

  return new RemoteBlockReader2(file, block.getBlockPoolId(), block.getBlockId(),
      checksum, verifyChecksum, startOffset, firstChunkOffset, len, peer,
      datanodeID, peerCache);
}
 
Example 14
Source File: FsDatasetImpl.java    From big-c with Apache License 2.0
static private void truncateBlock(File blockFile, File metaFile,
    long oldlen, long newlen) throws IOException {
  LOG.info("truncateBlock: blockFile=" + blockFile
      + ", metaFile=" + metaFile
      + ", oldlen=" + oldlen
      + ", newlen=" + newlen);

  if (newlen == oldlen) {
    return;
  }
  if (newlen > oldlen) {
    throw new IOException("Cannot truncate block to from oldlen (=" + oldlen
        + ") to newlen (=" + newlen + ")");
  }

  DataChecksum dcs = BlockMetadataHeader.readHeader(metaFile).getChecksum(); 
  int checksumsize = dcs.getChecksumSize();
  int bpc = dcs.getBytesPerChecksum();
  long n = (newlen - 1)/bpc + 1;
  long newmetalen = BlockMetadataHeader.getHeaderSize() + n*checksumsize;
  long lastchunkoffset = (n - 1)*bpc;
  int lastchunksize = (int)(newlen - lastchunkoffset); 
  byte[] b = new byte[Math.max(lastchunksize, checksumsize)]; 

  RandomAccessFile blockRAF = new RandomAccessFile(blockFile, "rw");
  try {
    //truncate blockFile 
    blockRAF.setLength(newlen);
 
    //read last chunk
    blockRAF.seek(lastchunkoffset);
    blockRAF.readFully(b, 0, lastchunksize);
  } finally {
    blockRAF.close();
  }

  //compute checksum
  dcs.update(b, 0, lastchunksize);
  dcs.writeValue(b, 0, false);

  //update metaFile 
  RandomAccessFile metaRAF = new RandomAccessFile(metaFile, "rw");
  try {
    metaRAF.setLength(newmetalen);
    metaRAF.seek(newmetalen - checksumsize);
    metaRAF.write(b, 0, checksumsize);
  } finally {
    metaRAF.close();
  }
}
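
truncateBlock above derives the new meta file length from the new block length: n chunks are needed to cover newlen, each contributing one checksum value after the header, and the last chunk's CRC is recomputed from the surviving bytes. The arithmetic in isolation, with made-up lengths (the 7-byte header size is an assumption based on the version field plus the checksum header):

public class TruncateMath {
  public static void main(String[] args) {
    // Hypothetical values for illustration only.
    int bytesPerChecksum = 512;   // dcs.getBytesPerChecksum()
    int checksumSize = 4;         // dcs.getChecksumSize()
    int headerSize = 7;           // assumed BlockMetadataHeader.getHeaderSize()
    long newlen = 1300;           // truncated block length

    long n = (newlen - 1) / bytesPerChecksum + 1;          // 3 chunks cover 1300 bytes
    long newMetaLen = headerSize + n * checksumSize;       // 7 + 12 = 19 bytes
    long lastChunkOffset = (n - 1) * bytesPerChecksum;     // 1024
    int lastChunkSize = (int) (newlen - lastChunkOffset);  // 276 bytes to re-checksum

    System.out.println("newMetaLen=" + newMetaLen
        + ", lastChunkOffset=" + lastChunkOffset
        + ", lastChunkSize=" + lastChunkSize);
  }
}
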
 
Example 15
Source File: RemoteBlockReader2.java    From hadoop with Apache License 2.0
/**
 * Create a new BlockReader specifically to satisfy a read.
 * This method also sends the OP_READ_BLOCK request.
 *
 * @param file  File location
 * @param block  The block object
 * @param blockToken  The block token for security
 * @param startOffset  The read offset, relative to block head
 * @param len  The number of bytes to read
 * @param verifyChecksum  Whether to verify checksum
 * @param clientName  Client name
 * @param peer  The Peer to use
 * @param datanodeID  The DatanodeID this peer is connected to
 * @return New BlockReader instance, or null on error.
 */
public static BlockReader newBlockReader(String file,
                                   ExtendedBlock block,
                                   Token<BlockTokenIdentifier> blockToken,
                                   long startOffset, long len,
                                   boolean verifyChecksum,
                                   String clientName,
                                   Peer peer, DatanodeID datanodeID,
                                   PeerCache peerCache,
                                   CachingStrategy cachingStrategy) throws IOException {
  // in and out will be closed when sock is closed (by the caller)
  final DataOutputStream out = new DataOutputStream(new BufferedOutputStream(
        peer.getOutputStream()));
  new Sender(out).readBlock(block, blockToken, clientName, startOffset, len,
      verifyChecksum, cachingStrategy);

  //
  // Get bytes in block
  //
  DataInputStream in = new DataInputStream(peer.getInputStream());

  BlockOpResponseProto status = BlockOpResponseProto.parseFrom(
      PBHelper.vintPrefixed(in));
  checkSuccess(status, peer, block, file);
  ReadOpChecksumInfoProto checksumInfo =
    status.getReadOpChecksumInfo();
  DataChecksum checksum = DataTransferProtoUtil.fromProto(
      checksumInfo.getChecksum());
  //Warning when we get CHECKSUM_NULL?

  // Read the first chunk offset.
  long firstChunkOffset = checksumInfo.getChunkOffset();

  if ( firstChunkOffset < 0 || firstChunkOffset > startOffset ||
      firstChunkOffset <= (startOffset - checksum.getBytesPerChecksum())) {
    throw new IOException("BlockReader: error in first chunk offset (" +
                          firstChunkOffset + ") startOffset is " +
                          startOffset + " for file " + file);
  }

  return new RemoteBlockReader2(file, block.getBlockPoolId(), block.getBlockId(),
      checksum, verifyChecksum, startOffset, firstChunkOffset, len, peer,
      datanodeID, peerCache);
}
 
Example 16
Source File: BlockReader.java    From RDFS with Apache License 2.0
public static BlockReader newBlockReader( int dataTransferVersion,
                                   int namespaceId,
                                   Socket sock, String file,
                                   long blockId,
                                   long genStamp,
                                   long startOffset, long len,
                                   int bufferSize, boolean verifyChecksum,
                                   String clientName, long minSpeedBps)
                                   throws IOException {
  // in and out will be closed when sock is closed (by the caller)
  DataOutputStream out = new DataOutputStream(
    new BufferedOutputStream(NetUtils.getOutputStream(sock,HdfsConstants.WRITE_TIMEOUT)));

  //write the header.
  ReadBlockHeader readBlockHeader = new ReadBlockHeader(
      dataTransferVersion, namespaceId, blockId, genStamp, startOffset, len,
      clientName);
  readBlockHeader.writeVersionAndOpCode(out);
  readBlockHeader.write(out);
  out.flush();

  //
  // Get bytes in block, set streams
  //

  DataInputStream in = new DataInputStream(
      new BufferedInputStream(NetUtils.getInputStream(sock),
                              bufferSize));

  if ( in.readShort() != DataTransferProtocol.OP_STATUS_SUCCESS ) {
    throw new IOException("Got error in response to OP_READ_BLOCK " +
                          "self=" + sock.getLocalSocketAddress() +
                          ", remote=" + sock.getRemoteSocketAddress() +
                          " for file " + file +
                          " for block " + blockId);
  }
  DataChecksum checksum = DataChecksum.newDataChecksum( in , new PureJavaCrc32());
  //Warning when we get CHECKSUM_NULL?

  // Read the first chunk offset.
  long firstChunkOffset = in.readLong();

  if ( firstChunkOffset < 0 || firstChunkOffset > startOffset ||
      firstChunkOffset >= (startOffset + checksum.getBytesPerChecksum())) {
    throw new IOException("BlockReader: error in first chunk offset (" +
                          firstChunkOffset + ") startOffset is " +
                          startOffset + " for file " + file);
  }

  return new BlockReader(file, blockId, in, checksum, verifyChecksum,
      startOffset, firstChunkOffset, sock, minSpeedBps, dataTransferVersion);
}
 
Example 17
Source File: FSOutputSummer.java    From big-c with Apache License 2.0
protected FSOutputSummer(DataChecksum sum) {
  this.sum = sum;
  this.buf = new byte[sum.getBytesPerChecksum() * BUFFER_NUM_CHUNKS];
  this.checksum = new byte[getChecksumSize() * BUFFER_NUM_CHUNKS];
  this.count = 0;
}
 
Example 18
Source File: BlockPoolSlice.java    From lucene-solr with Apache License 2.0
/**
 * Find out the number of bytes in the block that match its crc.
 *
 * This algorithm assumes that data corruption caused by unexpected
 * datanode shutdown occurs only in the last crc chunk. So it checks
 * only the last chunk.
 *
 * @param blockFile the block file
 * @param genStamp generation stamp of the block
 * @return the number of valid bytes
 */
private long validateIntegrityAndSetLength(File blockFile, long genStamp) {
  try {
    final File metaFile = FsDatasetUtil.getMetaFile(blockFile, genStamp);
    long blockFileLen = blockFile.length();
    long metaFileLen = metaFile.length();
    int crcHeaderLen = DataChecksum.getChecksumHeaderSize();
    if (!blockFile.exists() || blockFileLen == 0 ||
        !metaFile.exists() || metaFileLen < crcHeaderLen) {
      return 0;
    }
    try (DataInputStream checksumIn = new DataInputStream(
        new BufferedInputStream(
            fileIoProvider.getFileInputStream(volume, metaFile),
            ioFileBufferSize))) {
      // read and handle the common header here. For now just a version
      final DataChecksum checksum = BlockMetadataHeader.readDataChecksum(
          checksumIn, metaFile);
      int bytesPerChecksum = checksum.getBytesPerChecksum();
      int checksumSize = checksum.getChecksumSize();
      long numChunks = Math.min(
          (blockFileLen + bytesPerChecksum - 1) / bytesPerChecksum,
          (metaFileLen - crcHeaderLen) / checksumSize);
      if (numChunks == 0) {
        return 0;
      }
      try (InputStream blockIn = fileIoProvider.getFileInputStream(
          volume, blockFile);
           ReplicaInputStreams ris = new ReplicaInputStreams(blockIn,
               checksumIn, volume.obtainReference(), fileIoProvider)) {
        ris.skipChecksumFully((numChunks - 1) * checksumSize);
        long lastChunkStartPos = (numChunks - 1) * bytesPerChecksum;
        ris.skipDataFully(lastChunkStartPos);
        int lastChunkSize = (int) Math.min(
            bytesPerChecksum, blockFileLen - lastChunkStartPos);
        byte[] buf = new byte[lastChunkSize + checksumSize];
        ris.readChecksumFully(buf, lastChunkSize, checksumSize);
        ris.readDataFully(buf, 0, lastChunkSize);
        checksum.update(buf, 0, lastChunkSize);
        long validFileLength;
        if (checksum.compare(buf, lastChunkSize)) { // last chunk matches crc
          validFileLength = lastChunkStartPos + lastChunkSize;
        } else { // last chunk is corrupt
          validFileLength = lastChunkStartPos;
        }
        // truncate if extra bytes are present without CRC
        if (blockFile.length() > validFileLength) {
          try (RandomAccessFile blockRAF =
                   fileIoProvider.getRandomAccessFile(
                       volume, blockFile, "rw")) {
            // truncate blockFile
            blockRAF.setLength(validFileLength);
          }
        }
        return validFileLength;
      }
    }
  } catch (IOException e) {
    FsDatasetImpl.LOG.warn("Getting exception while validating integrity " +
        "and setting length for blockFile", e);
    return 0;
  }
}
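
As its javadoc says, the method above trusts every chunk except the last and only re-verifies the final CRC, so the number of chunks it considers is capped by whichever file, data or meta, covers fewer chunks. That cap is the one piece of arithmetic worth pulling out; a sketch with invented file lengths (the 7-byte header size is an assumption):

public class LastChunkMath {
  public static void main(String[] args) {
    // Hypothetical values for illustration only.
    int bytesPerChecksum = 512;   // checksum.getBytesPerChecksum()
    int checksumSize = 4;         // checksum.getChecksumSize()
    int crcHeaderLen = 7;         // assumed meta header size (version + checksum header)
    long blockFileLen = 2000;     // data file length on disk
    long metaFileLen = crcHeaderLen + 3 * checksumSize; // meta only covers 3 chunks

    // Chunks covered by data (rounded up) vs. chunks covered by stored CRCs.
    long numChunks = Math.min(
        (blockFileLen + bytesPerChecksum - 1) / bytesPerChecksum,  // 4
        (metaFileLen - crcHeaderLen) / checksumSize);              // 3

    long lastChunkStartPos = (numChunks - 1) * bytesPerChecksum;   // 1024
    int lastChunkSize = (int) Math.min(
        bytesPerChecksum, blockFileLen - lastChunkStartPos);       // 512

    System.out.println("numChunks=" + numChunks
        + ", lastChunkStartPos=" + lastChunkStartPos
        + ", lastChunkSize=" + lastChunkSize);
  }
}
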
 
Example 19
Source File: RemoteBlockReader.java    From hadoop with Apache License 2.0
/**
 * Create a new BlockReader specifically to satisfy a read.
 * This method also sends the OP_READ_BLOCK request.
 *
 * @param file  File location
 * @param block  The block object
 * @param blockToken  The block token for security
 * @param startOffset  The read offset, relative to block head
 * @param len  The number of bytes to read
 * @param bufferSize  The IO buffer size (not the client buffer size)
 * @param verifyChecksum  Whether to verify checksum
 * @param clientName  Client name
 * @return New BlockReader instance, or null on error.
 */
public static RemoteBlockReader newBlockReader(String file,
                                   ExtendedBlock block, 
                                   Token<BlockTokenIdentifier> blockToken,
                                   long startOffset, long len,
                                   int bufferSize, boolean verifyChecksum,
                                   String clientName, Peer peer,
                                   DatanodeID datanodeID,
                                   PeerCache peerCache,
                                   CachingStrategy cachingStrategy)
                                     throws IOException {
  // in and out will be closed when sock is closed (by the caller)
  final DataOutputStream out =
      new DataOutputStream(new BufferedOutputStream(peer.getOutputStream()));
  new Sender(out).readBlock(block, blockToken, clientName, startOffset, len,
      verifyChecksum, cachingStrategy);
  
  //
  // Get bytes in block, set streams
  //

  DataInputStream in = new DataInputStream(
      new BufferedInputStream(peer.getInputStream(), bufferSize));
  
  BlockOpResponseProto status = BlockOpResponseProto.parseFrom(
      PBHelper.vintPrefixed(in));
  RemoteBlockReader2.checkSuccess(status, peer, block, file);
  ReadOpChecksumInfoProto checksumInfo =
    status.getReadOpChecksumInfo();
  DataChecksum checksum = DataTransferProtoUtil.fromProto(
      checksumInfo.getChecksum());
  //Warning when we get CHECKSUM_NULL?
  
  // Read the first chunk offset.
  long firstChunkOffset = checksumInfo.getChunkOffset();
  
  if ( firstChunkOffset < 0 || firstChunkOffset > startOffset ||
      firstChunkOffset <= (startOffset - checksum.getBytesPerChecksum())) {
    throw new IOException("BlockReader: error in first chunk offset (" +
                          firstChunkOffset + ") startOffset is " + 
                          startOffset + " for file " + file);
  }

  return new RemoteBlockReader(file, block.getBlockPoolId(), block.getBlockId(),
      in, checksum, verifyChecksum, startOffset, firstChunkOffset, len,
      peer, datanodeID, peerCache);
}
 
Example 20
Source File: FSDataset.java    From hadoop-gpu with Apache License 2.0
static private void truncateBlock(File blockFile, File metaFile,
    long oldlen, long newlen) throws IOException {
  if (newlen == oldlen) {
    return;
  }
  if (newlen > oldlen) {
    throw new IOException("Cannout truncate block to from oldlen (=" + oldlen
        + ") to newlen (=" + newlen + ")");
  }

  DataChecksum dcs = BlockMetadataHeader.readHeader(metaFile).getChecksum(); 
  int checksumsize = dcs.getChecksumSize();
  int bpc = dcs.getBytesPerChecksum();
  long n = (newlen - 1)/bpc + 1;
  long newmetalen = BlockMetadataHeader.getHeaderSize() + n*checksumsize;
  long lastchunkoffset = (n - 1)*bpc;
  int lastchunksize = (int)(newlen - lastchunkoffset); 
  byte[] b = new byte[Math.max(lastchunksize, checksumsize)]; 

  RandomAccessFile blockRAF = new RandomAccessFile(blockFile, "rw");
  try {
    //truncate blockFile 
    blockRAF.setLength(newlen);
 
    //read last chunk
    blockRAF.seek(lastchunkoffset);
    blockRAF.readFully(b, 0, lastchunksize);
  } finally {
    blockRAF.close();
  }

  //compute checksum
  dcs.update(b, 0, lastchunksize);
  dcs.writeValue(b, 0, false);

  //update metaFile 
  RandomAccessFile metaRAF = new RandomAccessFile(metaFile, "rw");
  try {
    metaRAF.setLength(newmetalen);
    metaRAF.seek(newmetalen - checksumsize);
    metaRAF.write(b, 0, checksumsize);
  } finally {
    metaRAF.close();
  }
}