Java Code Examples for org.apache.hadoop.util.DataChecksum#getChecksumSize()
The following examples show how to use org.apache.hadoop.util.DataChecksum#getChecksumSize().
Each example is drawn from an open source project; the source file, originating project, and license are listed above it.
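Before the extracted sources, here is a minimal, self-contained sketch of the method itself: getChecksumSize() returns the number of bytes each stored checksum occupies (4 for CRC32/CRC32C, 0 for the NULL type), while getBytesPerChecksum() returns how many data bytes each checksum covers. The checksum type (CRC32C), chunk size (512), payload length, and the class name ChecksumSizeDemo below are illustrative assumptions, not values taken from the examples.

import org.apache.hadoop.util.DataChecksum;

public class ChecksumSizeDemo {
  public static void main(String[] args) {
    // One checksum value per 512 data bytes (assumed chunk size).
    DataChecksum checksum =
        DataChecksum.newDataChecksum(DataChecksum.Type.CRC32C, 512);

    int bytesPerChecksum = checksum.getBytesPerChecksum(); // 512
    int checksumSize = checksum.getChecksumSize();         // 4 for CRC32C, 0 for NULL

    // Checksum bytes needed to cover an arbitrary payload, mirroring the
    // meta-file length arithmetic in the DataNode examples below.
    long dataLen = 10_000; // illustrative payload length
    long numChunks = (dataLen + bytesPerChecksum - 1) / bytesPerChecksum;
    long checksumBytes = numChunks * checksumSize;
    System.out.println("chunks=" + numChunks + ", checksum bytes=" + checksumBytes);

    // Compute and serialize the checksum of a single chunk.
    byte[] data = new byte[bytesPerChecksum];
    checksum.update(data, 0, data.length);
    byte[] crc = new byte[checksumSize];
    checksum.writeValue(crc, 0, true); // write the CRC and reset the checksum
  }
}

The same chunk arithmetic is what the examples below use to size block meta files and to derive crcPerBlock.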
Example 1
Source File: RemoteBlockReader.java From hadoop with Apache License 2.0 | 5 votes |
private RemoteBlockReader(String file, String bpid, long blockId,
    DataInputStream in, DataChecksum checksum, boolean verifyChecksum,
    long startOffset, long firstChunkOffset, long bytesToRead, Peer peer,
    DatanodeID datanodeID, PeerCache peerCache) {
  // Path is used only for printing block and file information in debug
  super(new Path("/" + Block.BLOCK_FILE_PREFIX + blockId +
          ":" + bpid + ":of:" + file)/*too non path-like?*/,
      1, verifyChecksum,
      checksum.getChecksumSize() > 0 ? checksum : null,
      checksum.getBytesPerChecksum(),
      checksum.getChecksumSize());
  this.isLocal = DFSClient.isLocalAddress(NetUtils.
      createSocketAddr(datanodeID.getXferAddr()));

  this.peer = peer;
  this.datanodeID = datanodeID;
  this.in = in;
  this.checksum = checksum;
  this.startOffset = Math.max( startOffset, 0 );
  this.blockId = blockId;

  // The total number of bytes that we need to transfer from the DN is
  // the amount that the user wants (bytesToRead), plus the padding at
  // the beginning in order to chunk-align. Note that the DN may elect
  // to send more than this amount if the read starts/ends mid-chunk.
  this.bytesNeededToFinish = bytesToRead + (startOffset - firstChunkOffset);

  this.firstChunkOffset = firstChunkOffset;
  lastChunkOffset = firstChunkOffset;
  lastChunkLen = -1;

  bytesPerChecksum = this.checksum.getBytesPerChecksum();
  checksumSize = this.checksum.getChecksumSize();

  this.peerCache = peerCache;
}
Example 2
Source File: DataXceiver.java From hadoop with Apache License 2.0 | 5 votes |
private MD5Hash calcPartialBlockChecksum(ExtendedBlock block,
    long requestLength, DataChecksum checksum, DataInputStream checksumIn)
    throws IOException {
  final int bytesPerCRC = checksum.getBytesPerChecksum();
  final int csize = checksum.getChecksumSize();
  final byte[] buffer = new byte[4*1024];
  MessageDigest digester = MD5Hash.getDigester();

  long remaining = requestLength / bytesPerCRC * csize;
  for (int toDigest = 0; remaining > 0; remaining -= toDigest) {
    toDigest = checksumIn.read(buffer, 0,
        (int) Math.min(remaining, buffer.length));
    if (toDigest < 0) {
      break;
    }
    digester.update(buffer, 0, toDigest);
  }

  int partialLength = (int) (requestLength % bytesPerCRC);
  if (partialLength > 0) {
    byte[] buf = new byte[partialLength];
    final InputStream blockIn = datanode.data.getBlockInputStream(block,
        requestLength - partialLength);
    try {
      // Get the CRC of the partialLength.
      IOUtils.readFully(blockIn, buf, 0, partialLength);
    } finally {
      IOUtils.closeStream(blockIn);
    }
    checksum.update(buf, 0, partialLength);
    byte[] partialCrc = new byte[csize];
    checksum.writeValue(partialCrc, 0, true);
    digester.update(partialCrc);
  }
  return new MD5Hash(digester.digest());
}
Example 3
Source File: RemoteBlockReader.java From big-c with Apache License 2.0 | 5 votes |
private RemoteBlockReader(String file, String bpid, long blockId,
    DataInputStream in, DataChecksum checksum, boolean verifyChecksum,
    long startOffset, long firstChunkOffset, long bytesToRead, Peer peer,
    DatanodeID datanodeID, PeerCache peerCache) {
  // Path is used only for printing block and file information in debug
  super(new Path("/" + Block.BLOCK_FILE_PREFIX + blockId +
          ":" + bpid + ":of:" + file)/*too non path-like?*/,
      1, verifyChecksum,
      checksum.getChecksumSize() > 0 ? checksum : null,
      checksum.getBytesPerChecksum(),
      checksum.getChecksumSize());
  this.isLocal = DFSClient.isLocalAddress(NetUtils.
      createSocketAddr(datanodeID.getXferAddr()));

  this.peer = peer;
  this.datanodeID = datanodeID;
  this.in = in;
  this.checksum = checksum;
  this.startOffset = Math.max( startOffset, 0 );
  this.blockId = blockId;

  // The total number of bytes that we need to transfer from the DN is
  // the amount that the user wants (bytesToRead), plus the padding at
  // the beginning in order to chunk-align. Note that the DN may elect
  // to send more than this amount if the read starts/ends mid-chunk.
  this.bytesNeededToFinish = bytesToRead + (startOffset - firstChunkOffset);

  this.firstChunkOffset = firstChunkOffset;
  lastChunkOffset = firstChunkOffset;
  lastChunkLen = -1;

  bytesPerChecksum = this.checksum.getBytesPerChecksum();
  checksumSize = this.checksum.getChecksumSize();

  this.peerCache = peerCache;
}
Example 4
Source File: DataXceiver.java From big-c with Apache License 2.0 | 5 votes |
private MD5Hash calcPartialBlockChecksum(ExtendedBlock block,
    long requestLength, DataChecksum checksum, DataInputStream checksumIn)
    throws IOException {
  final int bytesPerCRC = checksum.getBytesPerChecksum();
  final int csize = checksum.getChecksumSize();
  final byte[] buffer = new byte[4*1024];
  MessageDigest digester = MD5Hash.getDigester();

  long remaining = requestLength / bytesPerCRC * csize;
  for (int toDigest = 0; remaining > 0; remaining -= toDigest) {
    toDigest = checksumIn.read(buffer, 0,
        (int) Math.min(remaining, buffer.length));
    if (toDigest < 0) {
      break;
    }
    digester.update(buffer, 0, toDigest);
  }

  int partialLength = (int) (requestLength % bytesPerCRC);
  if (partialLength > 0) {
    byte[] buf = new byte[partialLength];
    final InputStream blockIn = datanode.data.getBlockInputStream(block,
        requestLength - partialLength);
    try {
      // Get the CRC of the partialLength.
      IOUtils.readFully(blockIn, buf, 0, partialLength);
    } finally {
      IOUtils.closeStream(blockIn);
    }
    checksum.update(buf, 0, partialLength);
    byte[] partialCrc = new byte[csize];
    checksum.writeValue(partialCrc, 0, true);
    digester.update(partialCrc);
  }
  return new MD5Hash(digester.digest());
}
Example 5
Source File: BlockReader.java From RDFS with Apache License 2.0 | 5 votes |
private BlockReader( String file, long blockId, DataInputStream in,
                     DataChecksum checksum, boolean verifyChecksum,
                     long startOffset, long firstChunkOffset,
                     Socket dnSock, long minSpeedBps,
                     long dataTransferVersion ) {
  super(new Path("/blk_" + blockId + ":of:" + file)/*too non path-like?*/,
        1, verifyChecksum,
        checksum.getChecksumSize() > 0 ? checksum : null,
        checksum.getBytesPerChecksum(),
        checksum.getChecksumSize());

  this.dnSock = dnSock;
  this.in = in;
  this.checksum = checksum;
  this.startOffset = Math.max( startOffset, 0 );
  this.transferBlockSize =
      (dataTransferVersion >= DataTransferProtocol.SEND_DATA_LEN_VERSION);

  this.firstChunkOffset = firstChunkOffset;
  lastChunkOffset = firstChunkOffset;
  lastChunkLen = -1;

  bytesPerChecksum = this.checksum.getBytesPerChecksum();
  checksumSize = this.checksum.getChecksumSize();

  this.bytesRead = 0;
  this.timeRead = 0;
  this.minSpeedBps = minSpeedBps;
  this.slownessLoged = false;
}
Example 6
Source File: BlockReader.java From RDFS with Apache License 2.0 | 5 votes |
protected BlockReader(Path file, int numRetries, DataChecksum checksum,
    boolean verifyChecksum) {
  super(file, numRetries, verifyChecksum,
      checksum.getChecksumSize() > 0 ? checksum : null,
      checksum.getBytesPerChecksum(),
      checksum.getChecksumSize());
}
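A recurring idiom in these reader constructors is checksum.getChecksumSize() > 0 ? checksum : null: a NULL-type DataChecksum occupies zero bytes per chunk, so passing null to the superclass turns checksum verification off entirely. A small sketch of that behaviour (the variable names are hypothetical):

// NULL checksums store no bytes per chunk, so getChecksumSize() == 0.
DataChecksum nullSum = DataChecksum.newDataChecksum(DataChecksum.Type.NULL, 512);
DataChecksum effective = nullSum.getChecksumSize() > 0 ? nullSum : null; // -> null, verification disabled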
Example 7
Source File: FsDatasetImpl.java From hadoop with Apache License 2.0 | 4 votes |
static private void truncateBlock(File blockFile, File metaFile,
    long oldlen, long newlen) throws IOException {
  LOG.info("truncateBlock: blockFile=" + blockFile
      + ", metaFile=" + metaFile
      + ", oldlen=" + oldlen
      + ", newlen=" + newlen);

  if (newlen == oldlen) {
    return;
  }
  if (newlen > oldlen) {
    throw new IOException("Cannot truncate block to from oldlen (=" + oldlen
        + ") to newlen (=" + newlen + ")");
  }

  DataChecksum dcs = BlockMetadataHeader.readHeader(metaFile).getChecksum();
  int checksumsize = dcs.getChecksumSize();
  int bpc = dcs.getBytesPerChecksum();
  long n = (newlen - 1)/bpc + 1;
  long newmetalen = BlockMetadataHeader.getHeaderSize() + n*checksumsize;
  long lastchunkoffset = (n - 1)*bpc;
  int lastchunksize = (int)(newlen - lastchunkoffset);
  byte[] b = new byte[Math.max(lastchunksize, checksumsize)];

  RandomAccessFile blockRAF = new RandomAccessFile(blockFile, "rw");
  try {
    //truncate blockFile
    blockRAF.setLength(newlen);

    //read last chunk
    blockRAF.seek(lastchunkoffset);
    blockRAF.readFully(b, 0, lastchunksize);
  } finally {
    blockRAF.close();
  }

  //compute checksum
  dcs.update(b, 0, lastchunksize);
  dcs.writeValue(b, 0, false);

  //update metaFile
  RandomAccessFile metaRAF = new RandomAccessFile(metaFile, "rw");
  try {
    metaRAF.setLength(newmetalen);
    metaRAF.seek(newmetalen - checksumsize);
    metaRAF.write(b, 0, checksumsize);
  } finally {
    metaRAF.close();
  }
}
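To make the arithmetic above concrete, assume the common HDFS defaults of 512 data bytes per checksum and a 4-byte CRC (illustrative values, not read from the meta file here): truncating a block to newlen = 1000 gives n = (1000 - 1)/512 + 1 = 2 chunks, a new meta length of BlockMetadataHeader.getHeaderSize() + 2 * 4 bytes, a last-chunk offset of 512, and a 488-byte final chunk whose CRC is recomputed and written back as the last checksumsize bytes of the meta file.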
Example 8
Source File: DataXceiver.java From hadoop with Apache License 2.0 | 4 votes |
@Override
public void blockChecksum(final ExtendedBlock block,
    final Token<BlockTokenIdentifier> blockToken) throws IOException {
  final DataOutputStream out = new DataOutputStream(
      getOutputStream());
  checkAccess(out, true, block, blockToken,
      Op.BLOCK_CHECKSUM, BlockTokenSecretManager.AccessMode.READ);
  // client side now can specify a range of the block for checksum
  long requestLength = block.getNumBytes();
  Preconditions.checkArgument(requestLength >= 0);
  long visibleLength = datanode.data.getReplicaVisibleLength(block);
  boolean partialBlk = requestLength < visibleLength;

  updateCurrentThreadName("Reading metadata for block " + block);
  final LengthInputStream metadataIn = datanode.data
      .getMetaDataInputStream(block);

  final DataInputStream checksumIn = new DataInputStream(
      new BufferedInputStream(metadataIn, HdfsConstants.IO_FILE_BUFFER_SIZE));
  updateCurrentThreadName("Getting checksum for block " + block);
  try {
    //read metadata file
    final BlockMetadataHeader header = BlockMetadataHeader
        .readHeader(checksumIn);
    final DataChecksum checksum = header.getChecksum();
    final int csize = checksum.getChecksumSize();
    final int bytesPerCRC = checksum.getBytesPerChecksum();
    final long crcPerBlock = csize <= 0 ? 0 :
        (metadataIn.getLength() - BlockMetadataHeader.getHeaderSize()) / csize;

    final MD5Hash md5 = partialBlk && crcPerBlock > 0 ?
        calcPartialBlockChecksum(block, requestLength, checksum, checksumIn)
        : MD5Hash.digest(checksumIn);
    if (LOG.isDebugEnabled()) {
      LOG.debug("block=" + block + ", bytesPerCRC=" + bytesPerCRC
          + ", crcPerBlock=" + crcPerBlock + ", md5=" + md5);
    }

    //write reply
    BlockOpResponseProto.newBuilder()
        .setStatus(SUCCESS)
        .setChecksumResponse(OpBlockChecksumResponseProto.newBuilder()
            .setBytesPerCrc(bytesPerCRC)
            .setCrcPerBlock(crcPerBlock)
            .setMd5(ByteString.copyFrom(md5.getDigest()))
            .setCrcType(PBHelper.convert(checksum.getChecksumType())))
        .build()
        .writeDelimitedTo(out);
    out.flush();
  } catch (IOException ioe) {
    LOG.info("blockChecksum " + block + " received exception " + ioe);
    incrDatanodeNetworkErrors();
    throw ioe;
  } finally {
    IOUtils.closeStream(out);
    IOUtils.closeStream(checksumIn);
    IOUtils.closeStream(metadataIn);
  }

  //update metrics
  datanode.metrics.addBlockChecksumOp(elapsed());
}
Example 9
Source File: FsDatasetImpl.java From big-c with Apache License 2.0 | 4 votes |
static private void truncateBlock(File blockFile, File metaFile,
    long oldlen, long newlen) throws IOException {
  LOG.info("truncateBlock: blockFile=" + blockFile
      + ", metaFile=" + metaFile
      + ", oldlen=" + oldlen
      + ", newlen=" + newlen);

  if (newlen == oldlen) {
    return;
  }
  if (newlen > oldlen) {
    throw new IOException("Cannot truncate block to from oldlen (=" + oldlen
        + ") to newlen (=" + newlen + ")");
  }

  DataChecksum dcs = BlockMetadataHeader.readHeader(metaFile).getChecksum();
  int checksumsize = dcs.getChecksumSize();
  int bpc = dcs.getBytesPerChecksum();
  long n = (newlen - 1)/bpc + 1;
  long newmetalen = BlockMetadataHeader.getHeaderSize() + n*checksumsize;
  long lastchunkoffset = (n - 1)*bpc;
  int lastchunksize = (int)(newlen - lastchunkoffset);
  byte[] b = new byte[Math.max(lastchunksize, checksumsize)];

  RandomAccessFile blockRAF = new RandomAccessFile(blockFile, "rw");
  try {
    //truncate blockFile
    blockRAF.setLength(newlen);

    //read last chunk
    blockRAF.seek(lastchunkoffset);
    blockRAF.readFully(b, 0, lastchunksize);
  } finally {
    blockRAF.close();
  }

  //compute checksum
  dcs.update(b, 0, lastchunksize);
  dcs.writeValue(b, 0, false);

  //update metaFile
  RandomAccessFile metaRAF = new RandomAccessFile(metaFile, "rw");
  try {
    metaRAF.setLength(newmetalen);
    metaRAF.seek(newmetalen - checksumsize);
    metaRAF.write(b, 0, checksumsize);
  } finally {
    metaRAF.close();
  }
}
Example 10
Source File: DataXceiver.java From big-c with Apache License 2.0 | 4 votes |
@Override
public void blockChecksum(final ExtendedBlock block,
    final Token<BlockTokenIdentifier> blockToken) throws IOException {
  final DataOutputStream out = new DataOutputStream(
      getOutputStream());
  checkAccess(out, true, block, blockToken,
      Op.BLOCK_CHECKSUM, BlockTokenSecretManager.AccessMode.READ);
  // client side now can specify a range of the block for checksum
  long requestLength = block.getNumBytes();
  Preconditions.checkArgument(requestLength >= 0);
  long visibleLength = datanode.data.getReplicaVisibleLength(block);
  boolean partialBlk = requestLength < visibleLength;

  updateCurrentThreadName("Reading metadata for block " + block);
  final LengthInputStream metadataIn = datanode.data
      .getMetaDataInputStream(block);

  final DataInputStream checksumIn = new DataInputStream(
      new BufferedInputStream(metadataIn, HdfsConstants.IO_FILE_BUFFER_SIZE));
  updateCurrentThreadName("Getting checksum for block " + block);
  try {
    //read metadata file
    final BlockMetadataHeader header = BlockMetadataHeader
        .readHeader(checksumIn);
    final DataChecksum checksum = header.getChecksum();
    final int csize = checksum.getChecksumSize();
    final int bytesPerCRC = checksum.getBytesPerChecksum();
    final long crcPerBlock = csize <= 0 ? 0 :
        (metadataIn.getLength() - BlockMetadataHeader.getHeaderSize()) / csize;

    final MD5Hash md5 = partialBlk && crcPerBlock > 0 ?
        calcPartialBlockChecksum(block, requestLength, checksum, checksumIn)
        : MD5Hash.digest(checksumIn);
    if (LOG.isDebugEnabled()) {
      LOG.debug("block=" + block + ", bytesPerCRC=" + bytesPerCRC
          + ", crcPerBlock=" + crcPerBlock + ", md5=" + md5);
    }

    //write reply
    BlockOpResponseProto.newBuilder()
        .setStatus(SUCCESS)
        .setChecksumResponse(OpBlockChecksumResponseProto.newBuilder()
            .setBytesPerCrc(bytesPerCRC)
            .setCrcPerBlock(crcPerBlock)
            .setMd5(ByteString.copyFrom(md5.getDigest()))
            .setCrcType(PBHelper.convert(checksum.getChecksumType())))
        .build()
        .writeDelimitedTo(out);
    out.flush();
  } catch (IOException ioe) {
    LOG.info("blockChecksum " + block + " received exception " + ioe);
    incrDatanodeNetworkErrors();
    throw ioe;
  } finally {
    IOUtils.closeStream(out);
    IOUtils.closeStream(checksumIn);
    IOUtils.closeStream(metadataIn);
  }

  //update metrics
  datanode.metrics.addBlockChecksumOp(elapsed());
}
Example 11
Source File: BlockPoolSlice.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Find out the number of bytes in the block that match its crc.
 *
 * This algorithm assumes that data corruption caused by unexpected
 * datanode shutdown occurs only in the last crc chunk. So it checks
 * only the last chunk.
 *
 * @param blockFile the block file
 * @param genStamp generation stamp of the block
 * @return the number of valid bytes
 */
private long validateIntegrityAndSetLength(File blockFile, long genStamp) {
  try {
    final File metaFile = FsDatasetUtil.getMetaFile(blockFile, genStamp);
    long blockFileLen = blockFile.length();
    long metaFileLen = metaFile.length();
    int crcHeaderLen = DataChecksum.getChecksumHeaderSize();
    if (!blockFile.exists() || blockFileLen == 0 ||
        !metaFile.exists() || metaFileLen < crcHeaderLen) {
      return 0;
    }
    try (DataInputStream checksumIn = new DataInputStream(
        new BufferedInputStream(
            fileIoProvider.getFileInputStream(volume, metaFile),
            ioFileBufferSize))) {
      // read and handle the common header here. For now just a version
      final DataChecksum checksum = BlockMetadataHeader.readDataChecksum(
          checksumIn, metaFile);
      int bytesPerChecksum = checksum.getBytesPerChecksum();
      int checksumSize = checksum.getChecksumSize();
      long numChunks = Math.min(
          (blockFileLen + bytesPerChecksum - 1) / bytesPerChecksum,
          (metaFileLen - crcHeaderLen) / checksumSize);
      if (numChunks == 0) {
        return 0;
      }
      try (InputStream blockIn = fileIoProvider.getFileInputStream(
               volume, blockFile);
           ReplicaInputStreams ris = new ReplicaInputStreams(blockIn,
               checksumIn, volume.obtainReference(), fileIoProvider)) {
        ris.skipChecksumFully((numChunks - 1) * checksumSize);
        long lastChunkStartPos = (numChunks - 1) * bytesPerChecksum;
        ris.skipDataFully(lastChunkStartPos);
        int lastChunkSize = (int) Math.min(
            bytesPerChecksum, blockFileLen - lastChunkStartPos);
        byte[] buf = new byte[lastChunkSize + checksumSize];
        ris.readChecksumFully(buf, lastChunkSize, checksumSize);
        ris.readDataFully(buf, 0, lastChunkSize);
        checksum.update(buf, 0, lastChunkSize);
        long validFileLength;
        if (checksum.compare(buf, lastChunkSize)) {
          // last chunk matches crc
          validFileLength = lastChunkStartPos + lastChunkSize;
        } else {
          // last chunk is corrupt
          validFileLength = lastChunkStartPos;
        }
        // truncate if extra bytes are present without CRC
        if (blockFile.length() > validFileLength) {
          try (RandomAccessFile blockRAF =
              fileIoProvider.getRandomAccessFile(
                  volume, blockFile, "rw")) {
            // truncate blockFile
            blockRAF.setLength(validFileLength);
          }
        }
        return validFileLength;
      }
    }
  } catch (IOException e) {
    FsDatasetImpl.LOG.warn("Getting exception while validating integrity " +
        "and setting length for blockFile", e);
    return 0;
  }
}
Example 12
Source File: FSDataset.java From RDFS with Apache License 2.0 | 4 votes |
/** {@inheritDoc} */
public void validateBlockMetadata(int namespaceId, Block b) throws IOException {
  DatanodeBlockInfo info;
  lock.readLock().lock();
  try {
    info = volumeMap.get(namespaceId, b);
  } finally {
    lock.readLock().unlock();
  }
  if (info == null) {
    throw new IOException("Block " + b + " does not exist in volumeMap.");
  }
  FSVolume v = info.getVolume();
  File tmp = v.getTmpFile(namespaceId, b);
  File f = info.getFile();
  long fileSize;
  if (f == null) {
    f = tmp;
    if (f == null) {
      throw new IOException("Block " + b + " does not exist on disk.");
    }
    if (!f.exists()) {
      throw new IOException("Block " + b +
                            " block file " + f +
                            " does not exist on disk.");
    }
    fileSize = f.length();
  } else {
    if (info.isFinalized()) {
      info.verifyFinalizedSize();
      fileSize = info.getFinalizedSize();
    } else {
      fileSize = f.length();
    }
  }
  if (b.getNumBytes() > fileSize) {
    throw new IOException("Block " + b +
                          " length is " + b.getNumBytes() +
                          " does not match block file length " +
                          f.length());
  }
  File meta = getMetaFile(f, b);
  if (meta == null) {
    throw new IOException("Block " + b + " metafile does not exist.");
  }
  if (!meta.exists()) {
    throw new IOException("Block " + b +
                          " metafile " + meta +
                          " does not exist on disk.");
  }
  long metaFileSize = meta.length();
  if (metaFileSize == 0 && fileSize > 0) {
    throw new IOException("Block " + b + " metafile " + meta + " is empty.");
  }
  long stamp = parseGenerationStamp(f, meta);
  if (stamp != b.getGenerationStamp()) {
    throw new IOException("Block " + b +
                          " genstamp is " + b.getGenerationStamp() +
                          " does not match meta file stamp " +
                          stamp);
  }
  if (metaFileSize == 0) {
    // no need to check metadata size for 0 size file
    return;
  }
  // verify that checksum file has an integral number of checksum values.
  DataChecksum dcs = BlockMetadataHeader.readHeader(meta).getChecksum();
  int checksumsize = dcs.getChecksumSize();
  long actual = metaFileSize - BlockMetadataHeader.getHeaderSize();
  long numChunksInMeta = actual/checksumsize;
  if (actual % checksumsize != 0) {
    throw new IOException("Block " + b +
                          " has a checksum file of size " + metaFileSize +
                          " but it does not align with checksum size of " +
                          checksumsize);
  }
  int bpc = dcs.getBytesPerChecksum();
  long minDataSize = (numChunksInMeta - 1) * bpc;
  long maxDataSize = numChunksInMeta * bpc;
  if (fileSize > maxDataSize || fileSize <= minDataSize) {
    throw new IOException("Block " + b +
                          " is of size " + f.length() +
                          " but has " + (numChunksInMeta + 1) +
                          " checksums and each checksum size is " +
                          checksumsize + " bytes.");
  }
  // We could crc-check the entire block here, but it will be a costly
  // operation. Instead we rely on the above check (file length mismatch)
  // to detect corrupt blocks.
}
Example 13
Source File: FSDataset.java From hadoop-gpu with Apache License 2.0 | 4 votes |
static private void truncateBlock(File blockFile, File metaFile,
    long oldlen, long newlen) throws IOException {
  if (newlen == oldlen) {
    return;
  }
  if (newlen > oldlen) {
    throw new IOException("Cannot truncate block to from oldlen (=" + oldlen
        + ") to newlen (=" + newlen + ")");
  }

  DataChecksum dcs = BlockMetadataHeader.readHeader(metaFile).getChecksum();
  int checksumsize = dcs.getChecksumSize();
  int bpc = dcs.getBytesPerChecksum();
  long n = (newlen - 1)/bpc + 1;
  long newmetalen = BlockMetadataHeader.getHeaderSize() + n*checksumsize;
  long lastchunkoffset = (n - 1)*bpc;
  int lastchunksize = (int)(newlen - lastchunkoffset);
  byte[] b = new byte[Math.max(lastchunksize, checksumsize)];

  RandomAccessFile blockRAF = new RandomAccessFile(blockFile, "rw");
  try {
    //truncate blockFile
    blockRAF.setLength(newlen);

    //read last chunk
    blockRAF.seek(lastchunkoffset);
    blockRAF.readFully(b, 0, lastchunksize);
  } finally {
    blockRAF.close();
  }

  //compute checksum
  dcs.update(b, 0, lastchunksize);
  dcs.writeValue(b, 0, false);

  //update metaFile
  RandomAccessFile metaRAF = new RandomAccessFile(metaFile, "rw");
  try {
    metaRAF.setLength(newmetalen);
    metaRAF.seek(newmetalen - checksumsize);
    metaRAF.write(b, 0, checksumsize);
  } finally {
    metaRAF.close();
  }
}