Java Code Examples for org.apache.hadoop.util.DataChecksum#newDataChecksum()

The following examples show how to use org.apache.hadoop.util.DataChecksum#newDataChecksum(). Each example is taken from an open-source project; the source file, project, and license are noted above the code.
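
Before the project examples, here is a minimal, self-contained sketch of the overload most of them use, newDataChecksum(Type, bytesPerChecksum). It is our own illustration, not taken from any project below, and assumes a Hadoop version that has the DataChecksum.Type enum (2.x or later):

import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.hadoop.util.DataChecksum;

public class DataChecksumSketch {
  public static void main(String[] args) throws IOException {
    // One CRC32C value is computed for every 512 bytes of data.
    DataChecksum sum =
        DataChecksum.newDataChecksum(DataChecksum.Type.CRC32C, 512);

    byte[] data = new byte[1024];                       // two 512-byte chunks
    byte[] sums = new byte[2 * sum.getChecksumSize()];  // 4 bytes per checksum

    // Compute one checksum per chunk into the sums buffer ...
    sum.calculateChunkedSums(ByteBuffer.wrap(data), ByteBuffer.wrap(sums));
    // ... then verify them; verifyChunkedSums throws ChecksumException
    // (an IOException) on a mismatch.
    sum.verifyChunkedSums(ByteBuffer.wrap(data), ByteBuffer.wrap(sums),
        "sketch", 0);
  }
}
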
Example 1
Source File: IFileInputStream.java    From hadoop with Apache License 2.0
/**
 * Create a checksum input stream that reads and verifies the
 * checksum appended to the given stream.
 * @param in The input stream to be verified for checksum.
 * @param len The length of the input stream including checksum bytes.
 */
public IFileInputStream(InputStream in, long len, Configuration conf) {
  this.in = in;
  this.inFd = getFileDescriptorIfAvail(in);
  sum = DataChecksum.newDataChecksum(DataChecksum.Type.CRC32, 
      Integer.MAX_VALUE);
  checksumSize = sum.getChecksumSize();
  length = len;
  dataLength = length - checksumSize;

  conf = (conf != null) ? conf : new Configuration();
  readahead = conf.getBoolean(MRConfig.MAPRED_IFILE_READAHEAD,
      MRConfig.DEFAULT_MAPRED_IFILE_READAHEAD);
  readaheadLength = conf.getInt(MRConfig.MAPRED_IFILE_READAHEAD_BYTES,
      MRConfig.DEFAULT_MAPRED_IFILE_READAHEAD_BYTES);

  doReadahead();
}
 
Example 2
Source File: ChecksumUtil.java    From hbase with Apache License 2.0
/**
 * Generates checksums for all the data in indata. The checksums are
 * written to outdata.
 * @param indata input data stream
 * @param startOffset starting offset in the indata stream from which to
 *                    compute checksums
 * @param endOffset ending offset in the indata stream up to which
 *                  checksums need to be computed
 * @param outdata the output buffer where checksum values are written
 * @param outOffset the starting offset in the outdata where the
 *                  checksum values are written
 * @param checksumType type of checksum
 * @param bytesPerChecksum number of bytes per checksum value
 */
static void generateChecksums(byte[] indata, int startOffset, int endOffset,
  byte[] outdata, int outOffset, ChecksumType checksumType,
  int bytesPerChecksum) throws IOException {

  if (checksumType == ChecksumType.NULL) {
    return; // No checksum for this block.
  }

  DataChecksum checksum = DataChecksum.newDataChecksum(
      checksumType.getDataChecksumType(), bytesPerChecksum);

  checksum.calculateChunkedSums(
     ByteBuffer.wrap(indata, startOffset, endOffset - startOffset),
     ByteBuffer.wrap(outdata, outOffset, outdata.length - outOffset));
}
 
Example 3
Source File: BlockXCodingMerger.java    From RDFS with Apache License 2.0
public BlockXCodingMerger(Block block, int namespaceId,
		DataInputStream[] childInputStreams, long offsetInBlock,
		long length, String[] childAddrs, String myAddr,
		DataTransferThrottler throttler,
		int mergerLevel) throws IOException{
	super();
	this.block = block;
	this.namespaceId = namespaceId;
	this.childInputStreams = childInputStreams;
	this.offsetInBlock = offsetInBlock;
	this.length = length;
	this.childAddrs = childAddrs;
	this.myAddr = myAddr;
	this.throttler = throttler;
	this.mergerLevel = mergerLevel;
	Configuration conf = new Configuration();
	this.packetSize = conf.getInt("raid.blockreconstruct.packetsize", 4096);
	this.bytesPerChecksum = conf.getInt("io.bytes.per.checksum", 512);
	this.checksum = DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_CRC32,
			bytesPerChecksum, new PureJavaCrc32());
	this.checksumSize = checksum.getChecksumSize();
}
 
Example 4
Source File: TestShortCircuitCache.java    From big-c with Apache License 2.0
public TestFileDescriptorPair() throws IOException {
  fis = new FileInputStream[2];
  for (int i = 0; i < 2; i++) {
    String name = dir.getDir() + "/file" + i;
    FileOutputStream fos = new FileOutputStream(name);
    if (i == 0) {
      // write 'data' file
      fos.write(1);
    } else {
      // write 'metadata' file
      BlockMetadataHeader header =
          new BlockMetadataHeader((short)1,
              DataChecksum.newDataChecksum(DataChecksum.Type.NULL, 4));
      DataOutputStream dos = new DataOutputStream(fos);
      BlockMetadataHeader.writeHeader(dos, header);
      dos.close();
    }
    fos.close();
    fis[i] = new FileInputStream(name);
  }
}
 
Example 5
Source File: BlockMetadataHeader.java    From hadoop with Apache License 2.0
/**
 * Read the header without changing the position of the FileChannel.
 *
 * @param fc The FileChannel to read.
 * @return the Metadata Header.
 * @throws IOException on error.
 */
public static BlockMetadataHeader preadHeader(FileChannel fc)
    throws IOException {
  final byte arr[] = new byte[getHeaderSize()];
  ByteBuffer buf = ByteBuffer.wrap(arr);

  while (buf.hasRemaining()) {
    // Pread at the number of bytes already read, so a short read does
    // not re-read the start of the header into the wrong buffer slot.
    if (fc.read(buf, buf.position()) <= 0) {
      throw new EOFException("unexpected EOF while reading " +
          "metadata file header");
    }
  }
  short version = (short)((arr[0] << 8) | (arr[1] & 0xff));
  DataChecksum dataChecksum = DataChecksum.newDataChecksum(arr, 2);
  return new BlockMetadataHeader(version, dataChecksum);
}
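
The (byte[], offset) overload used in preadHeader parses the serialized form of a checksum: one type byte followed by a four-byte bytesPerChecksum, which is why the call above skips the two-byte version field with offset 2. A small round-trip sketch of our own, under the same Hadoop-version assumption as the sketch at the top of the page:

DataChecksum written = DataChecksum.newDataChecksum(DataChecksum.Type.CRC32, 512);
byte[] header = written.getHeader();  // 1 type byte + 4-byte bytesPerChecksum
DataChecksum read = DataChecksum.newDataChecksum(header, 0);
assert read.getChecksumType() == written.getChecksumType();
assert read.getBytesPerChecksum() == 512;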
 
Example 6
Source File: DFSClient.java    From hadoop with Apache License 2.0
private DataChecksum createChecksum(ChecksumOpt userOpt) {
  // Fill in any missing field with the default.
  ChecksumOpt myOpt = ChecksumOpt.processChecksumOpt(
      defaultChecksumOpt, userOpt);
  DataChecksum dataChecksum = DataChecksum.newDataChecksum(
      myOpt.getChecksumType(),
      myOpt.getBytesPerChecksum());
  if (dataChecksum == null) {
    throw new HadoopIllegalArgumentException("Invalid checksum type: userOpt="
        + userOpt + ", default=" + defaultChecksumOpt
        + ", effective=null");
  }
  return dataChecksum;
}
 
Example 7
Source File: IFileOutputStream.java    From hadoop-gpu with Apache License 2.0
/**
 * Create a checksum output stream that writes
 * the bytes to the given stream.
 * @param out the underlying output stream to write to
 */
public IFileOutputStream(OutputStream out) {
  super(out);
  sum = DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_CRC32,
      Integer.MAX_VALUE);
  barray = new byte[sum.getChecksumSize()];
}
 
Example 8
Source File: IFileOutputStream.java    From tez with Apache License 2.0
/**
 * Create a checksum output stream that writes
 * the bytes to the given stream.
 * @param out the underlying output stream to write to
 */
public IFileOutputStream(OutputStream out) {
  super(out);
  sum = DataChecksum.newDataChecksum(DataChecksum.Type.CRC32,
      Integer.MAX_VALUE);
  barray = new byte[sum.getChecksumSize()];
  buffer = new byte[4096];
  offset = 0;
}
 
Example 9
Source File: IFileInputStream.java    From hadoop-gpu with Apache License 2.0
/**
 * Create a checksum input stream that reads and verifies the
 * checksum appended to the given stream.
 * @param in The input stream to be verified for checksum.
 * @param len The length of the input stream including checksum bytes.
 */
public IFileInputStream(InputStream in, long len) {
  this.in = in;
  sum = DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_CRC32, 
      Integer.MAX_VALUE);
  checksumSize = sum.getChecksumSize();
  length = len;
  dataLength = length - checksumSize;
}
 
Example 10
Source File: DFSOutputStream.java    From hadoop with Apache License 2.0
/** 
 * @return the object for computing checksum.
 *         The type is NULL if checksum is not computed.
 */
private static DataChecksum getChecksum4Compute(DataChecksum checksum,
    HdfsFileStatus stat) {
  if (isLazyPersist(stat) && stat.getReplication() == 1) {
    // do not compute checksum for writing to single replica to memory
    return DataChecksum.newDataChecksum(Type.NULL,
        checksum.getBytesPerChecksum());
  }
  return checksum;
}
 
Example 11
Source File: IFileOutputStream.java    From hadoop with Apache License 2.0
/**
 * Create a checksum output stream that writes
 * the bytes to the given stream.
 * @param out the underlying output stream to write to
 */
public IFileOutputStream(OutputStream out) {
  super(out);
  sum = DataChecksum.newDataChecksum(DataChecksum.Type.CRC32,
      Integer.MAX_VALUE);
  barray = new byte[sum.getChecksumSize()];
}
 
Example 12
Source File: BlockReaderLocalLegacy.java    From big-c with Apache License 2.0
private BlockReaderLocalLegacy(DFSClient.Conf conf, String hdfsfile,
    ExtendedBlock block, Token<BlockTokenIdentifier> token, long startOffset,
    long length, BlockLocalPathInfo pathinfo, FileInputStream dataIn)
    throws IOException {
  this(conf, hdfsfile, block, token, startOffset, length, pathinfo,
      DataChecksum.newDataChecksum(DataChecksum.Type.NULL, 4), false,
      dataIn, startOffset, null);
}
 
Example 13
Source File: DataTransferProtoUtil.java    From big-c with Apache License 2.0
public static DataChecksum fromProto(ChecksumProto proto) {
  if (proto == null) return null;

  int bytesPerChecksum = proto.getBytesPerChecksum();
  DataChecksum.Type type = PBHelper.convert(proto.getType());
  return DataChecksum.newDataChecksum(type, bytesPerChecksum);
}
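
Going the other way, the caller builds a ChecksumProto itself (or receives one in a data-transfer message). A hedged sketch of feeding fromProto, assuming the stock HDFS proto definitions (ChecksumProto from DataTransferProtos, ChecksumTypeProto from HdfsProtos):

ChecksumProto proto = ChecksumProto.newBuilder()
    .setType(HdfsProtos.ChecksumTypeProto.CHECKSUM_CRC32C)
    .setBytesPerChecksum(512)
    .build();
DataChecksum sum = DataTransferProtoUtil.fromProto(proto);  // CRC32C, 512 bytes/chunk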
 
Example 14
Source File: IFileOutputStream.java    From RDFS with Apache License 2.0
/**
 * Create a checksum output stream that writes
 * the bytes to the given stream.
 * @param out the underlying output stream to write to
 */
public IFileOutputStream(OutputStream out) {
  super(out);
  sum = DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_CRC32,
      Integer.MAX_VALUE, new PureJavaCrc32());
  barray = new byte[sum.getChecksumSize()];
}
 
Example 15
Source File: BlockReceiver.java    From hadoop with Apache License 2.0
/**
 * Reads in the partial CRC chunk and computes the checksum
 * of the pre-existing data in the partial chunk.
 */
private Checksum computePartialChunkCrc(long blkoff, long ckoff)
    throws IOException {

  // find offset of the beginning of partial chunk.
  //
  int sizePartialChunk = (int) (blkoff % bytesPerChecksum);
  blkoff = blkoff - sizePartialChunk;
  if (LOG.isDebugEnabled()) {
    LOG.debug("computePartialChunkCrc for " + block
        + ": sizePartialChunk=" + sizePartialChunk
        + ", block offset=" + blkoff
        + ", metafile offset=" + ckoff);
  }

  // create an input stream from the block file
  // and read in partial crc chunk into temporary buffer
  //
  byte[] buf = new byte[sizePartialChunk];
  byte[] crcbuf = new byte[checksumSize];
  try (ReplicaInputStreams instr =
      datanode.data.getTmpInputStreams(block, blkoff, ckoff)) {
    IOUtils.readFully(instr.getDataIn(), buf, 0, sizePartialChunk);

    // open meta file and read in crc value computed earlier
    IOUtils.readFully(instr.getChecksumIn(), crcbuf, 0, crcbuf.length);
  }

  // compute crc of partial chunk from data read in the block file.
  final Checksum partialCrc = DataChecksum.newDataChecksum(
      diskChecksum.getChecksumType(), diskChecksum.getBytesPerChecksum());
  partialCrc.update(buf, 0, sizePartialChunk);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Read in partial CRC chunk from disk for " + block);
  }

  // paranoia! verify that the pre-computed crc matches what we
  // recalculated just now
  if (partialCrc.getValue() != checksum2long(crcbuf)) {
    String msg = "Partial CRC " + partialCrc.getValue() +
                 " does not match the value computed the last time " +
                 "the file was closed: " + checksum2long(crcbuf);
    throw new IOException(msg);
  }
  return partialCrc;
}
 
Example 16
Source File: BlockXCodingSender.java    From RDFS with Apache License 2.0
private void initialize(int namespaceId, Block block, long blockLength,
		long startOffset, long length, boolean corruptChecksumOk, 
		boolean verifyChecksum, DataInputStream metadataIn,
		InputStreamFactory streamFactory)
		throws IOException {
	try {
		this.block = block;
		this.corruptChecksumOk = corruptChecksumOk;
		this.verifyChecksum = verifyChecksum;
		this.blockLength = blockLength;
		
		this.conf = new Configuration();
		this.packetSize = conf.getInt("raid.blockreconstruct.packetsize", 4096);
		if (!corruptChecksumOk || metadataIn != null) {
			this.checksumIn = metadataIn;

			// read and handle the common header here. For now just a
			// version
			BlockMetadataHeader header = BlockMetadataHeader
					.readHeader(checksumIn);
			short version = header.getVersion();

			if (version != FSDataset.METADATA_VERSION) {
				LOG.warn("NTar:Wrong version (" + version
						+ ") for metadata file for " + block
						+ " ignoring ...");
			}
			checksum = header.getChecksum();
		} else {
			if (!ignoreChecksum) {
				LOG.warn("NTar:Could not find metadata file for " + block);
			}
			// This only decides the buffer size. Use BUFFER_SIZE?
			checksum = DataChecksum.newDataChecksum(
					DataChecksum.CHECKSUM_CRC32, 512);
		}

		/*
		 * If bytesPerChecksum is very large, then the metadata file is
		 * mostly corrupted. For now just truncate bytesPerChecksum to
		 * blockLength.
		 */
		bytesPerChecksum = checksum.getBytesPerChecksum();
		if (bytesPerChecksum > 10 * 1024 * 1024
				&& bytesPerChecksum > blockLength) {
			checksum = DataChecksum.newDataChecksum(
					checksum.getChecksumType(),
					Math.max((int) blockLength, 10 * 1024 * 1024));
			bytesPerChecksum = checksum.getBytesPerChecksum();
		}
		checksumSize = checksum.getChecksumSize();

		if (length < 0  || length > blockLength) {
			length = blockLength;
		}

		endOffset = blockLength;
		if (startOffset < 0 || startOffset >= endOffset) {
			// Offset and length do not match this block; nothing to send.
			noData = true;
			return;
		}

		offset = (startOffset - (startOffset % bytesPerChecksum));
		if (length >= 0) {
			// Make sure endOffset points to end of a checksumed chunk.
			long tmpLen = startOffset + length;
			if (tmpLen % bytesPerChecksum != 0) {
				tmpLen += (bytesPerChecksum - tmpLen % bytesPerChecksum);
			}
			if (tmpLen < endOffset) {
				endOffset = tmpLen;
			}
		}

		// seek to the right offsets
		if (offset > 0) {
			long checksumSkip = (offset / bytesPerChecksum) * checksumSize;
			// note blockInStream is seeked when created below
			if (checksumSkip > 0 && checksumIn != null) {
				// Should we use seek() for checksum file as well?
				IOUtils.skipFully(checksumIn, checksumSkip);
			}
		}
		seqno = 0;

		blockIn = streamFactory.createStream(offset);
	} catch (IOException ioe) {
		IOUtils.closeStream(this);
		IOUtils.closeStream(blockIn);
		throw ioe;
	}
}
 
Example 17
Source File: BlockReceiver.java    From RDFS with Apache License 2.0
BlockReceiver(int namespaceId, Block block, DataInputStream in, String inAddr,
              String myAddr, boolean isRecovery, String clientName, 
              DatanodeInfo srcDataNode, DataNode datanode) throws IOException {
  try {
    this.namespaceId = namespaceId;
    this.block = block;
    this.in = in;
    this.inAddr = inAddr;
    this.myAddr = myAddr;
    this.isRecovery = isRecovery;
    this.clientName = clientName;
    this.offsetInBlock = 0;
    this.srcDataNode = srcDataNode;
    this.datanode = datanode;
    this.checksum = DataChecksum.newDataChecksum(in);
    this.bytesPerChecksum = checksum.getBytesPerChecksum();
    this.checksumSize = checksum.getChecksumSize();
    //
    // Open local disk out
    //
    streams = datanode.data.writeToBlock(namespaceId, this.block, isRecovery,
                            clientName == null || clientName.length() == 0);
    replicaBeingWritten = datanode.data.getReplicaBeingWritten(namespaceId, this.block);
    this.finalized = false;
    if (streams != null) {
      this.out = streams.dataOut;
      this.cout = streams.checksumOut;
      this.checksumOut = new DataOutputStream(new BufferedOutputStream(
                                                streams.checksumOut, 
                                                SMALL_BUFFER_SIZE));
      // If this block is for appends, then remove it from periodic
      // validation.
      if (datanode.blockScanner != null && isRecovery) {
        datanode.blockScanner.deleteBlock(namespaceId, block);
      }
    }
  } catch (BlockAlreadyExistsException bae) {
    throw bae;
  } catch(IOException ioe) {
    IOUtils.closeStream(this);
    cleanupBlock();
    
    // check if there is a disk error
    IOException cause = FSDataset.getCauseIfDiskError(ioe);
    DataNode.LOG.warn("IOException in BlockReceiver constructor. Cause is ",
        cause);
    
    if (cause != null) { // possible disk error
      ioe = cause;
      datanode.checkDiskError(ioe); // may throw an exception here
    }
    
    throw ioe;
  }
}
 
Example 18
Source File: BlockReader.java    From RDFS with Apache License 2.0
public static BlockReader newBlockReader( int dataTransferVersion,
                                   int namespaceId,
                                   Socket sock, String file,
                                   long blockId,
                                   long genStamp,
                                   long startOffset, long len,
                                   int bufferSize, boolean verifyChecksum,
                                   String clientName, long minSpeedBps)
                                   throws IOException {
  // in and out will be closed when sock is closed (by the caller)
  DataOutputStream out = new DataOutputStream(
    new BufferedOutputStream(NetUtils.getOutputStream(sock,HdfsConstants.WRITE_TIMEOUT)));

  //write the header.
  ReadBlockHeader readBlockHeader = new ReadBlockHeader(
      dataTransferVersion, namespaceId, blockId, genStamp, startOffset, len,
      clientName);
  readBlockHeader.writeVersionAndOpCode(out);
  readBlockHeader.write(out);
  out.flush();

  //
  // Get bytes in block, set streams
  //

  DataInputStream in = new DataInputStream(
      new BufferedInputStream(NetUtils.getInputStream(sock),
                              bufferSize));

  if ( in.readShort() != DataTransferProtocol.OP_STATUS_SUCCESS ) {
    throw new IOException("Got error in response to OP_READ_BLOCK " +
                          "self=" + sock.getLocalSocketAddress() +
                          ", remote=" + sock.getRemoteSocketAddress() +
                          " for file " + file +
                          " for block " + blockId);
  }
  DataChecksum checksum = DataChecksum.newDataChecksum(in, new PureJavaCrc32());
  // Should we warn when we get CHECKSUM_NULL?

  // Read the first chunk offset.
  long firstChunkOffset = in.readLong();

  if ( firstChunkOffset < 0 || firstChunkOffset > startOffset ||
      firstChunkOffset >= (startOffset + checksum.getBytesPerChecksum())) {
    throw new IOException("BlockReader: error in first chunk offset (" +
                          firstChunkOffset + ") startOffset is " +
                          startOffset + " for file " + file);
  }

  return new BlockReader(file, blockId, in, checksum, verifyChecksum,
      startOffset, firstChunkOffset, sock, minSpeedBps, dataTransferVersion);
}