Java Code Examples for org.apache.hadoop.io.WritableUtils#getVIntSize()

The following examples show how to use org.apache.hadoop.io.WritableUtils#getVIntSize(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestTFileStreams.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Appends {@code count} key/value records through {@code writer} and reports the raw size
 * of what was appended.
 *
 * <p>Each key and value is encoded to bytes exactly once; the same byte array supplies the
 * declared length, the bytes written and the size accounting, so the three can never
 * disagree (a {@code String.length()} char count differs from the encoded byte count for
 * non-ASCII text).
 *
 * @param count number of records to append
 * @param knownKeyLength whether to declare the key's byte length up front; {@code -1} is
 *     passed to {@code prepareAppendKey} otherwise
 * @param knownValueLength whether to declare the value's byte length up front; {@code -1}
 *     is passed to {@code prepareAppendValue} otherwise
 * @param close whether to call {@code closeOutput()} after the last record
 * @return the sum, over all records, of the key and value byte lengths plus the vint sizes
 *     of those lengths
 * @throws IOException if appending a key or value fails
 */
private long writeRecords(int count, boolean knownKeyLength,
    boolean knownValueLength, boolean close) throws IOException {
  long rawDataSize = 0;
  for (int nx = 0; nx < count; nx++) {
    // Encode once and reuse, instead of calling getBytes() for every length lookup.
    byte[] keyBytes = TestTFileByteArrays.composeSortedKey("key", nx).getBytes();
    byte[] valueBytes = ("value" + nx).getBytes();

    DataOutputStream outKey =
        writer.prepareAppendKey(knownKeyLength ? keyBytes.length : -1);
    outKey.write(keyBytes);
    outKey.close();

    DataOutputStream outValue =
        writer.prepareAppendValue(knownValueLength ? valueBytes.length : -1);
    outValue.write(valueBytes);
    outValue.close();

    rawDataSize +=
        WritableUtils.getVIntSize(keyBytes.length)
            + keyBytes.length
            + WritableUtils.getVIntSize(valueBytes.length)
            + valueBytes.length;
  }
  if (close) {
    closeOutput();
  }
  return rawDataSize;
}
 
Example 2
Source File: NoneEncoder.java    From hbase with Apache License 2.0 6 votes vote down vote up
/**
 * Writes {@code cell} to {@code out}, followed by the optional tags and sequence-id
 * sections enabled on the HFile context.
 *
 * @param cell the cell to write
 * @return the size accumulated for everything written for this cell
 * @throws IOException if writing to the stream fails
 */
public int write(Cell cell) throws IOException {
  // Tags are handled separately below: even when the cell carries no tags, the tags
  // length still has to be written if the HFile context says tags are included.
  int written = KeyValueUtil.oswrite(cell, out, false);

  if (encodingCtx.getHFileContext().isIncludesTags()) {
    final int tagBytes = cell.getTagsLength();
    // The length is always emitted; the tag payload only when there is one.
    out.writeShort(tagBytes);
    if (tagBytes > 0) {
      PrivateCellUtil.writeTags(out, cell, tagBytes);
    }
    written += KeyValue.TAGS_LENGTH_SIZE + tagBytes;
  }

  if (encodingCtx.getHFileContext().isIncludesMvcc()) {
    final long sequenceId = cell.getSequenceId();
    WritableUtils.writeVLong(out, sequenceId);
    written += WritableUtils.getVIntSize(sequenceId);
  }

  return written;
}
 
Example 3
Source File: IFile.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Writes one key/value pair to {@code out}: the value marker, both lengths as vints, then
 * the key bytes and the value bytes, and updates the byte counters.
 *
 * @param keyData buffer holding the key
 * @param keyPos offset of the key within {@code keyData}
 * @param keyLength number of key bytes
 * @param valueData buffer holding the value
 * @param valPos offset of the value within {@code valueData}
 * @param valueLength number of value bytes
 * @throws IOException if writing to the stream fails
 */
protected void writeKVPair(byte[] keyData, int keyPos, int keyLength,
    byte[] valueData, int valPos, int valueLength) throws IOException {
  writeValueMarker(out);

  // Length prefixes first, payloads afterwards.
  WritableUtils.writeVInt(out, keyLength);
  WritableUtils.writeVInt(out, valueLength);
  out.write(keyData, keyPos, keyLength);
  out.write(valueData, valPos, valueLength);

  // Account for the payload plus the two vint-encoded length prefixes.
  final int payloadBytes = keyLength + valueLength;
  final int prefixBytes =
      WritableUtils.getVIntSize(keyLength) + WritableUtils.getVIntSize(valueLength);
  decompressedBytesWritten += payloadBytes + prefixBytes;

  // The optional counter tracks the payload only.
  if (serializedUncompressedBytes != null) {
    serializedUncompressedBytes.increment(payloadBytes);
  }
}
 
Example 4
Source File: ValueSchema.java    From phoenix with Apache License 2.0 5 votes vote down vote up
/**
 * Estimates the byte size of this schema.
 *
 * @return the vint size of {@code minNullable}, plus the vint size of the field count,
 *     plus three bytes per field
 */
public int getEstimatedByteSize() {
    final int headerBytes = WritableUtils.getVIntSize(minNullable);
    final int fieldCount = fields.size();
    return headerBytes + WritableUtils.getVIntSize(fieldCount) + fieldCount * 3;
}
 
Example 5
Source File: BufferedDataBlockEncoder.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the optional trailing sections of a cell (tags and sequence id) as enabled on the
 * HFile context of {@code encodingCtx}.
 *
 * @param cell the cell whose tags and sequence id are written
 * @param out the stream to write to
 * @param encodingCtx supplies the HFile context and the optional tag compression context
 * @return unencoded size added
 * @throws IOException if writing to the stream fails
 */
protected final int afterEncodingKeyValue(Cell cell, DataOutputStream out,
    HFileBlockDefaultEncodingContext encodingCtx) throws IOException {
  int added = 0;

  if (encodingCtx.getHFileContext().isIncludesTags()) {
    final int tagBytes = cell.getTagsLength();
    ByteBufferUtils.putCompressedInt(out, tagBytes);
    if (tagBytes > 0) {
      // A non-null context means tag compression is enabled, in which case the tags are
      // written with dictionary compression; otherwise they are written as-is.
      final TagCompressionContext compression = encodingCtx.getTagCompressionContext();
      if (compression == null) {
        PrivateCellUtil.writeTags(out, cell, tagBytes);
      } else {
        // tagBytes is deliberately not passed: re-parsing the tags length is cheap.
        PrivateCellUtil.compressTags(out, cell, compression);
      }
    }
    added += KeyValue.TAGS_LENGTH_SIZE + tagBytes;
  }

  if (encodingCtx.getHFileContext().isIncludesMvcc()) {
    // Emit the cell's sequence id (memstore timestamp) as a vlong.
    final long sequenceId = cell.getSequenceId();
    WritableUtils.writeVLong(out, sequenceId);
    // TODO use a writeVLong which returns the #bytes written so that the value does not
    // have to be examined twice.
    added += WritableUtils.getVIntSize(sequenceId);
  }

  return added;
}
 
Example 6
Source File: AbstractTransactionAwareTable.java    From phoenix-tephra with Apache License 2.0 5 votes vote down vote up
/**
 * Encodes a long as a vint byte array.
 *
 * <p>We could alternatively make this abstract and implement this method as
 * Bytes.vintToBytes(long) in every compat module.
 *
 * @param vint long to make a vint of.
 * @return long in vint byte array representation
 */
protected byte [] getVIntBytes(final long vint) {
  final byte[] encoded = new byte[WritableUtils.getVIntSize(vint)];

  // Values in [-112, 127] are stored directly in a single byte.
  if (vint >= -112 && vint <= 127) {
    encoded[0] = (byte) vint;
    return encoded;
  }

  // Negative values are stored as their one's complement, flagged by a different
  // marker base (-120 instead of -112).
  final boolean negative = vint < 0;
  final long payload = negative ? ~vint : vint;
  int marker = negative ? -120 : -112;

  // Lower the marker once for every byte the payload occupies.
  for (long rest = payload; rest != 0; rest >>= 8) {
    marker--;
  }
  encoded[0] = (byte) marker;

  // Recover the payload byte count from the marker, then emit the payload with the
  // most significant byte first.
  final int payloadLength = (marker < -120) ? -(marker + 120) : -(marker + 112);
  int pos = 1;
  for (int shift = (payloadLength - 1) * 8; shift >= 0; shift -= 8) {
    final long mask = 0xFFL << shift;
    encoded[pos++] = (byte) ((payload & mask) >> shift);
  }
  return encoded;
}
 
Example 7
Source File: KeyValueUtil.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Appends the bytes of {@code kv} to {@code bb}, optionally followed by the key value's
 * sequence id written as a vlong.
 *
 * @param bb destination buffer; its limit is pushed out as data is appended, and it is
 *     assumed to have enough capacity
 * @param kv the key value to append
 * @param includeMvccVersion whether to append the sequence id after the key value bytes
 */
public static void appendToByteBuffer(final ByteBuffer bb, final KeyValue kv,
    final boolean includeMvccVersion) {
  // Grow the limit just far enough for the key value, then copy it in.
  final int kvLength = kv.getLength();
  bb.limit(bb.position() + kvLength);
  bb.put(kv.getBuffer(), kv.getOffset(), kvLength);

  if (!includeMvccVersion) {
    return;
  }

  // Make room for the vlong-encoded sequence id before writing it.
  final long sequenceId = kv.getSequenceId();
  ByteBufferUtils.extendLimit(bb, WritableUtils.getVIntSize(sequenceId));
  ByteBufferUtils.writeVLong(bb, sequenceId);
}
 
Example 8
Source File: TestGridmixRecord.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Round-trips REDUCE_SPEC keys: fills {@code a} with random reduce statistics, serializes
 * it, reads it back into {@code b} and checks that sizes, fields, equality, ordering and
 * hash codes agree. The random seed is logged so a run can be reproduced.
 *
 * @param a key that is populated and serialized
 * @param b key that is populated by deserializing {@code a}
 * @throws Exception if serialization or deserialization fails
 */
static void checkSpec(GridmixKey a, GridmixKey b) throws Exception {
  final Random rand = new Random();
  final long seed = rand.nextLong();
  rand.setSeed(seed);
  LOG.info("spec: " + seed);

  final DataInputBuffer input = new DataInputBuffer();
  final DataOutputBuffer output = new DataOutputBuffer();
  a.setType(GridmixKey.REDUCE_SPEC);
  b.setType(GridmixKey.REDUCE_SPEC);

  for (int round = 0; round < 100; round++) {
    // Draw the three statistics and push each into the source key.
    final int inRecords = rand.nextInt(Integer.MAX_VALUE);
    a.setReduceInputRecords(inRecords);
    final int outRecords = rand.nextInt(Integer.MAX_VALUE);
    a.setReduceOutputRecords(outRecords);
    final int outBytes = rand.nextInt(Integer.MAX_VALUE);
    a.setReduceOutputBytes(outBytes);

    // The fixed part is the vint-encoded statistics plus two bytes of metadata.
    final int vintBytes = WritableUtils.getVIntSize(inRecords)
        + WritableUtils.getVIntSize(outRecords)
        + WritableUtils.getVIntSize(outBytes);
    assertEquals(vintBytes + 2, a.fixedBytes()); // meta + vint min

    // Serialize at a size strictly larger than the fixed part.
    final int size = rand.nextInt(1024) + a.fixedBytes() + 1;
    setSerialize(a, rand.nextLong(), size, output);
    assertEquals(size, output.getLength());
    assertTrue(a.equals(a));
    assertEquals(0, a.compareTo(a));

    input.reset(output.getData(), 0, output.getLength());

    // The deserialized key must match the source in every respect.
    b.readFields(input);
    assertEquals(size, b.getSize());
    assertEquals(inRecords, b.getReduceInputRecords());
    assertEquals(outRecords, b.getReduceOutputRecords());
    assertEquals(outBytes, b.getReduceOutputBytes());
    assertTrue(a.equals(b));
    assertEquals(0, a.compareTo(b));
    assertEquals(a.hashCode(), b.hashCode());
  }
}
 
Example 9
Source File: IFile.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the key length and then the value length, both vint-encoded, from {@code dIn} and
 * adds the bytes consumed to {@code bytesRead}.
 *
 * @param dIn the input to read the two lengths from
 * @throws IOException if reading from the input fails
 */
protected void readKeyValueLength(DataInput dIn) throws IOException {
  currentKeyLength = WritableUtils.readVInt(dIn);
  currentValueLength = WritableUtils.readVInt(dIn);

  // Only a key length other than RLE_MARKER is remembered as the original key length.
  final boolean isRleMarker = currentKeyLength == RLE_MARKER;
  if (!isRleMarker) {
    originalKeyLength = currentKeyLength;
  }

  final int keyPrefixBytes = WritableUtils.getVIntSize(currentKeyLength);
  final int valuePrefixBytes = WritableUtils.getVIntSize(currentValueLength);
  bytesRead += keyPrefixBytes + valuePrefixBytes;
}
 
Example 10
Source File: TestGridmixRecord.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies serialization of REDUCE_SPEC keys. On each of 100 iterations {@code a} receives
 * random reduce statistics, is serialized at a random size, and is read back into
 * {@code b}; both keys must then agree on size, statistics, equality, ordering and hash.
 * The seed in use is logged for reproducibility.
 *
 * @param a key that is populated and serialized
 * @param b key that receives the deserialized state
 * @throws Exception if serialization or deserialization fails
 */
static void checkSpec(GridmixKey a, GridmixKey b) throws Exception {
  final Random random = new Random();
  final long seedValue = random.nextLong();
  random.setSeed(seedValue);
  LOG.info("spec: " + seedValue);

  final DataInputBuffer inBuf = new DataInputBuffer();
  final DataOutputBuffer outBuf = new DataOutputBuffer();
  a.setType(GridmixKey.REDUCE_SPEC);
  b.setType(GridmixKey.REDUCE_SPEC);

  int iteration = 0;
  while (iteration < 100) {
    final int recordsIn = random.nextInt(Integer.MAX_VALUE);
    a.setReduceInputRecords(recordsIn);
    final int recordsOut = random.nextInt(Integer.MAX_VALUE);
    a.setReduceOutputRecords(recordsOut);
    final int bytesOut = random.nextInt(Integer.MAX_VALUE);
    a.setReduceOutputBytes(bytesOut);

    // Expected fixed size: the three statistics, one more vint holding zero, and two
    // bytes of metadata.
    final int expectedVintBytes = WritableUtils.getVIntSize(recordsIn)
        + WritableUtils.getVIntSize(recordsOut)
        + WritableUtils.getVIntSize(bytesOut)
        + WritableUtils.getVIntSize(0);
    assertEquals(expectedVintBytes + 2, a.fixedBytes()); // meta + vint min

    final int size = random.nextInt(1024) + a.fixedBytes() + 1;
    setSerialize(a, random.nextLong(), size, outBuf);
    assertEquals(size, outBuf.getLength());
    assertTrue(a.equals(a));
    assertEquals(0, a.compareTo(a));

    inBuf.reset(outBuf.getData(), 0, outBuf.getLength());

    b.readFields(inBuf);
    assertEquals(size, b.getSize());
    assertEquals(recordsIn, b.getReduceInputRecords());
    assertEquals(recordsOut, b.getReduceOutputRecords());
    assertEquals(bytesOut, b.getReduceOutputBytes());
    assertTrue(a.equals(b));
    assertEquals(0, a.compareTo(b));
    assertEquals(a.hashCode(), b.hashCode());

    ++iteration;
  }
}
 
Example 11
Source File: IFile.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the next vint-encoded value length from {@code dIn} and counts the bytes it
 * occupied. When the length read equals {@code V_END_MARKER}, a key length / value length
 * pair is read next via {@link #readKeyValueLength(DataInput)}.
 *
 * @param dIn the input to read from
 * @throws IOException if reading from the input fails
 */
protected void readValueLength(DataInput dIn) throws IOException {
  final int valueLength = WritableUtils.readVInt(dIn);
  currentValueLength = valueLength;
  bytesRead += WritableUtils.getVIntSize(valueLength);

  final boolean endOfValues = valueLength == V_END_MARKER;
  if (endOfValues) {
    readKeyValueLength(dIn);
  }
}
 
Example 12
Source File: ByteUtil.java    From phoenix with Apache License 2.0 5 votes vote down vote up
/**
 * Serializes an int array as a sequence of vints, prefixed by {@code encodedLength}
 * (also written as a vint).
 *
 * <p>The output buffer reserves {@code getVIntSize(encodedLength)} bytes for the prefix, so
 * the prefix written must be {@code encodedLength} itself. Writing the total byte count
 * there instead stores a different value than the one the buffer was sized for, and can
 * overrun the array when that count needs more vint bytes than {@code encodedLength}.
 *
 * @param intArray the values to serialize, in order
 * @param encodedLength the length value to store in front of the elements
 * @return a newly allocated array holding the prefix followed by every element
 */
public static byte[] serializeVIntArray(int[] intArray, int encodedLength) {
    // First pass: compute the exact output size (prefix plus every element).
    int size = WritableUtils.getVIntSize(encodedLength);
    for (int value : intArray) {
        size += WritableUtils.getVIntSize(value);
    }

    // Second pass: write the prefix, then the elements, back to back.
    byte[] out = new byte[size];
    int offset = ByteUtil.vintToBytes(out, 0, encodedLength);
    for (int value : intArray) {
        offset += ByteUtil.vintToBytes(out, offset, value);
    }
    return out;
}
 
Example 13
Source File: SpoolingResultIterator.java    From phoenix with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Decodes the next length-prefixed result from {@code bytes} starting at {@code offset},
 * stores it in {@code next} and moves {@code offset} past it.
 *
 * @return the decoded tuple, or {@code null} once {@code offset} has reached the end of
 *     {@code bytes}
 * @throws SQLException declared by the signature
 */
private Tuple advance() throws SQLException {
    final boolean exhausted = offset >= bytes.length;
    if (exhausted) {
        next = null;
        return null;
    }

    // Each entry is a vint length followed by that many bytes of result data.
    final int resultSize = ByteUtil.vintFromBytes(bytes, offset);
    offset += WritableUtils.getVIntSize(resultSize);
    final ImmutableBytesWritable value =
            new ImmutableBytesWritable(bytes, offset, resultSize);
    offset += resultSize;

    final Tuple decoded = new ResultTuple(new Result(value));
    next = decoded;
    return decoded;
}
 
Example 14
Source File: IFile.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a value through the superclass. While the buffer is not yet full, the running
 * {@code totalSize} is first increased by the value's size, and the writer is switched to
 * the file-based one when {@code shouldWriteToDisk()} says so.
 *
 * @param data buffer holding the value
 * @param offset start of the value within {@code data}
 * @param length number of value bytes
 * @throws IOException if writing fails
 */
@Override
protected void writeValue(byte[] data, int offset, int length) throws IOException {
  if (bufferFull) {
    super.writeValue(data, offset, length);
    return;
  }

  // The marker size is counted unless the previous key is the REPEAT_KEY sentinel; the
  // vint length prefix and the payload are always counted.
  totalSize += ((prevKey == REPEAT_KEY) ? 0 : RLE_MARKER_SIZE)
      + WritableUtils.getVIntSize(length) + length;

  if (shouldWriteToDisk()) {
    resetToFileBasedWriter();
  }
  super.writeValue(data, offset, length);
}
 
Example 15
Source File: KeyValueSchema.java    From phoenix with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Computes the size of a variable-length field: its payload plus the vint that encodes
 * the payload length.
 *
 * @param length payload length in bytes
 * @return {@code length} plus the vint size of {@code length}
 */
private int getVarLengthBytes(int length) {
    final int prefixBytes = WritableUtils.getVIntSize(length);
    return prefixBytes + length;
}
 
Example 16
Source File: IFile.java    From RDFS with Apache License 2.0 4 votes vote down vote up
/**
 * Finishes this writer: closes both serializers, appends the end-of-file markers, flushes
 * and releases the output resources, and records the final byte and record counts.
 *
 * @throws IOException if closing a serializer or writing, flushing or closing a stream
 *     fails
 */
public void close() throws IOException {
  
  // Close the serializers
  keySerializer.close();
  valueSerializer.close();

  // Write EOF_MARKER for key/value length
  // (one marker in the key-length slot, one in the value-length slot; both are counted
  // towards the decompressed byte total)
  WritableUtils.writeVInt(out, EOF_MARKER);
  WritableUtils.writeVInt(out, EOF_MARKER);
  decompressedBytesWritten += 2 * WritableUtils.getVIntSize(EOF_MARKER);
  
  //Flush the stream
  out.flush();
  
  if (compressOutput) {
    // Flush
    // Finish the compressed stream and reset its state
    compressedOut.finish();
    compressedOut.resetState();
  }
  
  // Close the underlying stream iff we own it...
  if (ownOutputStream) {
    out.close();
  }
  else {
    // Write the checksum
    checksumOut.finish();
  }

  // Bytes written to the raw stream since 'start'; taken only after everything above
  // has been flushed/finished.
  compressedBytesWritten = rawOut.getPos() - start;

  if (compressOutput) {
    // Return back the compressor
    CodecPool.returnCompressor(compressor);
    compressor = null;
  }

  // Drop the stream reference now that the writer is finished
  out = null;
  // Report the number of records written, when a counter was supplied
  if(writtenRecordsCounter != null) {
    writtenRecordsCounter.increment(numRecordsWritten);
  }
}
 
Example 17
Source File: IFile.java    From incubator-tez with Apache License 2.0 4 votes vote down vote up
/**
 * Finishes this writer. It may be called only once; a second call fails the
 * {@code checkState} precondition. Closes the serializers (when key/value classes were
 * set), writes the trailing markers, flushes and releases the output resources, and
 * records the final byte and record counts.
 *
 * @throws IOException if closing a serializer or writing, flushing or closing a stream
 *     fails
 */
public void close() throws IOException {
  // Atomically mark the writer closed; the check fails if it was already closed.
  checkState(!closed.getAndSet(true), "Writer was already closed earlier");

  // When IFile writer is created by BackupStore, we do not have
  // Key and Value classes set. So, check before closing the
  // serializers
  if (keyClass != null) {
    keySerializer.close();
    valueSerializer.close();
  }

  // write V_END_MARKER as needed
  writeValueMarker(out);

  // Write EOF_MARKER for key/value length
  // (one marker in the key-length slot, one in the value-length slot; both are counted
  // towards the decompressed byte total)
  WritableUtils.writeVInt(out, EOF_MARKER);
  WritableUtils.writeVInt(out, EOF_MARKER);
  decompressedBytesWritten += 2 * WritableUtils.getVIntSize(EOF_MARKER);
  //account for header bytes
  decompressedBytesWritten += HEADER.length;

  //Flush the stream
  out.flush();

  if (compressOutput) {
    // Flush
    // Finish the compressed stream and reset its state
    compressedOut.finish();
    compressedOut.resetState();
  }

  // Close the underlying stream iff we own it...
  if (ownOutputStream) {
    out.close();
  }
  else {
    // Write the checksum
    checksumOut.finish();
  }
  //header bytes are already included in rawOut
  compressedBytesWritten = rawOut.getPos() - start;

  if (compressOutput) {
    // Return back the compressor
    CodecPool.returnCompressor(compressor);
    compressor = null;
  }

  // Drop the stream reference now that the writer is finished
  out = null;
  // Report the number of records written, when a counter was supplied
  if (writtenRecordsCounter != null) {
    writtenRecordsCounter.increment(numRecordsWritten);
  }
  LOG.info("Total keys written=" + numRecordsWritten + "; Savings(optimized due to " +
      "multi-kv/rle)=" + totalKeySaving + "; number of RLEs written=" + rleWritten);
}
 
Example 18
Source File: TestHFileBlock.java    From hbase with Apache License 2.0 4 votes vote down vote up
/**
 * Generates {@code NUM_KEYVALUES} pseudo-random key values, sorts them and writes them to
 * {@code hbw}.
 *
 * <p>The generator is seeded from {@code seed}, so the same arguments produce the same
 * data. Rows, qualifiers, values and timestamps are sometimes copied from an
 * already-generated key value (with probability {@code CHANCE_TO_REPEAT}) so the data
 * compresses well; every key value shares the family of the first one.
 *
 * @param hbw the block writer that receives the key values
 * @param seed offset added to the fixed base seed of the random generator
 * @param includesMemstoreTS whether to assign each key value a random sequence id and
 *     count its vint size in the returned total
 * @param useTag whether each key value is created with a single tag
 * @return the sum of {@code getLength()} over all key values, plus the vint sizes of the
 *     sequence ids when {@code includesMemstoreTS} is set
 * @throws IOException if writing to {@code hbw} fails
 */
static int writeTestKeyValues(HFileBlock.Writer hbw, int seed, boolean includesMemstoreTS,
    boolean useTag) throws IOException {
  List<KeyValue> keyValues = new ArrayList<>();
  Random randomizer = new Random(42L + seed); // just any fixed number

  // generate keyValues
  for (int i = 0; i < NUM_KEYVALUES; ++i) {
    byte[] row;
    long timestamp;
    byte[] family;
    byte[] qualifier;
    byte[] value;

    // generate it or repeat, it should compress well
    // (the "0 < i" guards make sure there is an earlier key value to copy from)
    if (0 < i && randomizer.nextFloat() < CHANCE_TO_REPEAT) {
      row = CellUtil.cloneRow(keyValues.get(randomizer.nextInt(keyValues.size())));
    } else {
      row = new byte[FIELD_LENGTH];
      randomizer.nextBytes(row);
    }
    // The family is random for the first key value only; all others reuse it.
    if (0 == i) {
      family = new byte[FIELD_LENGTH];
      randomizer.nextBytes(family);
    } else {
      family = CellUtil.cloneFamily(keyValues.get(0));
    }
    if (0 < i && randomizer.nextFloat() < CHANCE_TO_REPEAT) {
      qualifier = CellUtil.cloneQualifier(keyValues.get(randomizer.nextInt(keyValues.size())));
    } else {
      qualifier = new byte[FIELD_LENGTH];
      randomizer.nextBytes(qualifier);
    }
    if (0 < i && randomizer.nextFloat() < CHANCE_TO_REPEAT) {
      value = CellUtil.cloneValue(keyValues.get(randomizer.nextInt(keyValues.size())));
    } else {
      value = new byte[FIELD_LENGTH];
      randomizer.nextBytes(value);
    }
    if (0 < i && randomizer.nextFloat() < CHANCE_TO_REPEAT) {
      timestamp = keyValues.get(
          randomizer.nextInt(keyValues.size())).getTimestamp();
    } else {
      timestamp = randomizer.nextLong();
    }
    // Build the key value, with a single fixed tag when tags were requested.
    if (!useTag) {
      keyValues.add(new KeyValue(row, family, qualifier, timestamp, value));
    } else {
      keyValues.add(new KeyValue(row, family, qualifier, timestamp, value,
          new Tag[] { new ArrayBackedTag((byte) 1, Bytes.toBytes("myTagVal")) }));
    }
  }

  // sort it and write to stream
  int totalSize = 0;
  Collections.sort(keyValues, CellComparatorImpl.COMPARATOR);

  for (KeyValue kv : keyValues) {
    totalSize += kv.getLength();
    if (includesMemstoreTS) {
      // Assign a random sequence id and count its vint-encoded size as well.
      long memstoreTS = randomizer.nextLong();
      kv.setSequenceId(memstoreTS);
      totalSize += WritableUtils.getVIntSize(memstoreTS);
    }
    hbw.write(kv);
  }
  return totalSize;
}
 
Example 19
Source File: GridmixKey.java    From RDFS with Apache License 2.0 4 votes vote down vote up
/**
 * Computes the combined vint-encoded size of the three record statistics.
 *
 * @return the sum of the vint sizes of {@code rec_in}, {@code rec_out} and
 *     {@code bytes_out}
 */
public int getSize() {
  int total = WritableUtils.getVIntSize(rec_in);
  total += WritableUtils.getVIntSize(rec_out);
  total += WritableUtils.getVIntSize(bytes_out);
  return total;
}
 
Example 20
Source File: Utils.java    From hadoop-gpu with Apache License 2.0 2 votes vote down vote up
/**
 * Returns the number of bytes needed to store a value in variable-length format.
 * Delegates to {@code WritableUtils.getVIntSize(long)}.
 *
 * @param i the value to be encoded
 * @return the encoded length
 */
public static int getVIntSize(long i) {
  final int encodedLength = WritableUtils.getVIntSize(i);
  return encodedLength;
}