Java Code Examples for org.apache.hadoop.hbase.KeyValue#getKeyValueDataStructureSize()

The following examples show how to use org.apache.hadoop.hbase.KeyValue#getKeyValueDataStructureSize(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: SchemaUtil.java From phoenix with Apache License 2.0

6 votes

/**
 * Produces a rough per-row byte-size estimate for the given table.
 * Each column that is not part of the primary key contributes the size of one KeyValue;
 * one additional KeyValue with a zero-length value is added for the empty column.
 * TODO: keep row count in stats table and use total size / row count instead
 * @param table the table whose row size is being estimated
 * @return estimate of size in bytes of a row
 */
public static long estimateRowSize(PTable table) {
    final int rowKeyLength = estimateKeyLength(table);
    long total = 0;
    for (PColumn col : table.getColumns()) {
        if (SchemaUtil.isPKColumn(col)) {
            // Primary-key columns do not get a KeyValue of their own in this estimate.
            continue;
        }
        PDataType dataType = col.getDataType();
        Integer declaredMax = col.getMaxLength();
        int valueBytes;
        if (!dataType.isFixedWidth()) {
            // Variable-width types use a fixed guess.
            valueBytes = VAR_KV_LENGTH_ESTIMATE;
        } else if (declaredMax == null) {
            valueBytes = dataType.getByteSize();
        } else {
            valueBytes = declaredMax;
        }
        int familyBytes = col.getFamilyName().getBytes().length;
        int qualifierBytes = col.getName().getBytes().length;
        total += KeyValue.getKeyValueDataStructureSize(rowKeyLength, familyBytes, qualifierBytes, valueBytes);
    }
    // Empty key value
    int emptyFamilyBytes = getEmptyColumnFamily(table).length;
    total += KeyValue.getKeyValueDataStructureSize(
            rowKeyLength, emptyFamilyBytes, QueryConstants.EMPTY_COLUMN_BYTES.length, 0);
    return total;
}

Example 2

Source File: SchemaUtil.java From phoenix with Apache License 2.0

6 votes

/**
 * Approximates how many bytes a single row of the table occupies.
 * The estimate sums one KeyValue per non-primary-key column and then adds one KeyValue
 * with an empty value whose qualifier comes from
 * {@code EncodedColumnsUtil.getEmptyKeyValueInfo(table)}.
 * TODO: keep row count in stats table and use total size / row count instead
 * @param table the table to estimate
 * @return estimate of size in bytes of a row
 */
public static long estimateRowSize(PTable table) {
    final int estimatedKeyLength = estimateKeyLength(table);
    long estimate = 0;
    for (PColumn pColumn : table.getColumns()) {
        if (SchemaUtil.isPKColumn(pColumn)) {
            // Only non-primary-key columns are counted here.
            continue;
        }
        PDataType columnType = pColumn.getDataType();
        Integer columnMaxLength = pColumn.getMaxLength();
        // Variable-width types use a fixed guess; fixed-width types use the declared
        // max length when present, else the type's byte size.
        int estimatedValueLength;
        if (columnType.isFixedWidth()) {
            if (columnMaxLength == null) {
                estimatedValueLength = columnType.getByteSize();
            } else {
                estimatedValueLength = columnMaxLength;
            }
        } else {
            estimatedValueLength = VAR_KV_LENGTH_ESTIMATE;
        }
        estimate += KeyValue.getKeyValueDataStructureSize(
                estimatedKeyLength,
                pColumn.getFamilyName().getBytes().length,
                pColumn.getName().getBytes().length,
                estimatedValueLength);
    }
    // Empty key value
    byte[] emptyQualifier = EncodedColumnsUtil.getEmptyKeyValueInfo(table).getFirst();
    estimate += KeyValue.getKeyValueDataStructureSize(
            estimatedKeyLength, getEmptyColumnFamily(table).length, emptyQualifier.length, 0);
    return estimate;
}

Example 3

Source File: Result.java From hbase with Apache License 2.0

5 votes

/**
 * Locates the position of the specified column within a sorted cell array.
 * <p>
 * A "first on row" search key is built for the row of {@code kvs[0]} plus the given
 * family and qualifier, written into the buffer held by {@code localBuffer} (which is
 * replaced by a larger, padded one when it is missing or too small). The array is then
 * binary-searched with {@code CellComparator.getInstance()}.
 *
 * @param kvs the array to search; {@code kvs[0]} is read, so it must not be empty
 * @param family family name
 * @param foffset family offset
 * @param flength family length
 * @param qualifier column qualifier
 * @param qoffset qualifier offset
 * @param qlength qualifier length
 *
 * @return the matching index, or the insertion point when there is no exact match;
 *         -1 if that position is past the end of the array
 */
protected int binarySearch(final Cell [] kvs,
    final byte [] family, final int foffset, final int flength,
    final byte [] qualifier, final int qoffset, final int qlength) {

  final Cell firstCell = kvs[0];
  final double requiredSize = (double)
      KeyValue.getKeyValueDataStructureSize(firstCell.getRowLength(), flength, qlength, 0);

  byte[] scratch = localBuffer.get();
  final boolean needsNewBuffer = scratch == null || requiredSize > scratch.length;
  if (needsNewBuffer) {
    // pad to the smallest multiple of the pad width
    final int paddedLength = (int) Math.ceil(requiredSize / PAD_WIDTH) * PAD_WIDTH;
    scratch = new byte[paddedLength];
    localBuffer.set(scratch);
  }

  final Cell probe = KeyValueUtil.createFirstOnRow(scratch, 0,
      firstCell.getRowArray(), firstCell.getRowOffset(), firstCell.getRowLength(),
      family, foffset, flength,
      qualifier, qoffset, qlength);

  // A negative result encodes ( -(insertion point) - 1); the original author notes that
  // the probe never matches exactly.
  int index = Arrays.binarySearch(kvs, probe, CellComparator.getInstance());
  if (index < 0) {
    // decode back to the insertion point
    index = -(index + 1);
  }
  // An insertion point past the last element means the column doesn't exist.
  return index == kvs.length ? -1 : index;
}

Example 4

Source File: TestByteRangeWithKVSerialization.java From hbase with Apache License 2.0

5 votes

/**
 * Reads one serialized cell from the range, starting at its current position.
 * <p>
 * The layout consumed here is: an int key length, an int value length, the key and
 * value bytes, a two-byte tags length (high byte first), the tag bytes, and finally a
 * variable-length long that is stored on the returned KeyValue as its sequence id.
 *
 * @param pbr the byte range to read from; its position is advanced past the cell
 * @return a KeyValue over the range's backing bytes, beginning at the starting position
 * @throws Exception declared by the original signature
 */
static KeyValue readCell(PositionedByteRange pbr) throws Exception {
  final int cellStart = pbr.getPosition();
  final int keyLength = pbr.getInt();
  final int valueLength = pbr.getInt();
  // Skip the key and value section
  final int afterKeyAndValue = pbr.getPosition() + keyLength + valueLength;
  pbr.setPosition(afterKeyAndValue);
  // Two-byte tags length, high byte first
  final int highByte = pbr.get() & 0xff;
  final int lowByte = pbr.get() & 0xff;
  final int tagsLength = (highByte << 8) ^ lowByte;
  // Skip the tags section
  pbr.setPosition(pbr.getPosition() + tagsLength);
  final long sequenceId = pbr.getVLong();
  final byte[] backingBytes = pbr.getBytes();
  final int cellLength =
      (int) KeyValue.getKeyValueDataStructureSize(keyLength, valueLength, tagsLength);
  final KeyValue cell = new KeyValue(backingBytes, cellStart, cellLength);
  cell.setSequenceId(sequenceId);
  return cell;
}

Example 5

Source File: EncodedDataBlock.java From hbase with Apache License 2.0

4 votes

/**
 * Returns a forward-only iterator over the cells of the encoded block.
 * <p>
 * Decoding is lazy: the encoded bytes (after {@code headerSize} plus one short) are
 * decoded through {@code dataBlockEncoder} on the first call to {@code next()}. Until
 * then, {@code hasNext()} reports whether {@code rawKVs} is non-empty.
 *
 * @param headerSize header size of the block.
 * @return Forwards sequential iterator.
 */
public Iterator<Cell> getIterator(int headerSize) {
  final int rawLength = rawKVs.length;
  final byte[] encoded = getEncodedData();
  final int skip = headerSize + Bytes.SIZEOF_SHORT;
  final DataInputStream dis = new DataInputStream(
      new ByteArrayInputStream(encoded, skip, encoded.length - skip));

  return new Iterator<Cell>() {
    /** Decoded cells; stays null until the first next() call. */
    private ByteBuffer decoded = null;
    /** Per-cell flags consumed while reading tag lengths. */
    private Iterator<Boolean> tagsLenZeroFlags = isTagsLenZero.iterator();

    @Override
    public boolean hasNext() {
      return decoded == null ? rawLength > 0 : decoded.hasRemaining();
    }

    @Override
    public Cell next() {
      decodeIfNeeded();
      final int cellStart = decoded.position();
      final int keyLength = decoded.getInt();
      final int valueLength = decoded.getInt();
      ByteBufferUtils.skip(decoded, keyLength + valueLength);
      // Read the tag length in case when stream contain tags
      final int tagsLength = meta.isIncludesTags() ? readTagsLength() : 0;
      final KeyValue cell = new KeyValue(decoded.array(), cellStart,
          (int) KeyValue.getKeyValueDataStructureSize(keyLength, valueLength, tagsLength));
      if (meta.isIncludesMvcc()) {
        cell.setSequenceId(ByteBufferUtils.readVLong(decoded));
      }
      return cell;
    }

    /** Decodes the whole block once and rewinds the resulting buffer. */
    private void decodeIfNeeded() {
      if (decoded != null) {
        return;
      }
      try {
        decoded = dataBlockEncoder.decodeKeyValues(dis,
            dataBlockEncoder.newDataBlockDecodingContext(meta));
      } catch (IOException e) {
        throw new RuntimeException("Problem with data block encoder, " +
            "most likely it requested more bytes than are available.", e);
      }
      decoded.rewind();
    }

    /**
     * Consumes the next flag and, unless the short was omitted, the two-byte tags
     * length plus the tag bytes. Returns the tags length (0 when omitted).
     */
    private int readTagsLength() {
      boolean noTags = true;
      if (tagsLenZeroFlags.hasNext()) {
        noTags = tagsLenZeroFlags.next();
      }
      // ROW_INDEX_V1 will not put tagsLen back in cell if it is zero, there is no need
      // to read short here.
      if (encoding.equals(DataBlockEncoding.ROW_INDEX_V1) && noTags) {
        return 0;
      }
      final int length = ((decoded.get() & 0xff) << 8) ^ (decoded.get() & 0xff);
      ByteBufferUtils.skip(decoded, length);
      return length;
    }

    @Override
    public void remove() {
      throw new NotImplementedException("remove() is not supported!");
    }

    @Override
    public String toString() {
      return "Iterator of: " + dataBlockEncoder.getClass().getName();
    }

  };
}

Example 6

Source File: PArrayDataTypeEncoder.java From phoenix with Apache License 2.0

4 votes

/**
 * Estimates the encoded size of a row in which each column family is stored as a single
 * cell holding all of that family's column values.
 * <p>
 * For every column family the cell size is accumulated from: one leading byte, the
 * lengths of the non-empty column values, the bytes counted for runs of null/absent
 * columns that precede a non-empty value, the offset array (short or int entries as
 * chosen by {@code PArrayDataType.useShortForOffsetArray}), and fixed overheads of
 * 4 and 5 bytes. The resulting cell size is then fed to
 * {@code KeyValue.getKeyValueDataStructureSize}.
 *
 * @param table the table whose column families are walked
 * @param rowLength row key length passed to the KeyValue size calculation
 * @param colValueMap map from column to value; a missing entry, a {@code null} value and
 *        an empty array are all counted as a null
 * @return estimated encoded size
 */
public static int getEstimatedByteSize(PTable table, int rowLength,
        Map<PColumn, byte[]> colValueMap) {
    // iterate over column families
    int rowSize = 0;
    for (PColumnFamily family : table.getColumnFamilies()) {
        Collection<PColumn> columns = family.getColumns();
        // we add a non null value to the start so that we can represent absent values in the array with negative offsets
        int numColumns = columns.size() + 1;
        int cellSize = 1;
        int nulls = 0;
        int maxOffset = 0;
        // iterate over columns
        for (PColumn column : columns) {
            // Map.get returns null for an absent column, so absent, null and empty
            // values all take the same branch.
            byte[] colValue = colValueMap.get(column);
            if (colValue == null || colValue.length == 0) {
                ++nulls;
                maxOffset = cellSize;
            } else {
                // count the bytes written to serialize nulls
                if (nulls > 0) {
                    // 1 + ceil(nulls / 255), computed in integer arithmetic. The previous
                    // Math.ceil(nulls / 255) divided as ints first, so the quotient was
                    // already truncated and the ceil had no effect.
                    cellSize += 1 + (nulls + 254) / 255;
                    nulls = 0;
                }
                maxOffset = cellSize;
                cellSize += colValue.length;
            }
        }
        // count the bytes used for the offset array
        cellSize +=
                PArrayDataType.useShortForOffsetArray(maxOffset,
                    PArrayDataType.IMMUTABLE_SERIALIZATION_VERSION)
                            ? numColumns * Bytes.SIZEOF_SHORT
                            : numColumns * Bytes.SIZEOF_INT;
        cellSize += 4;
        // count the bytes used for header information
        cellSize += 5;
        // add the size of the single cell containing all column values
        rowSize +=
                KeyValue.getKeyValueDataStructureSize(rowLength,
                    family.getName().getBytes().length,
                    QueryConstants.SINGLE_KEYVALUE_COLUMN_QUALIFIER_BYTES.length, cellSize);
    }
    return rowSize;
}