Java Code Examples for org.apache.hadoop.hbase.util.Bytes#split()

The following examples show how to use org.apache.hadoop.hbase.util.Bytes#split(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may want to check out the right sidebar, which shows the related API usage.
Example 1
Source Project: phoenix-omid   File: RegionSplitter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Computes the split keys for {@code numRegions} regions over the configured
 * first/last row range.
 *
 * @param numRegions number of regions wanted; {@code numRegions - 1} is the split count
 *                   handed to {@code Bytes.split}
 * @return the keys returned by {@code Bytes.split} with the first and last entry
 *         (the range endpoints) removed
 */
@Override
public byte[][] split(int numRegions) {
    final boolean rangeIsOrdered = Bytes.compareTo(lastRowBytes, firstRowBytes) > 0;
    Preconditions.checkArgument(
        rangeIsOrdered,
        "last row (%s) is configured less than first row (%s)",
        Bytes.toStringBinary(lastRowBytes),
        Bytes.toStringBinary(firstRowBytes));

    final int splitCount = numRegions - 1;
    final byte[][] withEndpoints = Bytes.split(firstRowBytes, lastRowBytes, true, splitCount);
    Preconditions.checkState(withEndpoints != null,
                             "Could not split region with given user input: " + this);

    // The endpoints are part of the returned list; callers only want the keys in between.
    final int endExclusive = withEndpoints.length - 1;
    return Arrays.copyOfRange(withEndpoints, 1, endExclusive);
}
 
Example 2
Source Project: hbase   File: ThriftAdmin.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a table whose split keys are derived from a start key, an end key and a region count,
 * then delegates to {@code createTable(desc, splitKeys)}.
 *
 * @param desc       descriptor of the table to create
 * @param startKey   lowest split key; must sort strictly before {@code endKey}
 * @param endKey     highest split key
 * @param numRegions requested region count; must be at least 3
 * @throws IllegalArgumentException if fewer than three regions are requested, the keys are not
 *                                  in ascending order, or the range yields an unexpected number
 *                                  of split keys
 */
@Override
public void createTable(TableDescriptor desc, byte[] startKey, byte[] endKey, int numRegions)
    throws IOException {
  if (numRegions < 3) {
    throw new IllegalArgumentException("Must create at least three regions");
  }
  if (Bytes.compareTo(startKey, endKey) >= 0) {
    throw new IllegalArgumentException("Start key must be smaller than end key");
  }

  final byte[][] boundaries;
  if (numRegions == 3) {
    // With exactly three regions the two supplied keys are the only split keys needed.
    boundaries = new byte[][]{startKey, endKey};
  } else {
    boundaries = Bytes.split(startKey, endKey, numRegions - 3);
    final int expectedKeyCount = numRegions - 1;
    if (boundaries == null || boundaries.length != expectedKeyCount) {
      throw new IllegalArgumentException("Unable to split key range into enough regions");
    }
  }
  createTable(desc, boundaries);
}
 
Example 3
/**
 * Create a list of {@link RegionInfo}s that represent a region chain of the specified length.
 * The chain spans the keys "aaaaa" to "zzzzz"; each region's end key is the start key of the
 * region that follows it.
 *
 * @param tableName table the regions belong to
 * @param length    chain length; must be at least 1
 * @return the regions, in key order
 * @throws IllegalStateException if {@code length} is smaller than 1
 */
private static List<RegionInfo> createRegionInfos(final TableName tableName, final int length) {
  if (length < 1) {
    throw new IllegalStateException("length must be greater than or equal to 1.");
  }

  final byte[] firstKey = Bytes.toBytes("aaaaa");
  final byte[] lastKey = Bytes.toBytes("zzzzz");
  if (length == 1) {
    return Collections.singletonList(createRegionInfo(tableName, firstKey, lastKey));
  }

  final byte[][] boundaries = Bytes.split(firstKey, lastKey, length - 1);
  final List<RegionInfo> chain = new ArrayList<>(length);
  // Pair up every boundary with its predecessor to form one region per adjacent pair.
  for (int upper = 1; upper < boundaries.length; upper++) {
    chain.add(createRegionInfo(tableName, boundaries[upper - 1], boundaries[upper]));
  }
  return chain;
}
 
Example 4
/**
 * Picks the partition for {@code key} by binary-searching the split boundaries computed
 * between {@code startkey} and {@code endkey}.
 *
 * @param key     row key to place
 * @param value   unused by this method
 * @param reduces number of partitions; boundaries are recomputed whenever this value differs
 *                from the one seen on the previous call
 * @return index of the partition the key falls into
 * @throws RuntimeException if the key sorts before the first boundary
 */
@Override
public int getPartition(final ImmutableBytesWritable key, final VALUE value,
    final int reduces) {
  if (reduces == 1) {
    return 0;
  }
  if (this.lastReduces != reduces) {
    this.splits = Bytes.split(this.startkey, this.endkey, reduces - 1);
    for (final byte[] boundary : splits) {
      LOG.info(Bytes.toStringBinary(boundary));
    }
    this.lastReduces = reduces;
  }
  final int found = Bytes.binarySearch(this.splits, key.get(), key.getOffset(),
    key.getLength());
  if (found >= 0) {
    return found;
  }
  // Below code is from hfile index search.
  // A negative result is treated as -(insertionPoint + 1).
  final int insertionPoint = -(found + 1);
  if (insertionPoint == 0) {
    // falls before the beginning of the file.
    throw new RuntimeException("Key outside start/stop range: " +
      key.toString());
  }
  return insertionPoint - 1;
}
 
Example 5
Source Project: phoenix   File: ByteUtilTest.java    License: Apache License 2.0 5 votes vote down vote up
/** Checks that every key returned by {@code Bytes.split} lies within [startRow, stopRow]. */
@Test
public void testSplitBytes() {
    final byte[] lowerBound = Bytes.toBytes("EA");
    final byte[] upperBound = Bytes.toBytes("EZ");
    final byte[][] points = Bytes.split(lowerBound, upperBound, 10);
    for (int i = 0; i < points.length; i++) {
        final byte[] point = points[i];
        // The printable form of the offending key doubles as the failure message.
        final String label = Bytes.toStringBinary(point);
        assertTrue(label, Bytes.compareTo(lowerBound, point) <= 0);
        assertTrue(label, Bytes.compareTo(upperBound, point) >= 0);
    }
}
 
Example 6
Source Project: hbase   File: HBaseTestingUtility.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Create a table with multiple regions, using split keys computed between the fixed
 * keys "aaaaa" and "zzzzz".
 * @param tableName name of the table to create
 * @param family the table's single column family
 * @param numRegions requested number of regions; must be at least 3
 * @return A Table instance for the created table.
 * @throws IOException if {@code numRegions} is below 3, or propagated from
 *         {@code createTable}
 */
public Table createMultiRegionTable(TableName tableName, byte[] family, int numRegions)
    throws IOException {
  if (numRegions < 3) {
    throw new IOException("Must create at least 3 regions");
  }
  final byte[] lowKey = Bytes.toBytes("aaaaa");
  final byte[] highKey = Bytes.toBytes("zzzzz");
  final byte[][] boundaries = Bytes.split(lowKey, highKey, numRegions - 3);
  final byte[][] families = { family };

  return createTable(tableName, families, boundaries);
}
 
Example 7
Source Project: hbase   File: HBaseTestingUtility.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Create region start keys between startKey and endKey: an empty key followed by every key
 * returned by {@code Bytes.split(startKey, endKey, numRegions - 3)}.
 *
 * @param startKey lower bound handed to {@code Bytes.split}
 * @param endKey upper bound handed to {@code Bytes.split}
 * @param numRegions the number of regions to be created. it has to be greater than 3.
 * @return resulting start keys, with {@code HConstants.EMPTY_BYTE_ARRAY} at index 0
 */
public byte[][] getRegionSplitStartKeys(byte[] startKey, byte[] endKey, int numRegions){
  assertTrue(numRegions>3);
  final byte[][] computed = Bytes.split(startKey, endKey, numRegions - 3);
  final byte[][] startKeys = new byte[computed.length + 1][];
  // Slot 0 holds the empty key; the computed keys follow, shifted by one.
  startKeys[0] = HConstants.EMPTY_BYTE_ARRAY;
  System.arraycopy(computed, 0, startKeys, 1, computed.length);
  return startKeys;
}
 
Example 8
Source Project: hbase   File: RawAsyncHBaseAdmin.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Derives the split keys for a table with {@code numRegions} regions between two keys.
 *
 * @param startKey   lowest split key; must sort strictly before {@code endKey}
 * @param endKey     highest split key
 * @param numRegions requested region count; must be at least 3
 * @return {@code numRegions - 1} split keys
 * @throws IllegalArgumentException if fewer than three regions are requested, the keys are not
 *                                  in ascending order, or the range yields an unexpected number
 *                                  of split keys
 */
private byte[][] getSplitKeys(byte[] startKey, byte[] endKey, int numRegions) {
  if (numRegions < 3) {
    throw new IllegalArgumentException("Must create at least three regions");
  }
  if (Bytes.compareTo(startKey, endKey) >= 0) {
    throw new IllegalArgumentException("Start key must be smaller than end key");
  }
  if (numRegions == 3) {
    // Three regions need exactly the two supplied keys as boundaries.
    return new byte[][] { startKey, endKey };
  }

  final byte[][] keys = Bytes.split(startKey, endKey, numRegions - 3);
  final boolean enoughKeys = keys != null && keys.length == numRegions - 1;
  if (!enoughKeys) {
    throw new IllegalArgumentException("Unable to split key range into enough regions");
  }
  return keys;
}
 
Example 9
Source Project: pinpoint   File: CreateTableChange.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Generates the split keys between {@code FIRST_ROW_BYTES} and {@code LAST_ROW_BYTES}
 * for the configured {@code numRegions}.
 *
 * @return the keys returned by {@code Bytes.split} without the two range endpoints
 * @throws IllegalStateException if {@code Bytes.split} returns {@code null}
 */
private byte[][] generateSplitKeys() {
    final byte[][] withEndpoints =
            Bytes.split(FIRST_ROW_BYTES, LAST_ROW_BYTES, true, numRegions - 1);
    if (withEndpoints == null) {
        throw new IllegalStateException("Could not generate split keys, numRegions : " + numRegions);
    }
    // remove endpoints, which are included in the splits list
    final int endExclusive = withEndpoints.length - 1;
    return Arrays.copyOfRange(withEndpoints, 1, endExclusive);
}
 
Example 10
/**
 * Compute the buckets, which have the boundaries and estimated counts.
 * Note that the right bound for the very last bucket is inclusive.
 * The left and right bounds can be equivalent, for single value buckets.
 * @return one bucket per configured {@code numBuckets}, in the order they were computed
 */
public List<Bucket> computeBuckets() {
    Preconditions.checkState(bars.size() >= numBuckets, "Not enough data points to compute buckets");
    final List<Bucket> result = new ArrayList<>();
    final long targetSize = (long) Math.ceil(totalCount / (double) numBuckets);
    long accumulated = 0;
    int nextBar = 0;
    byte[] leftBound = bars.get(0).leftBoundInclusive;
    Bar current = null;
    for (int bucketNo = 0; bucketNo < numBuckets; bucketNo++) {
        // Consume bars until the running count exceeds the target size or no bars remain.
        while (accumulated <= targetSize && nextBar < bars.size()) {
            current = bars.get(nextBar);
            nextBar++;
            accumulated += current.getSize();
        }
        final long surplus = Math.max(accumulated - targetSize, 0);
        // deviate a bit from the paper here
        // to estimate the bound, we split the range into 8 splits for a total of 10 including start/end
        // then we calculate the % of the current bar's count we've used, and round down to the closest split
        final double usedFraction = 1 - ((double) surplus / current.getSize());
        final int splitIdx = (int) (usedFraction * 9);
        final byte[][] candidates =
                Bytes.split(current.leftBoundInclusive, current.rightBoundExclusive, 8);
        final byte[] rightBound = candidates[splitIdx];
        final Bucket bucket = new Bucket(leftBound, rightBound);
        bucket.incrementCountEstimate(accumulated - surplus);
        result.add(bucket);
        // The next bucket starts where this one ended and inherits the unused count.
        leftBound = rightBound;
        accumulated = surplus;
    }
    return result;
}
 
Example 11
Source Project: phoenix   File: ByteUtilTest.java    License: Apache License 2.0 5 votes vote down vote up
/** Checks that every key returned by {@code Bytes.split} lies within [startRow, stopRow]. */
@Test
public void testSplitBytes() {
    final byte[] rangeStart = Bytes.toBytes("EA");
    final byte[] rangeStop = Bytes.toBytes("EZ");
    final byte[][] keys = Bytes.split(rangeStart, rangeStop, 10);
    for (int idx = 0; idx < keys.length; idx++) {
        final byte[] candidate = keys[idx];
        // The printable form of the offending key doubles as the failure message.
        final String printable = Bytes.toStringBinary(candidate);
        assertTrue(printable, Bytes.compareTo(rangeStart, candidate) <= 0);
        assertTrue(printable, Bytes.compareTo(rangeStop, candidate) >= 0);
    }
}
 
Example 12
/**
 * Exercises {@code computeBuckets}: first with five added values, then with hand-built bars
 * to check which split point is chosen as the bucket boundary.
 */
@Test
public void testComputeBuckets() {
    EquiDepthStreamHistogram histogram = new EquiDepthStreamHistogram(3);
    histogram.addValue(bytesA);
    histogram.addValue(bytesB);
    histogram.addValue(bytesC);
    histogram.addValue(bytesD);
    histogram.addValue(bytesE);
    List<Bucket> computed = histogram.computeBuckets();
    assertEquals(3, computed.size());

    Bucket first = computed.get(0);
    assertEquals(2, first.getCountEstimate());
    assertInBucket(first, bytesA);
    assertInBucket(first, bytesB);

    Bucket second = computed.get(1);
    assertEquals(2, second.getCountEstimate());
    assertInBucket(second, bytesC);
    assertInBucket(second, bytesD);

    Bucket third = computed.get(2);
    assertEquals(1, third.getCountEstimate());
    assertInBucketInclusive(third, bytesE);

    // test closestSplitIdx - total count is currently 5, idealBuckSize=2
    histogram.bars.clear();
    a_b.incrementCount();
    histogram.bars.add(a_b);
    Bar middleBar = new Bar(bytesB, bytesD);
    middleBar.incrementCount(3); // use 1/3 of this bar's count for first bucket
    histogram.bars.add(middleBar);
    histogram.bars.add(d_e);
    computed = histogram.computeBuckets();

    // bound should be 1/3 of [bytesB, bytesD),
    // since we used 1/3 of the middle bar's count for first bucket
    byte[][] expectedSplits = Bytes.split(bytesB, bytesD, 8);
    Bucket leading = computed.get(0);
    assertArrayEquals(expectedSplits[3], leading.getRightBoundExclusive());
    Bucket following = computed.get(1);
    assertArrayEquals(expectedSplits[3], following.leftBoundInclusive);
}
 
Example 13
/** Checks that every key returned by {@code Bytes.split} lies within [startRow, stopRow]. */
@Test
public void testSplitBytes() {
    final byte[] first = Bytes.toBytes("EA");
    final byte[] last = Bytes.toBytes("EZ");
    final byte[][] generated = Bytes.split(first, last, 10);
    for (int pos = 0; pos < generated.length; pos++) {
        final byte[] key = generated[pos];
        // The printable form of the offending key doubles as the failure message.
        final String message = Bytes.toStringBinary(key);
        assertTrue(message, Bytes.compareTo(first, key) <= 0);
        assertTrue(message, Bytes.compareTo(last, key) >= 0);
    }
}
 
Example 14
Source Project: phoenix-omid   File: RegionSplitter.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Returns the element at index 1 of {@code Bytes.split(start, end, 1)}, i.e. the entry that
 * follows the first one in the returned array.
 *
 * @param start lower bound of the range
 * @param end   upper bound of the range
 * @return the second entry of the split result
 */
public byte[] split(byte[] start, byte[] end) {
    final byte[][] points = Bytes.split(start, end, 1);
    return points[1];
}
 
Example 15
/**
 * Asks the admin to split the given region at the key found at index 1 of
 * {@code Bytes.split(startKey, endKey, 1)}.
 *
 * @param regionInfo region to split
 * @throws IOException propagated from the admin call
 */
protected final void splitRegion(RegionInfo regionInfo) throws IOException {
  final byte[] lowerKey = regionInfo.getStartKey();
  final byte[] upperKey = regionInfo.getEndKey();
  final byte[][] points = Bytes.split(lowerKey, upperKey, 1);
  admin.split(regionInfo.getTable(), points[1]);
}
 
Example 16
/**
 * Asks the admin to split the given region at the key found at index 1 of
 * {@code Bytes.split(startKey, endKey, 1)}.
 *
 * @param regionInfo region to split
 * @throws IOException propagated from the admin call
 */
private void splitRegion(final RegionInfo regionInfo) throws IOException {
  final byte[] regionStart = regionInfo.getStartKey();
  final byte[] regionEnd = regionInfo.getEndKey();
  final byte[][] candidates = Bytes.split(regionStart, regionEnd, 1);
  admin.split(regionInfo.getTable(), candidates[1]);
}
 
Example 17
Source Project: hbase   File: TableInputFormatBase.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Create n splits for one InputSplit. For now only uniform distribution is supported.
 * @param split A TableSplit corresponding to a range of rowkeys
 * @param n     Number of ranges after splitting.  Pass 1 means no split for the range
 *              Pass 2 if you want to split the range in two; values below 1 are treated as 1
 * @return A list of TableSplit, the size of the list is n
 * @throws IllegalArgumentIOException if {@code split} is null or not a TableSplit, or if the
 *         row range cannot be divided into n ranges
 */
protected List<InputSplit> createNInputSplitsUniform(InputSplit split, int n)
    throws IllegalArgumentIOException {
  // instanceof is false for null, so this single test also rejects a null split.
  if (!(split instanceof TableSplit)) {
    throw new IllegalArgumentIOException(
        "InputSplit for CreateNSplitsPerRegion can not be null "
            + "and should be instance of TableSplit");
  }
  //if n < 1, then still continue using n = 1
  n = Math.max(n, 1);
  List<InputSplit> res = new ArrayList<>(n);
  if (n == 1) {
    res.add(split);
    return res;
  }

  // Collect Region related information
  TableSplit ts = (TableSplit) split;
  TableName tableName = ts.getTable();
  String regionLocation = ts.getRegionLocation();
  String encodedRegionName = ts.getEncodedRegionName();
  long regionSize = ts.getLength();
  byte[] startRow = ts.getStartRow();
  byte[] endRow = ts.getEndRow();

  // For special case: startRow or endRow is empty. Empty keys are replaced by concrete
  // bounds so that Bytes.split has a range to work on.
  // NOTE(review): the substituted bounds are also used as the start/end rows of the first and
  // last sub-split below — confirm that this is the intended scan range.
  if (startRow.length == 0 && endRow.length == 0){
    startRow = new byte[1];
    endRow = new byte[1];
    startRow[0] = 0;
    endRow[0] = -1;
  }
  if (startRow.length == 0 && endRow.length != 0){
    startRow = new byte[1];
    startRow[0] = 0;
  }
  if (startRow.length != 0 && endRow.length == 0){
    // Upper bound: as many 0xFF bytes as the start row has bytes.
    endRow =new byte[startRow.length];
    for (int k = 0; k < startRow.length; k++){
      endRow[k] = -1;
    }
  }

  // Split Region into n chunks evenly
  byte[][] splitKeys = Bytes.split(startRow, endRow, true, n-1);
  if (splitKeys == null) {
    // Fail with a descriptive, declared exception instead of a NullPointerException below.
    throw new IllegalArgumentIOException(
        "Unable to split row range [" + Bytes.toStringBinary(startRow) + ", "
            + Bytes.toStringBinary(endRow) + "] into " + n + " ranges");
  }
  for (int i = 0; i < splitKeys.length - 1; i++) {
    //notice that the regionSize parameter may be not very accurate
    TableSplit tsplit =
        new TableSplit(tableName, scan, splitKeys[i], splitKeys[i + 1], regionLocation,
            encodedRegionName, regionSize / n);
    res.add(tsplit);
  }
  return res;
}
 
Example 18
/**
 * Replaces {@code origBar} in {@code bars} with two new bars that meet at the key found at
 * index 1 of {@code Bytes.split(left, right, 1)}, dividing the original bar's blocked bars and
 * remaining count between them. Does nothing if the bar's bounds are equal, or if the bar
 * limit has been reached and no merge could free a slot.
 *
 * @param origBar the bar to split
 */
@VisibleForTesting
void splitBar(Bar origBar) {
    // short circuit - don't split a bar of length 1
    if (Bytes.compareTo(origBar.leftBoundInclusive, origBar.rightBoundExclusive) == 0) {
        return;
    }
    if (bars.size() == maxBars) { // max bars hit, need to merge two existing bars first
        boolean mergeSuccessful = mergeBars();
        if (!mergeSuccessful) return; // don't split if we couldn't merge
    }
    byte[] mid = Bytes.split(origBar.getLeftBoundInclusive(), origBar.getRightBoundExclusive(), 1)[1];
    Bar newLeft = new Bar(origBar.getLeftBoundInclusive(), mid);
    Bar newRight = new Bar(mid, origBar.getRightBoundExclusive());
    // distribute blocked bars between the new bars
    long leftSize = 0;
    long bbAggCount = origBar.getBlockedBarsSize();
    for (Bar bb : origBar.getBlockedBars()) {
        long bbSize = bb.getSize();
        // a blocked bar goes left only while the left total stays below half of the
        // aggregate blocked-bar count; everything else goes right
        if (leftSize + bbSize < bbAggCount/2) {
            leftSize += bbSize;
            newLeft.addBlockedBar(bb);
        } else {
            newRight.addBlockedBar(bb);
        }
    }
    // at this point the two new bars may have different counts,
    // distribute the rest of origBar's count to make them as close as possible
    long countToDistribute = origBar.getSize() - bbAggCount;
    long rightSize = newRight.getSize();
    long sizeDiff = Math.abs(leftSize - rightSize);
    Bar smallerBar = leftSize <= rightSize ? newLeft : newRight;
    if (sizeDiff <= countToDistribute) {
        // enough count left to even the bars out: top up the smaller one first,
        // then share the remainder (the right bar receives the odd unit, if any)
        smallerBar.incrementCount(sizeDiff);
        countToDistribute -= sizeDiff;
        long halfDistrib = countToDistribute / 2;
        newLeft.incrementCount(halfDistrib);
        newRight.incrementCount(countToDistribute - halfDistrib);
    } else {
        // not enough to close the gap: give everything to the smaller bar
        smallerBar.incrementCount(countToDistribute);
    }
    if (LOGGER.isTraceEnabled()) {
        LOGGER.trace(String.format("Split orig=%s , newLeft=%s , newRight=%s",
                origBar, newLeft, newRight));
    }
    bars.remove(origBar);
    bars.add(newLeft);
    bars.add(newRight);
    // technically don't need to sort here, as we can get the index from getBar,
    // and put the new bars in the same index.  But we'd have to handle merge as well,
    // doable but not worth the more complicated code since bars.size is fixed and generally small
    Collections.sort(bars);
}