Java Code Examples for org.apache.hadoop.hbase.util.Bytes#split()

The following examples show how to use org.apache.hadoop.hbase.util.Bytes#split(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: RegionSplitter.java    From phoenix-omid with Apache License 2.0 6 votes vote down vote up
/**
 * Computes the interior split keys for the configured row range.
 *
 * <p>The range from {@code firstRowBytes} to {@code lastRowBytes} is handed to
 * {@code Bytes.split} with {@code numRegions - 1} requested splits; the two
 * endpoints that come back with the result are dropped before returning.
 *
 * @param numRegions the number of regions the range should be divided into
 * @return the split keys without the first-row and last-row endpoints
 */
@Override
public byte[][] split(int numRegions) {
    final boolean rangeIsOrdered = Bytes.compareTo(lastRowBytes, firstRowBytes) > 0;
    Preconditions.checkArgument(
        rangeIsOrdered,
        "last row (%s) is configured less than first row (%s)",
        Bytes.toStringBinary(lastRowBytes),
        Bytes.toStringBinary(firstRowBytes));

    final byte[][] boundaries =
        Bytes.split(firstRowBytes, lastRowBytes, true, numRegions - 1);
    Preconditions.checkState(boundaries != null,
        "Could not split region with given user input: " + this);

    // The first and last entries are the range endpoints; only the keys in between are wanted.
    final int lastIndex = boundaries.length - 1;
    return Arrays.copyOfRange(boundaries, 1, lastIndex);
}
 
Example 2
Source File: ThriftAdmin.java    From hbase with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a table whose key range between {@code startKey} and {@code endKey} is
 * pre-split so that the table ends up with {@code numRegions} regions.
 *
 * @param desc       descriptor of the table to create
 * @param startKey   lower boundary key; must sort strictly before {@code endKey}
 * @param endKey     upper boundary key
 * @param numRegions total number of regions requested; must be at least 3
 * @throws IllegalArgumentException if fewer than three regions are requested, the keys are
 *         not in ascending order, or the range cannot be split into enough regions
 * @throws IOException declared for the delegated {@code createTable(desc, splitKeys)} call
 */
@Override
public void createTable(TableDescriptor desc, byte[] startKey, byte[] endKey, int numRegions)
    throws IOException {
  if (numRegions < 3) {
    throw new IllegalArgumentException("Must create at least three regions");
  }
  if (Bytes.compareTo(startKey, endKey) >= 0) {
    throw new IllegalArgumentException("Start key must be smaller than end key");
  }

  final byte[][] boundaries;
  if (numRegions == 3) {
    // Three regions need exactly the two boundary keys and no interior split points.
    boundaries = new byte[][]{startKey, endKey};
  } else {
    boundaries = Bytes.split(startKey, endKey, numRegions - 3);
    final boolean usable = boundaries != null && boundaries.length == numRegions - 1;
    if (!usable) {
      throw new IllegalArgumentException("Unable to split key range into enough regions");
    }
  }
  createTable(desc, boundaries);
}
 
Example 3
Source File: TestSimpleRegionNormalizer.java    From hbase with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a chain of {@link RegionInfo}s of the requested length, covering the key range
 * from "aaaaa" to "zzzzz". Each region's end key is the next region's start key.
 *
 * @param tableName table the regions belong to
 * @param length    number of regions in the chain; must be at least 1
 * @return the regions in key order
 * @throws IllegalStateException if {@code length} is less than 1
 */
private static List<RegionInfo> createRegionInfos(final TableName tableName, final int length) {
  if (length < 1) {
    throw new IllegalStateException("length must be greater than or equal to 1.");
  }

  final byte[] startKey = Bytes.toBytes("aaaaa");
  final byte[] endKey = Bytes.toBytes("zzzzz");
  if (length == 1) {
    // A single region spans the whole range; nothing to split.
    return Collections.singletonList(createRegionInfo(tableName, startKey, endKey));
  }

  final byte[][] boundaries = Bytes.split(startKey, endKey, length - 1);
  final List<RegionInfo> chain = new ArrayList<>(length);
  // Pair every boundary with its predecessor to form one region per adjacent pair.
  for (int upper = 1; upper < boundaries.length; upper++) {
    chain.add(createRegionInfo(tableName, boundaries[upper - 1], boundaries[upper]));
  }
  return chain;
}
 
Example 4
Source File: SimpleTotalOrderPartitioner.java    From hbase with Apache License 2.0 6 votes vote down vote up
/**
 * Picks the partition for {@code key} by locating it among split points computed between
 * {@code startkey} and {@code endkey}.
 *
 * <p>The split points are recomputed (and logged) whenever the number of reducers differs
 * from the one used on the previous call.
 *
 * @param key     the key to place
 * @param value   unused by the partitioning decision
 * @param reduces number of reduce tasks
 * @return index of the partition the key falls into
 * @throws RuntimeException if the key sorts before the first split point
 */
@Override
public int getPartition(final ImmutableBytesWritable key, final VALUE value,
    final int reduces) {
  if (reduces == 1) {
    return 0;
  }
  if (this.lastReduces != reduces) {
    this.splits = Bytes.split(this.startkey, this.endkey, reduces - 1);
    for (final byte[] boundary : splits) {
      LOG.info(Bytes.toStringBinary(boundary));
    }
    this.lastReduces = reduces;
  }

  final int found = Bytes.binarySearch(this.splits, key.get(), key.getOffset(),
    key.getLength());
  if (found >= 0) {
    return found;
  }

  // Same handling as the hfile index search: a negative result is decoded as
  // -(insertionPoint + 1), and the key belongs to the slot just before that point.
  final int insertionPoint = -(found + 1);
  if (insertionPoint == 0) {
    // falls before the beginning of the file.
    throw new RuntimeException("Key outside start/stop range: " +
      key.toString());
  }
  return insertionPoint - 1;
}
 
Example 5
Source File: ByteUtilTest.java    From phoenix with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Verifies that every key returned by {@code Bytes.split} for the range "EA".."EZ"
 * lies within that range (bounds included).
 */
@Test
public void testSplitBytes() {
    final byte[] lowerBound = Bytes.toBytes("EA");
    final byte[] upperBound = Bytes.toBytes("EZ");
    final byte[][] points = Bytes.split(lowerBound, upperBound, 10);
    for (int i = 0; i < points.length; i++) {
        final byte[] point = points[i];
        // The printable form of the offending key doubles as the failure message.
        final String label = Bytes.toStringBinary(point);
        assertTrue(label, Bytes.compareTo(lowerBound, point) <= 0);
        assertTrue(label, Bytes.compareTo(upperBound, point) >= 0);
    }
}
 
Example 6
Source File: EquiDepthStreamHistogramTest.java    From phoenix with Apache License 2.0 5 votes vote down vote up
/**
 * Checks {@code computeBuckets()} in two scenarios: first with five values added to a
 * three-bucket histogram, then with a hand-built set of bars where a bucket boundary
 * has to fall inside a bar.
 */
@Test
public void testComputeBuckets() {
    // Scenario 1: a histogram configured for 3 buckets receives five values.
    EquiDepthStreamHistogram histo = new EquiDepthStreamHistogram(3);
    histo.addValue(bytesA);
    histo.addValue(bytesB);
    histo.addValue(bytesC);
    histo.addValue(bytesD);
    histo.addValue(bytesE);
    List<Bucket> buckets = histo.computeBuckets();
    assertEquals(3, buckets.size());
    // Expected distribution of the five values: 2 / 2 / 1.
    Bucket bucket = buckets.get(0);
    assertEquals(2, bucket.getCountEstimate());
    assertInBucket(bucket, bytesA);
    assertInBucket(bucket, bytesB);
    bucket = buckets.get(1);
    assertEquals(2, bucket.getCountEstimate());
    assertInBucket(bucket, bytesC);
    assertInBucket(bucket, bytesD);
    bucket = buckets.get(2);
    assertEquals(1, bucket.getCountEstimate());
    // The last bucket is checked with the inclusive variant of the assertion.
    assertInBucketInclusive(bucket, bytesE);

    // test closestSplitIdx - total count is currently 5, idealBuckSize=2
    // Scenario 2: replace the histogram's bars by hand.
    // NOTE(review): a_b and d_e are not declared in this method - presumably Bar fixtures
    // defined on the test class; confirm their bounds and initial counts there.
    histo.bars.clear();
    a_b.incrementCount();
    histo.bars.add(a_b);
    Bar b_d = new Bar(bytesB, bytesD);
    b_d.incrementCount(3); // use 1/3 of this bar's count for first bucket
    histo.bars.add(b_d);
    histo.bars.add(d_e);
    buckets = histo.computeBuckets();
    bucket = buckets.get(0);
    // bound should be 1/3 of [bytesB, bytesD),
    // since we used 1/3 of b_d's count for first bucket
    // Assuming Bytes.split(.., 8) yields 10 evenly spaced keys including both endpoints,
    // index 3 is 3/9 = 1/3 of the way through the range.
    byte[][] splits = Bytes.split(bytesB, bytesD, 8);
    assertArrayEquals(splits[3], bucket.getRightBoundExclusive());
    // The second bucket must start exactly where the first one ended.
    bucket = buckets.get(1);
    assertArrayEquals(splits[3], bucket.leftBoundInclusive);
}
 
Example 7
Source File: ByteUtilTest.java    From phoenix with Apache License 2.0 5 votes vote down vote up
/**
 * Splits the range "EA".."EZ" with {@code Bytes.split} and asserts that no returned key
 * sorts before the start row or after the stop row.
 */
@Test
public void testSplitBytes() {
    final byte[] startRow = Bytes.toBytes("EA");
    final byte[] stopRow = Bytes.toBytes("EZ");
    final byte[][] candidates = Bytes.split(startRow, stopRow, 10);
    for (final byte[] candidate : candidates) {
        // Render the key once; it is used as the message of both assertions.
        final String rendered = Bytes.toStringBinary(candidate);
        final boolean notBeforeStart = Bytes.compareTo(startRow, candidate) <= 0;
        assertTrue(rendered, notBeforeStart);
        final boolean notAfterStop = Bytes.compareTo(stopRow, candidate) >= 0;
        assertTrue(rendered, notAfterStop);
    }
}
 
Example 8
Source File: EquiDepthStreamHistogram.java    From phoenix with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the buckets, each holding its boundaries and an estimated count.
 * Note that the right bound for the very last bucket is inclusive.
 * The left and right bounds can be equivalent, for single value buckets.
 *
 * @return a list containing {@code numBuckets} buckets
 * @throws IllegalStateException if there are fewer bars than requested buckets
 */
public List<Bucket> computeBuckets() {
    Preconditions.checkState(bars.size() >= numBuckets, "Not enough data points to compute buckets");
    final List<Bucket> result = new ArrayList<>();
    final long targetSize = (long) Math.ceil(totalCount / (double) numBuckets);
    byte[] leftBound = bars.get(0).leftBoundInclusive;
    Bar lastBar = null;
    long accumulated = 0;
    int nextBarIdx = 0;
    for (int bucketNo = 0; bucketNo < numBuckets; bucketNo++) {
        // Consume bars until the running count exceeds the target or the bars run out.
        while (accumulated <= targetSize && nextBarIdx < bars.size()) {
            lastBar = bars.get(nextBarIdx);
            nextBarIdx++;
            accumulated += lastBar.getSize();
        }
        // Whatever exceeds the target is carried over into the next bucket.
        final long surplus = Math.max(accumulated - targetSize, 0);
        // deviate a bit from the paper here
        // to estimate the bound, we split the range into 8 splits for a total of 10 including start/end
        // then we calculate the % of the lastBar's count we've used, and round down to the closest split
        final int closestSplitIdx = (int) ((1 - ((double) surplus / lastBar.getSize())) * 9);
        final byte[][] splits = Bytes.split(lastBar.leftBoundInclusive, lastBar.rightBoundExclusive, 8);
        final byte[] rightBound = splits[closestSplitIdx];
        final Bucket bucket = new Bucket(leftBound, rightBound);
        bucket.incrementCountEstimate(accumulated - surplus);
        result.add(bucket);
        // The next bucket starts where this one ends.
        leftBound = rightBound;
        accumulated = surplus;
    }
    return result;
}
 
Example 9
Source File: CreateTableChange.java    From pinpoint with Apache License 2.0 5 votes vote down vote up
/**
 * Generates the split keys between {@code FIRST_ROW_BYTES} and {@code LAST_ROW_BYTES}
 * for {@code numRegions} regions.
 *
 * @return the split keys with the two range endpoints removed
 * @throws IllegalStateException if {@code Bytes.split} yields no result
 */
private byte[][] generateSplitKeys() {
    final byte[][] boundaries =
            Bytes.split(FIRST_ROW_BYTES, LAST_ROW_BYTES, true, numRegions - 1);
    if (boundaries != null) {
        // remove endpoints, which are included in the splits list
        final int endExclusive = boundaries.length - 1;
        return Arrays.copyOfRange(boundaries, 1, endExclusive);
    }
    throw new IllegalStateException("Could not generate split keys, numRegions : " + numRegions);
}
 
Example 10
Source File: RawAsyncHBaseAdmin.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the split keys needed to create {@code numRegions} regions between
 * {@code startKey} and {@code endKey}.
 *
 * @param startKey   lower boundary key; must sort strictly before {@code endKey}
 * @param endKey     upper boundary key
 * @param numRegions total number of regions requested; must be at least 3
 * @return {@code numRegions - 1} split keys, with the two boundary keys among them
 * @throws IllegalArgumentException if fewer than three regions are requested, the keys are
 *         not in ascending order, or the range cannot be split into enough regions
 */
private byte[][] getSplitKeys(byte[] startKey, byte[] endKey, int numRegions) {
  if (numRegions < 3) {
    throw new IllegalArgumentException("Must create at least three regions");
  }
  if (Bytes.compareTo(startKey, endKey) >= 0) {
    throw new IllegalArgumentException("Start key must be smaller than end key");
  }
  if (numRegions == 3) {
    // Exactly three regions: the two boundary keys are the only split keys.
    return new byte[][] { startKey, endKey };
  }

  final int interiorSplits = numRegions - 3;
  final byte[][] keys = Bytes.split(startKey, endKey, interiorSplits);
  final boolean usable = keys != null && keys.length == numRegions - 1;
  if (!usable) {
    throw new IllegalArgumentException("Unable to split key range into enough regions");
  }
  return keys;
}
 
Example 11
Source File: HBaseTestingUtility.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Create region start keys between startKey and endKey.
 *
 * <p>The keys returned by {@code Bytes.split} are shifted one position to the right, and
 * the empty byte array is placed in front of them as the first start key.
 *
 * @param startKey   lower boundary key passed to {@code Bytes.split}
 * @param endKey     upper boundary key passed to {@code Bytes.split}
 * @param numRegions the number of regions to be created. it has to be greater than 3.
 * @return resulting start keys, beginning with {@code HConstants.EMPTY_BYTE_ARRAY}
 */
public byte[][] getRegionSplitStartKeys(byte[] startKey, byte[] endKey, int numRegions){
  assertTrue(numRegions>3);
  final byte[][] splitKeys = Bytes.split(startKey, endKey, numRegions - 3);
  final byte[][] startKeys = new byte[splitKeys.length + 1][];
  startKeys[0] = HConstants.EMPTY_BYTE_ARRAY;
  for (int i = 0; i < splitKeys.length; i++) {
    startKeys[i + 1] = splitKeys[i];
  }
  return startKeys;
}
 
Example 12
Source File: HBaseTestingUtility.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Create a table with multiple regions, pre-split over the key range "aaaaa".."zzzzz".
 *
 * @param tableName  name of the table to create
 * @param family     the single column family of the table
 * @param numRegions number of regions to create; must be at least 3
 * @return A Table instance for the created table.
 * @throws IOException if fewer than three regions are requested, or if the delegated
 *         {@code createTable} call fails
 */
public Table createMultiRegionTable(TableName tableName, byte[] family, int numRegions)
    throws IOException {
  if (numRegions < 3) {
    throw new IOException("Must create at least 3 regions");
  }
  final byte[] startKey = Bytes.toBytes("aaaaa");
  final byte[] endKey = Bytes.toBytes("zzzzz");

  // Exactly three regions need only the two boundary keys. Asking Bytes.split for
  // numRegions - 3 == 0 interior splits is not a valid request, so that case is handled
  // here instead of being forwarded.
  final byte[][] splitKeys = numRegions == 3
      ? new byte[][] { startKey, endKey }
      : Bytes.split(startKey, endKey, numRegions - 3);

  return createTable(tableName, new byte[][] { family }, splitKeys);
}
 
Example 13
Source File: ByteUtilTest.java    From phoenix with Apache License 2.0 5 votes vote down vote up
/**
 * Ensures that the keys produced by {@code Bytes.split} between "EA" and "EZ" never fall
 * outside the requested range.
 */
@Test
public void testSplitBytes() {
    final byte[] first = Bytes.toBytes("EA");
    final byte[] last = Bytes.toBytes("EZ");
    final byte[][] keys = Bytes.split(first, last, 10);
    int idx = 0;
    while (idx < keys.length) {
        final byte[] current = keys[idx];
        // The key's printable form is the assertion message, so a failure names the culprit.
        assertTrue(Bytes.toStringBinary(current), Bytes.compareTo(first, current) <= 0);
        assertTrue(Bytes.toStringBinary(current), Bytes.compareTo(last, current) >= 0);
        idx++;
    }
}
 
Example 14
Source File: TableInputFormatBase.java    From hbase with Apache License 2.0 4 votes vote down vote up
/**
 * Create n splits for one InputSplit. For now only uniform distribution is supported.
 *
 * @param split A TableSplit corresponding to a range of rowkeys
 * @param n     Number of ranges after splitting. Pass 1 means no split for the range;
 *              pass 2 if you want to split the range in two. Values below 1 are
 *              treated as 1.
 * @return A list of TableSplit, the size of the list is n
 * @throws IllegalArgumentIOException if {@code split} is null or not a {@link TableSplit}
 */
protected List<InputSplit> createNInputSplitsUniform(InputSplit split, int n)
    throws IllegalArgumentIOException {
  // instanceof is false for null, so this single test rejects both null and foreign types.
  if (!(split instanceof TableSplit)) {
    throw new IllegalArgumentIOException(
        "InputSplit for CreateNSplitsPerRegion can not be null "
            + "and should be instance of TableSplit");
  }
  //if n < 1, then still continue using n = 1
  n = n < 1 ? 1 : n;
  List<InputSplit> res = new ArrayList<>(n);
  if (n == 1) {
    // Nothing to divide: hand the original split back unchanged.
    res.add(split);
    return res;
  }

  // Collect Region related information
  TableSplit ts = (TableSplit) split;
  TableName tableName = ts.getTable();
  String regionLocation = ts.getRegionLocation();
  String encodedRegionName = ts.getEncodedRegionName();
  long regionSize = ts.getLength();
  byte[] startRow = ts.getStartRow();
  byte[] endRow = ts.getEndRow();

  // For special case: startRow or endRow is empty. An empty row is replaced by a concrete
  // key so that Bytes.split receives two non-empty endpoints.
  // NOTE(review): the generated splits then start/end at these substitute keys rather than
  // at the original empty rows - confirm this is the intended coverage.
  if (startRow.length == 0 && endRow.length == 0){
    // Both open: use the one-byte range 0x00 .. 0xFF.
    startRow = new byte[1];
    endRow = new byte[1];
    startRow[0] = 0;
    endRow[0] = -1;
  }
  if (startRow.length == 0 && endRow.length != 0){
    // Open start only: begin at the single byte 0x00.
    startRow = new byte[1];
    startRow[0] = 0;
  }
  if (startRow.length != 0 && endRow.length == 0){
    // Open end only: end at 0xFF repeated to the start row's length.
    endRow =new byte[startRow.length];
    for (int k = 0; k < startRow.length; k++){
      endRow[k] = -1;
    }
  }

  // Split Region into n chunks evenly
  byte[][] splitKeys = Bytes.split(startRow, endRow, true, n-1);
  for (int i = 0; i < splitKeys.length - 1; i++) {
    //notice that the regionSize parameter may be not very accurate
    TableSplit tsplit =
        new TableSplit(tableName, scan, splitKeys[i], splitKeys[i + 1], regionLocation,
            encodedRegionName, regionSize / n);
    res.add(tsplit);
  }
  return res;
}
 
Example 15
Source File: CloneSnapshotFromClientAfterSplittingRegionTestBase.java    From hbase with Apache License 2.0 4 votes vote down vote up
/**
 * Splits the given region at the key {@code Bytes.split} places between its start and
 * end keys.
 *
 * @param regionInfo the region to split
 * @throws IOException declared for the {@code admin.split} call
 */
private void splitRegion(final RegionInfo regionInfo) throws IOException {
  final byte[] startKey = regionInfo.getStartKey();
  final byte[] endKey = regionInfo.getEndKey();
  // One requested split: index 1 of the result is the key between the two endpoints.
  final byte[][] boundaries = Bytes.split(startKey, endKey, 1);
  admin.split(regionInfo.getTable(), boundaries[1]);
}
 
Example 16
Source File: EquiDepthStreamHistogram.java    From phoenix with Apache License 2.0 4 votes vote down vote up
/**
 * Replaces {@code origBar} with two new bars that meet at the key {@code Bytes.split}
 * places between its bounds, and divides the original bar's blocked bars and remaining
 * count between them.
 *
 * <p>Nothing happens if the bar's left and right bounds compare equal, or if the bar
 * limit is reached and {@code mergeBars()} reports failure.
 *
 * @param origBar the bar to split
 */
@VisibleForTesting
void splitBar(Bar origBar) {
    // short circuit - don't split a bar of length 1
    if (Bytes.compareTo(origBar.leftBoundInclusive, origBar.rightBoundExclusive) == 0) {
        return;
    }
    if (bars.size() == maxBars) { // max bars hit, need to merge two existing bars first
        boolean mergeSuccessful = mergeBars();
        if (!mergeSuccessful) return; // don't split if we couldn't merge
    }
    // One requested split: index 1 of the result is the key between the two bounds.
    byte[] mid = Bytes.split(origBar.getLeftBoundInclusive(), origBar.getRightBoundExclusive(), 1)[1];
    Bar newLeft = new Bar(origBar.getLeftBoundInclusive(), mid);
    Bar newRight = new Bar(mid, origBar.getRightBoundExclusive());
    // distribute blocked bars between the new bars
    // A blocked bar goes left only while the left total stays below half of the aggregate
    // blocked-bar size; every other blocked bar goes right.
    long leftSize = 0;
    long bbAggCount = origBar.getBlockedBarsSize();
    for (Bar bb : origBar.getBlockedBars()) {
        long bbSize = bb.getSize();
        if (leftSize + bbSize < bbAggCount/2) {
            leftSize += bbSize;
            newLeft.addBlockedBar(bb);
        } else {
            newRight.addBlockedBar(bb);
        }
    }
    // at this point the two new bars may have different counts,
    // distribute the rest of origBar's count to make them as close as possible
    long countToDistribute = origBar.getSize() - bbAggCount;
    // NOTE(review): leftSize is tracked locally while rightSize is read from the bar -
    // this assumes a fresh Bar has size 0 and addBlockedBar contributes to getSize(); confirm.
    long rightSize = newRight.getSize();
    long sizeDiff = Math.abs(leftSize - rightSize);
    Bar smallerBar = leftSize <= rightSize ? newLeft : newRight;
    if (sizeDiff <= countToDistribute) {
        // Enough count left to even out the two bars first, then halve the remainder
        // (the right bar receives the odd unit, if any).
        smallerBar.incrementCount(sizeDiff);
        countToDistribute -= sizeDiff;
        long halfDistrib = countToDistribute / 2;
        newLeft.incrementCount(halfDistrib);
        newRight.incrementCount(countToDistribute - halfDistrib);
    } else {
        // Not enough to close the gap: give everything to the smaller bar.
        smallerBar.incrementCount(countToDistribute);
    }
    if (LOGGER.isTraceEnabled()) {
        LOGGER.trace(String.format("Split orig=%s , newLeft=%s , newRight=%s",
                origBar, newLeft, newRight));
    }
    bars.remove(origBar);
    bars.add(newLeft);
    bars.add(newRight);
    // technically don't need to sort here, as we can get the index from getBar,
    // and put the new bars in the same index.  But we'd have to handle merge as well,
    // doable but not worth the more complicated code since bars.size is fixed and generally small
    Collections.sort(bars);
}
 
Example 17
Source File: RestoreSnapshotFromClientTestBase.java    From hbase with Apache License 2.0 4 votes vote down vote up
/**
 * Asks the admin to split the given region at the key {@code Bytes.split} places between
 * the region's start and end keys.
 *
 * @param regionInfo the region to split
 * @throws IOException declared for the {@code admin.split} call
 */
protected final void splitRegion(RegionInfo regionInfo) throws IOException {
  final byte[] lower = regionInfo.getStartKey();
  final byte[] upper = regionInfo.getEndKey();
  // With a single requested split, the element at index 1 sits between the two endpoints.
  final byte[][] keys = Bytes.split(lower, upper, 1);
  admin.split(regionInfo.getTable(), keys[1]);
}
 
Example 18
Source File: RegionSplitter.java    From phoenix-omid with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the key that {@code Bytes.split} places between {@code start} and {@code end}
 * when a single split is requested.
 *
 * @param start lower bound of the range
 * @param end   upper bound of the range
 * @return the element at index 1 of the {@code Bytes.split} result
 */
public byte[] split(byte[] start, byte[] end) {
    final byte[][] boundaries = Bytes.split(start, end, 1);
    return boundaries[1];
}