Java Code Examples for org.apache.hadoop.hbase.CellUtil#matchingRows()

The following examples show how to use org.apache.hadoop.hbase.CellUtil#matchingRows(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: ReversedKeyValueHeap.java From hbase with Apache License 2.0

6 votes

/**
 * Repositions this heap for a backward seek to {@code seekKey}.
 *
 * <p>The current scanner is pushed back onto the heap, then scanners are polled one at a time.
 * The first scanner whose top cell already satisfies the seek condition ends the search: it is
 * re-added and the new current scanner is chosen via {@code pollRealKV()}. Any other scanner is
 * asked to seek backward itself; scanners that fail to do so are queued for delayed close.
 *
 * @param seekKey the key to seek backward to
 * @return {@code true} if a current scanner is available after the seek, {@code false} otherwise
 * @throws IOException if an underlying scanner fails while seeking
 */
@Override
public boolean backwardSeek(Cell seekKey) throws IOException {
  if (current == null) {
    return false;
  }
  heap.add(current);
  current = null;

  for (KeyValueScanner candidate = heap.poll(); candidate != null; candidate = heap.poll()) {
    Cell top = candidate.peek();
    // Either the top cell is on the seek row and seekKey compares <= it, or seekKey's row
    // compares greater than the top cell's row.
    boolean seekConditionMet =
        (CellUtil.matchingRows(seekKey, top)
            && comparator.getComparator().compare(seekKey, top) <= 0)
        || comparator.getComparator().compareRows(seekKey, top) > 0;
    if (seekConditionMet) {
      heap.add(candidate);
      current = pollRealKV();
      return current != null;
    }
    if (candidate.backwardSeek(seekKey)) {
      heap.add(candidate);
    } else {
      // This scanner could not be positioned; it is not returned to the heap.
      this.scannersForDelayedClose.add(candidate);
    }
  }
  return false;
}

Example 2

Source File: ReplicationSourceWALReader.java From hbase with Apache License 2.0

5 votes

/**
 * Count the number of row-key changes and bulk-loaded HFiles in the given edit (an edit may
 * span several rows because of mini-batching). We assume that there's at least one Cell in the
 * WALEdit.
 *
 * <p>Row keys are counted by comparing each cell with the one immediately before it, so the
 * result equals the number of distinct rows only when cells of the same row are adjacent.
 *
 * <p>If a bulk load cell cannot be deserialized, the failure is logged (with its cause) and the
 * HFiles of that cell are left out of the count.
 *
 * @param edit edit to count row keys from; must contain at least one Cell
 * @return a pair of (number of different row keys, number of HFile entries)
 */
private Pair<Integer, Integer> countDistinctRowKeysAndHFiles(WALEdit edit) {
  List<Cell> cells = edit.getCells();
  int distinctRowKeys = 1;
  int totalHFileEntries = 0;
  Cell lastCell = cells.get(0);

  int totalCells = edit.size();
  for (int i = 0; i < totalCells; i++) {
    Cell cell = cells.get(i);
    // Count HFiles to be replicated
    if (CellUtil.matchingQualifier(cell, WALEdit.BULK_LOAD)) {
      try {
        BulkLoadDescriptor bld = WALEdit.getBulkLoadDescriptor(cell);
        for (StoreDescriptor store : bld.getStoresList()) {
          totalHFileEntries += store.getStoreFileList().size();
        }
      } catch (IOException e) {
        // Keep going on a best-effort basis, but pass the exception to the logger so the
        // root cause and stack trace are not lost.
        LOG.error("Failed to deserialize bulk load entry from wal edit. "
            + "Then its hfiles count will not be added into metric.", e);
      }
    }

    if (!CellUtil.matchingRows(cell, lastCell)) {
      distinctRowKeys++;
    }
    lastCell = cell;
  }

  return new Pair<>(distinctRowKeys, totalHFileEntries);
}

Example 3

Source File: RowBloomContext.java From hbase with Apache License 2.0

5 votes

/**
 * Tells whether {@code cell} starts a new key, i.e. a new row.
 *
 * @param cell the cell being examined
 * @return {@code true} when there is no last cell yet, or when the row of {@code cell} differs
 *         from the row of the last cell
 */
@Override
protected boolean isNewKey(Cell cell) {
  return this.getLastCell() == null || !CellUtil.matchingRows(cell, this.getLastCell());
}

Example 4

Source File: StoreScanner.java From hbase with Apache License 2.0

5 votes

/**
 * Points the query matcher at the row of the heap's top cell, falling back to
 * {@code lastTopKey} when the heap has nothing to peek at.
 *
 * <p>The matcher is only reset when the row actually changes; resetting while still scanning
 * inside the same row would confuse it.
 *
 * @param lastTopKey cell to use when the heap's top cell is {@code null}
 */
private void resetQueryMatcher(Cell lastTopKey) {
  Cell top = heap.peek();
  Cell rowCell = (top != null) ? top : lastTopKey;

  boolean stillOnSameRow =
      matcher.currentRow() != null && CellUtil.matchingRows(rowCell, matcher.currentRow());
  if (stillOnSameRow) {
    return;
  }

  this.countPerRow = 0;
  // setToNewRow resets the matcher internally.
  matcher.setToNewRow(rowCell);
}

Example 5

Source File: TestCompoundBloomFilter.java From hbase with Apache License 2.0

5 votes

/**
 * Writes the given key/values to a new store file with a compound Bloom filter, checking after
 * every append that the Bloom filter's key count matches the number of distinct keys seen so
 * far.
 *
 * @param t index into {@code BLOOM_BLOCK_SIZES} and {@code BLOCK_SIZES} selecting the sizes to use
 * @param bt Bloom type; for {@code ROW} a new key is a new row, otherwise a new row/column
 * @param kvs key/values to append, in order
 * @return path of the written store file
 * @throws IOException if writing the store file fails
 */
private Path writeStoreFile(int t, BloomType bt, List<KeyValue> kvs)
    throws IOException {
  conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE, BLOOM_BLOCK_SIZES[t]);
  conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, true);
  cacheConf = new CacheConfig(conf, blockCache);
  HFileContext fileContext = new HFileContextBuilder().withBlockSize(BLOCK_SIZES[t]).build();
  StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, fs)
      .withOutputDir(TEST_UTIL.getDataTestDir())
      .withBloomType(bt)
      .withFileContext(fileContext)
      .build();

  assertTrue(writer.hasGeneralBloom());
  assertTrue(writer.getGeneralBloomWriter() instanceof CompoundBloomFilterWriter);
  CompoundBloomFilterWriter bloomWriter =
      (CompoundBloomFilterWriter) writer.getGeneralBloomWriter();

  int expectedKeyCount = 0;
  KeyValue previous = null;
  LOG.debug("Total keys/values to insert: " + kvs.size());
  for (KeyValue kv : kvs) {
    writer.append(kv);

    // Work out whether this key/value repeats the previous Bloom key.
    boolean repeatsPreviousKey;
    if (previous == null) {
      repeatsPreviousKey = false;
    } else if (bt == BloomType.ROW) {
      repeatsPreviousKey = CellUtil.matchingRows(kv, previous);
    } else {
      repeatsPreviousKey = CellUtil.matchingRowColumn(kv, previous);
    }
    if (!repeatsPreviousKey) {
      ++expectedKeyCount;
    }

    // Validate the key count in the Bloom filter.
    assertEquals(expectedKeyCount, bloomWriter.getKeyCount());

    previous = kv;
  }
  writer.close();

  return writer.getPath();
}

Example 6

Source File: AllowPartialScanResultCache.java From hbase with Apache License 2.0

5 votes

/**
 * Filters the incoming results against {@code lastCell}, updates {@code numberOfCompleteRows}
 * and returns the results that remain.
 *
 * @param results results received from the server; entries may be replaced in place with their
 *          filtered version
 * @param isHeartbeatMessage whether the response carrying {@code results} was a heartbeat
 * @return the results from the first one that survives filtering onwards, or
 *         {@code EMPTY_RESULT_ARRAY} when there is nothing to return
 * @throws IOException declared by the overridden method
 */
@Override
public Result[] addAndGet(Result[] results, boolean isHeartbeatMessage) throws IOException {
  if (results.length == 0) {
    if (!isHeartbeatMessage && lastResultPartial) {
      // An empty non heartbeat result indicate that there must be a row change. So if the
      // lastResultPartial is true then we need to increase numberOfCompleteRows.
      numberOfCompleteRows++;
    }
    return EMPTY_RESULT_ARRAY;
  }
  // Find the first result for which filterCells returns non-null; i is left at its index.
  int i;
  for (i = 0; i < results.length; i++) {
    Result r = filterCells(results[i], lastCell);
    if (r != null) {
      results[i] = r;
      break;
    }
  }
  // Every result was filtered out.
  if (i == results.length) {
    return EMPTY_RESULT_ARRAY;
  }
  // NOTE(review): this compares against results[0], while the first result that survived
  // filtering is results[i]. The two differ whenever i > 0 -- confirm results[0] is intended.
  if (lastResultPartial && !CellUtil.matchingRows(lastCell, results[0].getRow())) {
    // there is a row change, so increase numberOfCompleteRows
    numberOfCompleteRows++;
  }
  recordLastResult(results[results.length - 1]);
  // Drop the leading results that were filtered out entirely.
  if (i > 0) {
    results = Arrays.copyOfRange(results, i, results.length);
  }
  // A result that cannot have more cells in its row completes that row.
  for (Result result : results) {
    if (!result.mayHaveMoreCellsInRow()) {
      numberOfCompleteRows++;
    }
  }
  return results;
}

Example 7

Source File: Mutation.java From hbase with Apache License 2.0

5 votes

/**
 * Adds {@code cell} to this mutation's cell list for the cell's family.
 *
 * @param cell the cell to add; its row must match the row of this mutation
 * @return this mutation
 * @throws WrongRowIOException if the row of {@code cell} differs from this mutation's row
 * @throws IllegalArgumentException if the family of {@code cell} is null or empty
 */
Mutation add(Cell cell) throws IOException {
  // The row of the cell has to be the same as the row of this mutation.
  // TODO: It is fraught with risk if user pass the wrong row.
  // Throwing the IllegalArgumentException is more suitable I'd say.
  boolean sameRow = CellUtil.matchingRows(cell, this.row);
  if (!sameRow) {
    throw new WrongRowIOException("The row in " + cell.toString() +
      " doesn't match the original one " +  Bytes.toStringBinary(this.row));
  }

  // IndividualBytesFieldCell hands out its family array directly; other cells are cloned.
  byte[] family = (cell instanceof IndividualBytesFieldCell)
      ? cell.getFamilyArray()
      : CellUtil.cloneFamily(cell);

  boolean familyMissing = family == null || family.length == 0;
  if (familyMissing) {
    throw new IllegalArgumentException("Family cannot be null");
  }

  // Cells that are not ExtendedCell are wrapped before being stored.
  if (cell instanceof ExtendedCell) {
    getCellList(family).add(cell);
  } else {
    getCellList(family).add(new CellWrapper(cell));
  }
  return this;
}

Example 8

Source File: ReplicationSink.java From hbase with Apache License 2.0

4 votes

/**
 * Decides whether {@code cell} begins a new row or a new cell type relative to
 * {@code previousCell}.
 *
 * @param previousCell the cell seen before {@code cell}, or {@code null} if there was none
 * @param cell the cell being examined
 * @return True if we have crossed over onto a new row or type
 */
private boolean isNewRowOrType(final Cell previousCell, final Cell cell) {
  if (previousCell == null) {
    return true;
  }
  if (previousCell.getTypeByte() != cell.getTypeByte()) {
    return true;
  }
  return !CellUtil.matchingRows(previousCell, cell);
}

Example 9

Source File: MutableSegment.java From hbase with Apache License 2.0

4 votes

/**
 * Adds {@code cell} to this segment, then removes older Put versions of the same row and
 * qualifier that no scanner at {@code readpoint} can still see.
 *
 * <p>Starting from the first possible cell for this row/column, the segment is walked in order.
 * The first Put with a sequence id at or below {@code readpoint} is kept; every further such
 * Put is removed and the memstore size accounting is reduced accordingly.
 *
 * @param cell the cell to add
 * @param readpoint Puts with a sequence id greater than this are never removed
 * @param memStoreSizing accounting object that is decremented for each removed cell; may be
 *          {@code null}, in which case only this segment's own size is adjusted
 * @param sizeAddedPreOperation passed through to {@code internalAdd}
 */
public void upsert(Cell cell, long readpoint, MemStoreSizing memStoreSizing,
    boolean sizeAddedPreOperation) {
  internalAdd(cell, false, memStoreSizing, sizeAddedPreOperation);

  // Get the Cells for the row/family/qualifier regardless of timestamp.
  // For this case we want to clean up any other puts
  Cell firstCell = PrivateCellUtil.createFirstOnRowColTS(cell, HConstants.LATEST_TIMESTAMP);
  SortedSet<Cell> ss = this.tailSet(firstCell);
  Iterator<Cell> it = ss.iterator();
  // versions visible to oldest scanner
  int versionsVisible = 0;
  while (it.hasNext()) {
    Cell cur = it.next();

    // Identity comparison on purpose: skip the very object that was just added.
    if (cell == cur) {
      // ignore the one just put in
      continue;
    }
    // check that this is the row and column we are interested in, otherwise bail
    // NOTE(review): only row and qualifier are compared here, not family -- presumably a
    // segment holds cells of a single family; confirm.
    if (CellUtil.matchingRows(cell, cur) && CellUtil.matchingQualifier(cell, cur)) {
      // only remove Puts that concurrent scanners cannot possibly see
      if (cur.getTypeByte() == KeyValue.Type.Put.getCode() && cur.getSequenceId() <= readpoint) {
        if (versionsVisible >= 1) {
          // if we get here we have seen at least one version visible to the oldest scanner,
          // which means we can prove that no scanner will see this version

          // false means there was a change, so give us the size.
          // TODO when the removed cell ie.'cur' having its data in MSLAB, we can not release that
          // area. Only the Cell object as such going way. We need to consider cellLen to be
          // decreased there as 0 only. Just keeping it as existing code now. We need to know the
          // removed cell is from MSLAB or not. Will do once HBASE-16438 is in
          int cellLen = getCellLength(cur);
          long heapSize = heapSizeChange(cur, true);
          long offHeapSize = offHeapSizeChange(cur, true);
          // Shrink this segment's own accounting by the removed cell (sizes and a count of 1).
          incMemStoreSize(-cellLen, -heapSize, -offHeapSize, -1);
          if (memStoreSizing != null) {
            memStoreSizing.decMemStoreSize(cellLen, heapSize, offHeapSize, 1);
          }
          // Remove through the iterator so the walk over the set can continue.
          it.remove();
        } else {
          // First qualifying Put: keep it and start removing the ones that follow.
          versionsVisible++;
        }
      }
    } else {
      // past the row or column, done
      break;
    }
  }
}

Example 10

Source File: StoreScanner.java From hbase with Apache License 2.0

4 votes

/**
 * See if we should actually SEEK or rather just SKIP to the next Cell (see HBASE-13109).
 * ScanQueryMatcher may issue SEEK hints, such as seek to next column, next row,
 * or seek to an arbitrary seek key. This method decides whether a seek is the most efficient
 * _actual_ way to get us to the requested cell (SEEKs are more expensive than SKIP, SKIP,
 * SKIP inside the current, loaded block).
 * It does this by looking at the next indexed key of the current HFile. This key
 * is then compared with the _SEEK_ key, where a SEEK key is an artificial 'last possible key
 * on the row' (only in here, we avoid actually creating a SEEK key; in the compare we work with
 * the current Cell but compare as though it were a seek key; see down in
 * matcher.compareKeyForNextRow, etc). If the compare gets us onto the
 * next block we *_SEEK, otherwise we just SKIP to the next requested cell.
 *
 * <p>Other notes:
 * <ul>
 * <li>Rows can straddle block boundaries</li>
 * <li>Versions of columns can straddle block boundaries (i.e. column C1 at T1 might be in a
 * different block than column C1 at T2)</li>
 * <li>We want to SKIP if the chance is high that we'll find the desired Cell after a
 * few SKIPs...</li>
 * <li>We want to SEEK when the chance is high that we'll be able to seek
 * past many Cells, especially if we know we need to go to the next block.</li>
 * </ul>
 * <p>A good proxy (best effort) to determine whether SKIP is better than SEEK is whether
 * we'll likely end up seeking to the next block (or past the next block) to get our next column.
 * Example:
 * <pre>
 * |    BLOCK 1              |     BLOCK 2                   |
 * |  r1/c1, r1/c2, r1/c3    |    r1/c4, r1/c5, r2/c1        |
 *                                   ^         ^
 *                                   |         |
 *                           Next Index Key   SEEK_NEXT_ROW (before r2/c1)
 *
 *
 * |    BLOCK 1                       |     BLOCK 2                      |
 * |  r1/c1/t5, r1/c1/t4, r1/c1/t3    |    r1/c1/t2, r1/c1/T1, r1/c2/T3  |
 *                                            ^              ^
 *                                            |              |
 *                                    Next Index Key        SEEK_NEXT_COL
 * </pre>
 * Now imagine we want columns c1 and c3 (see first diagram above), the 'Next Index Key' of r1/c4
 * is > r1/c3 so we should seek to get to the c1 on the next row, r2. In second case, say we only
 * want one version of c1, after we have it, a SEEK_COL will be issued to get to c2. Looking at
 * the 'Next Index Key', it would land us in the next block, so we should SEEK. In other scenarios
 * where the SEEK will not land us in the next block, it is very likely better to issue a series
 * of SKIPs.
 * @param cell current cell
 * @return true means skip to next row, false means not
 */
@VisibleForTesting
protected boolean trySkipToNextRow(Cell cell) throws IOException {
  // Remembers the indexed key that was already compared. As long as the very same object comes
  // back (identity check) the byte comparison does not have to be repeated.
  Cell comparedIndexedKey = null;
  Cell upcoming;
  do {
    Cell indexedKey = getNextIndexedKey();
    if (indexedKey == null || indexedKey == KeyValueScanner.NO_NEXT_INDEXED_KEY) {
      // No usable next indexed key to decide with: do not skip.
      return false;
    }
    if (indexedKey != comparedIndexedKey
        && matcher.compareKeyForNextRow(indexedKey, cell) < 0) {
      // The indexed key changed and the comparison no longer favours skipping.
      return false;
    }
    // Skip one cell and count it as scanned.
    this.heap.next();
    ++kvsScanned;
    comparedIndexedKey = indexedKey;
    upcoming = this.heap.peek();
  } while (upcoming != null && CellUtil.matchingRows(cell, upcoming));
  // The heap is exhausted or its top cell is on a different row than cell.
  return true;
}

Example 11

Source File: TestSyncTable.java From hbase with Apache License 2.0

4 votes

/**
 * Scans the source and target tables side by side and asserts that every row present in both
 * tables matches cell by cell, while tolerating rows that exist only on the target (they are
 * expected to survive because deletes were disabled).
 *
 * <p>Row keys are interpreted as ints. Two key ranges get special treatment:
 * <ul>
 * <li>Rows in [70, 80) must have more cells in the target than in the source.</li>
 * <li>Rows in [80, 90) are exempt from the timestamp comparison -- presumably because the test
 * data gives them different timestamps on purpose; confirm against the data setup.</li>
 * </ul>
 *
 * <p>Scanners and tables are closed even when an assertion fails.
 *
 * @param expectedRows number of rows expected in the target table
 * @param sourceTableName name of the source table
 * @param targetTableName name of the target table
 * @throws Exception if reading either table fails
 */
private void assertTargetDoDeletesFalse(int expectedRows, TableName sourceTableName,
    TableName targetTableName) throws Exception {
  try (Table sourceTable = TEST_UTIL.getConnection().getTable(sourceTableName);
      Table targetTable = TEST_UTIL.getConnection().getTable(targetTableName);
      ResultScanner sourceScanner = sourceTable.getScanner(new Scan());
      ResultScanner targetScanner = targetTable.getScanner(new Scan())) {
    Result targetRow = targetScanner.next();
    Result sourceRow = sourceScanner.next();
    int rowsCount = 0;
    while (targetRow != null) {
      rowsCount++;
      //only compares values for existing rows, skipping rows existing on
      //target only that were not deleted given --doDeletes=false
      // An exhausted source scanner means all remaining target rows are target-only.
      if (sourceRow == null
          || Bytes.toInt(sourceRow.getRow()) != Bytes.toInt(targetRow.getRow())) {
        targetRow = targetScanner.next();
        continue;
      }

      LOG.debug("SOURCE row: " + Bytes.toInt(sourceRow.getRow()) + " cells:" + sourceRow);
      LOG.debug("TARGET row: " + Bytes.toInt(targetRow.getRow()) + " cells:" + targetRow);

      Cell[] sourceCells = sourceRow.rawCells();
      Cell[] targetCells = targetRow.rawCells();
      int targetRowKey = Bytes.toInt(targetRow.getRow());
      if (targetRowKey >= 70 && targetRowKey < 80) {
        // The target must have strictly more cells; this also guarantees the per-cell loop
        // below never indexes past the end of targetCells.
        if (sourceCells.length >= targetCells.length) {
          LOG.debug("Source cells: " + Arrays.toString(sourceCells));
          LOG.debug("Target cells: " + Arrays.toString(targetCells));
          Assert.fail("Row " + targetRowKey + " should have more cells in "
              + "target than in source");
        }
      } else {
        if (sourceCells.length != targetCells.length) {
          LOG.debug("Source cells: " + Arrays.toString(sourceCells));
          LOG.debug("Target cells: " + Arrays.toString(targetCells));
          Assert.fail("Row " + Bytes.toInt(sourceRow.getRow())
              + " has " + sourceCells.length
              + " cells in source table but " + targetCells.length
              + " cells in target table");
        }
      }
      for (int j = 0; j < sourceCells.length; j++) {
        Cell sourceCell = sourceCells[j];
        Cell targetCell = targetCells[j];
        try {
          if (!CellUtil.matchingRows(sourceCell, targetCell)) {
            Assert.fail("Rows don't match");
          }
          if (!CellUtil.matchingFamily(sourceCell, targetCell)) {
            Assert.fail("Families don't match");
          }
          if (!CellUtil.matchingQualifier(sourceCell, targetCell)) {
            Assert.fail("Qualifiers don't match");
          }
          // Timestamps are compared for every row outside [80, 90). The previous condition
          // used '&&', which can never be true, so the check was silently skipped.
          if (targetRowKey < 80 || targetRowKey >= 90) {
            if (!CellUtil.matchingTimestamp(sourceCell, targetCell)) {
              Assert.fail("Timestamps don't match");
            }
          }
          if (!CellUtil.matchingValue(sourceCell, targetCell)) {
            Assert.fail("Values don't match");
          }
        } catch (Throwable t) {
          // Log the offending pair before rethrowing so the failure can be diagnosed.
          LOG.debug("Source cell: " + sourceCell + " target cell: "
              + targetCell);
          Throwables.propagate(t);
        }
      }
      targetRow = targetScanner.next();
      sourceRow = sourceScanner.next();
    }
    assertEquals("Target expected rows does not match.", expectedRows, rowsCount);
  }
}

Example 12

Source File: BatchScanResultCache.java From hbase with Apache License 2.0

4 votes

/**
 * Filters the incoming results against {@code lastCell}, regroups them through
 * {@code regroupResults} and returns the regrouped results, keeping
 * {@code numberOfCompleteRows} up to date along the way.
 *
 * @param results results received from the server
 * @param isHeartbeatMessage whether the response carrying {@code results} was a heartbeat
 * @return the regrouped results, possibly empty
 * @throws IOException declared by the overridden method
 */
@Override
public Result[] addAndGet(Result[] results, boolean isHeartbeatMessage) throws IOException {
  if (results.length == 0) {
    if (!isHeartbeatMessage) {
      // Anything still buffered is handed back as one completed result.
      if (!partialResults.isEmpty()) {
        return new Result[] { createCompletedResult() };
      }
      if (lastResultPartial) {
        // An empty non heartbeat result indicate that there must be a row change. So if the
        // lastResultPartial is true then we need to increase numberOfCompleteRows.
        numberOfCompleteRows++;
      }
    }
    return EMPTY_RESULT_ARRAY;
  }
  List<Result> regroupedResults = new ArrayList<>();
  for (Result result : results) {
    result = filterCells(result, lastCell);
    // A null return from filterCells means there is nothing to process for this result.
    if (result == null) {
      continue;
    }
    if (!partialResults.isEmpty()) {
      if (!Bytes.equals(partialResults.peek().getRow(), result.getRow())) {
        // there is a row change
        regroupedResults.add(createCompletedResult());
      }
    } else if (lastResultPartial && !CellUtil.matchingRows(lastCell, result.getRow())) {
      // As for batched scan we may return partial results to user if we reach the batch limit, so
      // here we need to use lastCell to determine if there is row change and increase
      // numberOfCompleteRows.
      numberOfCompleteRows++;
    }
    // check if we have a row change
    // NOTE(review): this repeats the row-change test made just above. It looks redundant if
    // createCompletedResult() empties partialResults -- confirm before removing.
    if (!partialResults.isEmpty() &&
        !Bytes.equals(partialResults.peek().getRow(), result.getRow())) {
      regroupedResults.add(createCompletedResult());
    }
    Result regroupedResult = regroupResults(result);
    if (regroupedResult != null) {
      if (!regroupedResult.mayHaveMoreCellsInRow()) {
        numberOfCompleteRows++;
      }
      regroupedResults.add(regroupedResult);
      // only update last cell when we actually return it to user.
      recordLastResult(regroupedResult);
    }
    if (!result.mayHaveMoreCellsInRow() && !partialResults.isEmpty()) {
      // We are done for this row
      regroupedResults.add(createCompletedResult());
    }
  }
  return regroupedResults.toArray(new Result[0]);
}

Example 13

Source File: WriteHeavyIncrementObserver.java From hbase with Apache License 2.0

4 votes

/**
 * Wraps {@code scanner} in a scanner that collapses consecutive cells sharing a row and
 * qualifier into a single cell carrying the sum of their long values.
 *
 * <p>The running row, qualifier, timestamp and sum are kept across calls to {@code next}; a
 * summed cell is emitted whenever the qualifier or the row changes and when the wrapped scanner
 * reports that there are no more rows.
 *
 * @param family family used for the emitted cells
 * @param scanner the scanner to read the source cells from
 * @return the summing scanner
 */
private InternalScanner wrap(byte[] family, InternalScanner scanner) {
  return new InternalScanner() {

    private List<Cell> srcResult = new ArrayList<>();

    private byte[] row;

    private byte[] qualifier;

    private long timestamp;

    private long sum;

    @Override
    public boolean next(List<Cell> result, ScannerContext scannerContext) throws IOException {
      boolean moreRows = scanner.next(srcResult, scannerContext);
      if (srcResult.isEmpty()) {
        // Nothing new was read; emit the pending sum if this was the end of the scan.
        if (!moreRows && row != null) {
          result.add(createCell(row, family, qualifier, timestamp, sum));
        }
        return moreRows;
      }
      Cell firstCell = srcResult.get(0);
      // All the cells of one batch come from the same row, so checking the first cell is
      // enough to detect a row change.
      if (row != null && !CellUtil.matchingRows(firstCell, row)) {
        // Emit what was accumulated for the previous row and start over.
        result.add(createCell(row, family, qualifier, timestamp, sum));
        sum = 0;
        row = null;
      }
      if (row == null) {
        row = CellUtil.cloneRow(firstCell);
        qualifier = CellUtil.cloneQualifier(firstCell);
      }
      for (Cell c : srcResult) {
        if (!CellUtil.matchingQualifier(c, qualifier)) {
          // Qualifier changed: emit the finished sum and start a new one for this qualifier.
          result.add(createCell(row, family, qualifier, timestamp, sum));
          qualifier = CellUtil.cloneQualifier(c);
          sum = 0;
        }
        sum += Bytes.toLong(c.getValueArray(), c.getValueOffset());
        timestamp = c.getTimestamp();
      }
      if (!moreRows) {
        result.add(createCell(row, family, qualifier, timestamp, sum));
      }
      srcResult.clear();
      return moreRows;
    }

    @Override
    public void close() throws IOException {
      scanner.close();
    }
  };
}