Java Code Examples for org.apache.hadoop.hbase.client.Scan#setMaxResultSize()

The following examples show how to use org.apache.hadoop.hbase.client.Scan#setMaxResultSize(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CubeHBaseRPC.java    From kylin-on-parquet-v2 with Apache License 2.0 6 votes vote down vote up
/**
 * Translates a {@link RawScan} description into an HBase {@link Scan}.
 *
 * <p>Caching and max result size are copied from {@code rawScan}, block caching is switched on
 * and the scan-metrics attribute is set to {@code true}. Start key, end key, fuzzy keys and
 * columns are applied only when the matching {@code rawScan} field is non-null.
 *
 * @param rawScan source of the scan settings
 * @return the newly created scan
 */
public static Scan buildScan(RawScan rawScan) {
    final Scan result = new Scan();

    // Settings that are always applied.
    result.setCaching(rawScan.hbaseCaching);
    result.setMaxResultSize(rawScan.hbaseMaxResultSize);
    result.setCacheBlocks(true);
    result.setAttribute(Scan.SCAN_ATTRIBUTES_METRICS_ENABLE, Bytes.toBytes(Boolean.TRUE));

    // Optional row range.
    if (null != rawScan.startKey) {
        result.setStartRow(rawScan.startKey);
    }
    if (null != rawScan.endKey) {
        result.setStopRow(rawScan.endKey);
    }

    // Optional filter and column selection.
    if (null != rawScan.fuzzyKeys) {
        applyFuzzyFilter(result, rawScan.fuzzyKeys);
    }
    if (null != rawScan.hbaseColumns) {
        applyHBaseColums(result, rawScan.hbaseColumns);
    }

    return result;
}
 
Example 2
Source File: TestPartialResultsFromClientSide.java    From hbase with Apache License 2.0 6 votes vote down vote up
/**
 * Scans the test table with both a max result size and a caching limit and checks that a partial
 * result is only ever seen when the result-size limit is expected to be reached first.
 *
 * @param resultSizeRowLimit The row limit that will be enforced through maxResultSize
 * @param cachingRowLimit The row limit that will be enforced through caching
 */
public void testPartialResultsAndCaching(int resultSizeRowLimit, int cachingRowLimit)
    throws Exception {
  final Scan partialScan = new Scan();
  partialScan.setAllowPartialResults(true);

  // Offset the cell count passed to getResultSizeForNumberOfCells so that the result size is not
  // an exact multiple of the number of cells in a row. That way partial results are returned
  // whenever the result size limit is reached before the caching limit.
  final int extraCells = NUM_COLS / 3;
  final long sizeLimit =
      getResultSizeForNumberOfCells(resultSizeRowLimit * NUM_COLS + extraCells);
  partialScan.setMaxResultSize(sizeLimit);
  partialScan.setCaching(cachingRowLimit);

  // Approximate the number of rows we expect will fit into the specified max result size. If
  // that approximation is below the caching limit, the size limit is hit first and partial
  // results may be seen.
  final boolean partialsAllowed = resultSizeRowLimit < cachingRowLimit;

  try (ResultScanner results = TABLE.getScanner(partialScan)) {
    for (Result row = results.next(); row != null; row = results.next()) {
      assertTrue(!row.mayHaveMoreCellsInRow() || partialsAllowed);
    }
  }
}
 
Example 3
Source File: TestPartialResultsFromClientSide.java    From hbase with Apache License 2.0 6 votes vote down vote up
/**
 * When a scan has a filter where {@link org.apache.hadoop.hbase.filter.Filter#hasFilterRow()} is
 * true, the scanner should not return partial results. The scanner cannot return partial results
 * because the entire row needs to be read for the include/exclude decision to be made.
 *
 * @throws Exception if the scan fails
 */
@Test
public void testNoPartialResultsWhenRowFilterPresent() throws Exception {
  Scan scan = new Scan();
  scan.setMaxResultSize(1);
  scan.setAllowPartialResults(true);
  // If a filter's hasFilterRow() is true then partial results should not be returned, else
  // filter application server side would break.
  scan.setFilter(new RandomRowFilter(1.0f));

  // try-with-resources: the scanner is released even when an assertion fails mid-scan.
  try (ResultScanner scanner = TABLE.getScanner(scan)) {
    Result r;
    while ((r = scanner.next()) != null) {
      assertFalse(r.mayHaveMoreCellsInRow());
    }
  }
}
 
Example 4
Source File: TestPartialResultsFromClientSide.java    From hbase with Apache License 2.0 6 votes vote down vote up
/**
 * When reconstructing the complete result from its partials we ensure that the row of each
 * partial result is the same. If one of the rows differs, an exception is thrown.
 *
 * @throws IOException if the scan itself fails
 */
@Test
public void testExceptionThrownOnMismatchedPartialResults() throws IOException {
  assertTrue(NUM_ROWS >= 2);

  List<Result> partials = new ArrayList<>();
  Scan scan = new Scan();
  scan.setMaxResultSize(Long.MAX_VALUE);

  // try-with-resources: the scanner is released even when an assertion below fails.
  try (ResultScanner scanner = TABLE.getScanner(scan)) {
    Result r1 = scanner.next();
    partials.add(r1);
    Result r2 = scanner.next();
    partials.add(r2);

    // The two results must come from different rows for the check below to be meaningful.
    assertFalse(Bytes.equals(r1.getRow(), r2.getRow()));

    try {
      Result.createCompleteResult(partials);
      fail("r1 and r2 are from different rows. It should not be possible to combine them into"
          + " a single result");
    } catch (IOException expected) {
      // Expected: combining results from different rows must be rejected.
    }
  }
}
 
Example 5
Source File: CubeHBaseRPC.java    From kylin with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the HBase {@link Scan} that corresponds to the given {@link RawScan}.
 *
 * <p>The caching size and max result size come from {@code rawScan}; block caching and the
 * scan-metrics attribute are always enabled. Row bounds, the fuzzy filter and the column
 * selection are each applied only if {@code rawScan} carries a non-null value for them.
 *
 * @param rawScan description of the scan to build
 * @return a fresh scan configured from {@code rawScan}
 */
public static Scan buildScan(RawScan rawScan) {
    final Scan hbaseScan = new Scan();

    hbaseScan.setCaching(rawScan.hbaseCaching);
    hbaseScan.setMaxResultSize(rawScan.hbaseMaxResultSize);
    hbaseScan.setCacheBlocks(true);
    hbaseScan.setAttribute(Scan.SCAN_ATTRIBUTES_METRICS_ENABLE, Bytes.toBytes(Boolean.TRUE));

    final boolean hasStart = rawScan.startKey != null;
    if (hasStart) {
        hbaseScan.setStartRow(rawScan.startKey);
    }

    final boolean hasEnd = rawScan.endKey != null;
    if (hasEnd) {
        hbaseScan.setStopRow(rawScan.endKey);
    }

    final boolean hasFuzzyKeys = rawScan.fuzzyKeys != null;
    if (hasFuzzyKeys) {
        applyFuzzyFilter(hbaseScan, rawScan.fuzzyKeys);
    }

    final boolean hasColumns = rawScan.hbaseColumns != null;
    if (hasColumns) {
        applyHBaseColums(hbaseScan, rawScan.hbaseColumns);
    }

    return hbaseScan;
}
 
Example 6
Source File: TestPartialResultsFromClientSide.java    From hbase with Apache License 2.0 6 votes vote down vote up
/**
 * Scans every row of the test table with a max result size of 1 and verifies that each result
 * handed back to the caller is complete and holds the expected key values for its row.
 *
 * @param reversed whether the scan is run in reverse, in which case rows are expected from the
 *          last entry of {@code ROWS} down to the first
 * @throws Exception if the scan or the verification fails
 */
public void testExpectedValuesOfPartialResults(boolean reversed) throws Exception {
  Scan partialScan = new Scan();
  partialScan.readAllVersions();
  // Max result size of 1 ensures that each RPC request will return a single cell. The scanner
  // will need to reconstruct the results into a complete result before returning to the caller
  partialScan.setMaxResultSize(1);
  partialScan.setReversed(reversed);

  // Walk the ROWS indices in the same direction as the scan.
  final int startRow = reversed ? ROWS.length - 1 : 0;
  final int endRow = reversed ? -1 : ROWS.length;
  final int loopDelta = reversed ? -1 : 1;

  // try-with-resources: the scanner is released even when verification fails part-way through.
  try (ResultScanner partialScanner = TABLE.getScanner(partialScan)) {
    for (int row = startRow; row != endRow; row = row + loopDelta) {
      String message = "Ensuring the expected keyValues are present for row " + row;
      List<Cell> expectedKeyValues =
          createKeyValuesForRow(ROWS[row], FAMILIES, QUALIFIERS, VALUE);
      Result result = partialScanner.next();
      assertFalse(result.mayHaveMoreCellsInRow());
      verifyResult(result, expectedKeyValues, message);
    }
  }
}
 
Example 7
Source File: TestPartialResultsFromClientSide.java    From hbase with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the cached cell heap size, computing it on first use from a single-cell partial scan.
 *
 * @return The approximate heap size of a cell in the test table. All cells should have
 *         approximately the same heap size, so the value is cached to avoid repeating the
 *         calculation
 * @throws Exception if the measuring scan fails
 */
private long getCellHeapSize() throws Exception {
  // -1 marks "not computed yet".
  if (CELL_HEAP_SIZE == -1) {
    // Do a partial scan that will return a single result with a single cell
    Scan scan = new Scan();
    scan.setMaxResultSize(2);
    scan.setAllowPartialResults(true);

    // try-with-resources: the scanner is released even when one of the sanity checks fails.
    try (ResultScanner scanner = TABLE.getScanner(scan)) {
      Result result = scanner.next();

      assertTrue(result != null);
      assertTrue(result.rawCells() != null);
      assertTrue(result.rawCells().length == 1);

      // Estimate the cell heap size. One difference is that on server side, the KV Heap size is
      // estimated differently in case the cell is backed up by MSLAB byte[] (no overhead for
      // backing array). Thus below calculation is a bit brittle.
      CELL_HEAP_SIZE = result.rawCells()[0].heapSize() - (ClassSize.ARRAY + 3);
      if (LOG.isInfoEnabled()) {
        LOG.info("Cell heap size: " + CELL_HEAP_SIZE);
      }
    }
  }

  return CELL_HEAP_SIZE;
}
 
Example 8
Source File: PcapScannerHBaseImpl.java    From opensoc-streaming with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the scan request.
 *
 * <p>Row keys are encoded as UTF-8 so the resulting bytes do not depend on the JVM's default
 * charset. A time range is set only when at least one of {@code startTime} / {@code endTime} is
 * non-negative; a negative bound on one side is replaced by {@code 0} (start) or
 * {@code Long.MAX_VALUE} (end).
 *
 * @param cf
 *          the column family to add to the scan
 * @param cq
 *          the column qualifier to add to the scan
 * @param startKey
 *          the start key; must not be null
 * @param endKey
 *          the end key; when null no stop row is set
 * @param maxResultSize
 *          the max result size
 * @param startTime
 *          the start time; negative means "unbounded"
 * @param endTime
 *          the end time; negative means "unbounded"
 * @return the scan
 * @throws IOException
 *           Signals that an I/O exception has occurred.
 */
@VisibleForTesting
Scan createScanRequest(byte[] cf, byte[] cq, String startKey, String endKey,
    long maxResultSize, long startTime, long endTime) throws IOException {
  Scan scan = new Scan();
  scan.addColumn(cf, cq);
  scan.setMaxVersions(ConfigurationUtil.getConfiguration().getInt(
      "hbase.table.column.maxVersions"));

  // Explicit charset: String#getBytes() without one uses the platform default, which would make
  // the row-key bytes vary between JVMs.
  scan.setStartRow(startKey.getBytes(java.nio.charset.StandardCharsets.UTF_8));
  if (endKey != null) {
    scan.setStopRow(endKey.getBytes(java.nio.charset.StandardCharsets.UTF_8));
  }
  scan.setMaxResultSize(maxResultSize);

  // Both bounds negative means the caller asked for no time restriction at all.
  boolean setTimeRange = startTime >= 0 || endTime >= 0;
  if (setTimeRange) {
    long rangeStart = startTime < 0
        ? 0L
        : PcapHelper.convertToDataCreationTimeUnit(startTime);
    long rangeEnd = endTime < 0
        ? Long.MAX_VALUE
        : PcapHelper.convertToDataCreationTimeUnit(endTime);
    Assert.isTrue(rangeStart < rangeEnd,
        "startTime value must be less than endTime value");
    scan.setTimeRange(rangeStart, rangeEnd);
  }
  return scan;
}
 
Example 9
Source File: TestPartialResultsFromClientSide.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * For each of the {@code NUM_ROWS} rows, collects the partial results of a scan with a max
 * result size of 1, combines them with {@code Result.createCompleteResult} and compares the
 * outcome with the result of an equivalent scan whose max result size is
 * {@code Long.MAX_VALUE}. Both scanners must be exhausted afterwards.
 *
 * @param scanBase scan whose settings are copied into both the partial and the one-shot scan
 * @throws Exception if a scan or a comparison fails
 */
public void testPartialResultsReassembly(Scan scanBase) throws Exception {
  Scan partialScan = new Scan(scanBase);
  partialScan.setMaxResultSize(1);
  partialScan.setAllowPartialResults(true);

  Scan oneShotScan = new Scan(scanBase);
  oneShotScan.setMaxResultSize(Long.MAX_VALUE);

  // try-with-resources: both scanners are released even when a comparison fails.
  try (ResultScanner partialScanner = TABLE.getScanner(partialScan);
      ResultScanner oneShotScanner = TABLE.getScanner(oneShotScan)) {
    List<Result> partials = new ArrayList<>();
    for (int i = 0; i < NUM_ROWS; i++) {
      partials.clear();

      // Gather every piece of the current row; the last piece no longer reports more cells.
      Result partialResult;
      do {
        partialResult = partialScanner.next();
        partials.add(partialResult);
      } while (partialResult != null && partialResult.mayHaveMoreCellsInRow());

      Result completeResult = Result.createCompleteResult(partials);
      Result oneShotResult = oneShotScanner.next();

      compareResults(completeResult, oneShotResult, null);
    }

    assertTrue(oneShotScanner.next() == null);
    assertTrue(partialScanner.next() == null);
  }
}
 
Example 10
Source File: TestPartialResultsFromClientSide.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Scans the test table with both a batch limit and a max result size and checks the number of
 * cells per result: results that report more cells in the row must hold exactly {@code batch}
 * cells, all others at most {@code batch} cells.
 *
 * @param batch the value passed to {@code Scan#setBatch}
 * @param cellsPerPartialResult the cell count from which the max result size is derived
 * @throws Exception if the scan fails
 */
public void testPartialResultsAndBatch(final int batch, final int cellsPerPartialResult)
    throws Exception {
  if (LOG.isInfoEnabled()) {
    LOG.info("batch: " + batch + " cellsPerPartialResult: " + cellsPerPartialResult);
  }

  Scan scan = new Scan();
  scan.setMaxResultSize(getResultSizeForNumberOfCells(cellsPerPartialResult));
  scan.setBatch(batch);

  // try-with-resources: the scanner is released even when an assertion fails mid-scan.
  try (ResultScanner scanner = TABLE.getScanner(scan)) {
    // NOTE(review): the first result is read here and never checked (behavior kept as-is) --
    // confirm whether skipping it is intentional.
    Result result = scanner.next();
    int repCount = 0;

    while ((result = scanner.next()) != null) {
      assertTrue(result.rawCells() != null);

      if (result.mayHaveMoreCellsInRow()) {
        final String error =
            "Cells:" + result.rawCells().length + " Batch size:" + batch
                + " cellsPerPartialResult:" + cellsPerPartialResult + " rep:" + repCount;
        assertTrue(error, result.rawCells().length == batch);
      } else {
        assertTrue(result.rawCells().length <= batch);
      }
      repCount++;
    }
  }
}
 
Example 11
Source File: TestPartialResultsFromClientSide.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Scans with partial results allowed and a max result size derived from
 * {@code expectedNumberOfCells}, then checks the cell count of every returned result.
 *
 * @param baseScan scan whose settings are copied into the scan under test
 * @param expectedNumberOfCells number of cells each partial result is expected to hold
 * @throws Exception if the scan fails
 */
public void testExpectedNumberOfCellsPerPartialResult(Scan baseScan, int expectedNumberOfCells)
    throws Exception {

  if (LOG.isInfoEnabled()) {
    LOG.info("groupSize:" + expectedNumberOfCells);
  }

  // Use the cellHeapSize to set maxResultSize such that we know how many cells to expect back
  // from the call. The returned results should NOT exceed expectedNumberOfCells but may be less
  // than it in cases where expectedNumberOfCells is not an exact multiple of the number of
  // columns in the table.
  Scan scan = new Scan(baseScan);
  scan.setAllowPartialResults(true);
  scan.setMaxResultSize(getResultSizeForNumberOfCells(expectedNumberOfCells));

  // try-with-resources: the scanner is released even when an assertion fails mid-scan.
  try (ResultScanner scanner = TABLE.getScanner(scan)) {
    Result result;
    byte[] prevRow = null;
    while ((result = scanner.next()) != null) {
      assertTrue(result.rawCells() != null);

      // Cases when cell count won't equal expectedNumberOfCells:
      // 1. Returned result is the final result needed to form the complete result for that row
      // 2. It is the first result we have seen for that row and thus may have been fetched as
      // the last group of cells that fit inside the maxResultSize
      assertTrue(
          "Result's cell count differed from expected number. result: " + result,
          result.rawCells().length == expectedNumberOfCells || !result.mayHaveMoreCellsInRow()
              || !Bytes.equals(prevRow, result.getRow()));
      prevRow = result.getRow();
    }
  }
}
 
Example 12
Source File: TestPartialResultsFromClientSide.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Order of cells in partial results matches the ordering of cells from complete results. The
 * check is run for every partial-result size from 1 to {@code NUM_COLS} cells, once with a
 * forward scan and once with a reversed scan.
 *
 * @throws Exception if one of the scans fails
 */
@Test
public void testOrderingOfCellsInPartialResults() throws Exception {
  Scan scan = new Scan();

  for (int col = 1; col <= NUM_COLS; col++) {
    scan.setMaxResultSize(getResultSizeForNumberOfCells(col));

    // The same Scan instance is reused across iterations, so reset the direction explicitly;
    // otherwise every "forward" pass after the first iteration would still be reversed.
    scan.setReversed(false);
    testOrderingOfCellsInPartialResults(scan);

    // Test again with a reversed scanner
    scan.setReversed(true);
    testOrderingOfCellsInPartialResults(scan);
  }
}
 
Example 13
Source File: TestPartialResultsFromClientSide.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a scanner fetching everything in one RPC call and a scanner that has to stitch
 * partial results back into complete results hand back the same data. The comparison itself is
 * delegated to the three-argument overload, so other scan configurations can be tested the same
 * way.
 *
 * @throws Exception
 */
@Test
public void testEquivalenceOfScanResults() throws Exception {
  // Reference scan: no practical limit on the result size.
  final Scan unlimitedScan = new Scan();
  unlimitedScan.setMaxResultSize(Long.MAX_VALUE);

  // Same settings, but with the smallest possible result size.
  final Scan tinyScan = new Scan(unlimitedScan);
  tinyScan.setMaxResultSize(1);

  testEquivalenceOfScanResults(TABLE, unlimitedScan, tinyScan);
}
 
Example 14
Source File: TestPartialResultsFromClientSide.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Ensure that we only see Results marked as partial when the allowPartial flag is set.
 *
 * @throws Exception if a scan fails
 */
@Test
public void testAllowPartialResults() throws Exception {
  Scan scan = new Scan();
  scan.setAllowPartialResults(true);
  scan.setMaxResultSize(1);

  // With partial results allowed, the first result holds a single cell and reports more to come.
  // try-with-resources: the scanner is released even when an assertion fails.
  try (ResultScanner scanner = TABLE.getScanner(scan)) {
    Result result = scanner.next();

    assertTrue(result != null);
    assertTrue(result.mayHaveMoreCellsInRow());
    assertTrue(result.rawCells() != null);
    assertTrue(result.rawCells().length == 1);
  }

  // With partial results disallowed, the first result must be a complete row.
  scan.setAllowPartialResults(false);
  try (ResultScanner scanner = TABLE.getScanner(scan)) {
    Result result = scanner.next();

    assertTrue(result != null);
    assertTrue(!result.mayHaveMoreCellsInRow());
    assertTrue(result.rawCells() != null);
    assertTrue(result.rawCells().length == NUM_COLS);
  }
}
 
Example 15
Source File: TestPartialResultsFromClientSide.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a reversed scan with max result size 1 and caching 1 over a freshly created test table,
 * calling {@code moveRegion} before the scan and between consecutive reads, and checks that the
 * first three results are complete rows (last row first).
 *
 * @throws IOException if table creation, the region move or the scan fails
 */
@Test
public void testReversedCompleteResultWhenRegionMove() throws IOException {
  // try-with-resources: table and scanner are released even when an assertion fails.
  try (Table table = createTestTable(TableName.valueOf(name.getMethodName()),
      ROWS, FAMILIES, QUALIFIERS, VALUE)) {

    moveRegion(table, 1);

    Scan scan = new Scan();
    scan.setMaxResultSize(1);
    scan.setCaching(1);
    scan.setReversed(true);

    try (ResultScanner scanner = table.getScanner(scan)) {
      Result result1 = scanner.next();
      assertEquals(NUM_FAMILIES * NUM_QUALIFIERS, result1.rawCells().length);
      Cell c1 = result1.rawCells()[0];
      assertCell(c1, ROWS[NUM_ROWS - 1], FAMILIES[0], QUALIFIERS[0]);
      assertFalse(result1.mayHaveMoreCellsInRow());

      moveRegion(table, 2);

      Result result2 = scanner.next();
      assertEquals(NUM_FAMILIES * NUM_QUALIFIERS, result2.rawCells().length);
      Cell c2 = result2.rawCells()[0];
      assertCell(c2, ROWS[NUM_ROWS - 2], FAMILIES[0], QUALIFIERS[0]);
      assertFalse(result2.mayHaveMoreCellsInRow());

      moveRegion(table, 3);

      Result result3 = scanner.next();
      assertEquals(NUM_FAMILIES * NUM_QUALIFIERS, result3.rawCells().length);
      Cell c3 = result3.rawCells()[0];
      assertCell(c3, ROWS[NUM_ROWS - 3], FAMILIES[0], QUALIFIERS[0]);
      assertFalse(result3.mayHaveMoreCellsInRow());
    }
  }
}
 
Example 16
Source File: TestPartialResultsFromClientSide.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a forward scan with max result size 1 and caching 1 over a freshly created test table,
 * calling {@code moveRegion} before the scan and between consecutive reads, and checks that the
 * first three results are complete rows (first row first).
 *
 * @throws IOException if table creation, the region move or the scan fails
 */
@Test
public void testCompleteResultWhenRegionMove() throws IOException {
  // try-with-resources: table and scanner are released even when an assertion fails.
  try (Table table = createTestTable(TableName.valueOf(name.getMethodName()),
      ROWS, FAMILIES, QUALIFIERS, VALUE)) {

    moveRegion(table, 1);

    Scan scan = new Scan();
    scan.setMaxResultSize(1);
    scan.setCaching(1);

    try (ResultScanner scanner = table.getScanner(scan)) {
      Result result1 = scanner.next();
      assertEquals(NUM_FAMILIES * NUM_QUALIFIERS, result1.rawCells().length);
      Cell c1 = result1.rawCells()[0];
      assertCell(c1, ROWS[0], FAMILIES[0], QUALIFIERS[0]);
      assertFalse(result1.mayHaveMoreCellsInRow());

      moveRegion(table, 2);

      Result result2 = scanner.next();
      assertEquals(NUM_FAMILIES * NUM_QUALIFIERS, result2.rawCells().length);
      Cell c2 = result2.rawCells()[0];
      assertCell(c2, ROWS[1], FAMILIES[0], QUALIFIERS[0]);
      assertFalse(result2.mayHaveMoreCellsInRow());

      moveRegion(table, 3);

      Result result3 = scanner.next();
      assertEquals(NUM_FAMILIES * NUM_QUALIFIERS, result3.rawCells().length);
      Cell c3 = result3.rawCells()[0];
      assertCell(c3, ROWS[2], FAMILIES[0], QUALIFIERS[0]);
      assertFalse(result3.mayHaveMoreCellsInRow());
    }
  }
}
 
Example 17
Source File: PcapGetterHBaseImpl.java    From opensoc-streaming with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the scan request.
 *
 * <p>Row keys are encoded as UTF-8 so the resulting bytes do not depend on the JVM's default
 * charset. The scan's max result size is set only when there is room left after subtracting the
 * current response size and the configured maximum row size from {@code maxResultSize}.
 *
 * @param pcapsResponse
 *          the pcaps response; its current response size reduces the remaining size budget
 * @param keysMap
 *          the keys map; read for {@code HBaseConfigConstants.START_KEY} and
 *          {@code HBaseConfigConstants.END_KEY}
 * @param startTime
 *          the start time
 * @param endTime
 *          the end time
 * @param maxResultSize
 *          the max result size
 * @return the scan
 * @throws IOException
 *           Signals that an I/O exception has occurred.
 */
@VisibleForTesting
Scan createScanRequest(PcapsResponse pcapsResponse,
    Map<String, String> keysMap, long startTime, long endTime,
    long maxResultSize) throws IOException {
  Scan scan = new Scan();
  // set column family, qualifier
  scan.addColumn(ConfigurationUtil.getColumnFamily(),
      ConfigurationUtil.getColumnQualifier());

  // set start and stop keys; explicit charset because String#getBytes() without one uses the
  // platform default, which would make the row-key bytes vary between JVMs
  scan.setStartRow(keysMap.get(HBaseConfigConstants.START_KEY)
      .getBytes(java.nio.charset.StandardCharsets.UTF_8));
  scan.setStopRow(keysMap.get(HBaseConfigConstants.END_KEY)
      .getBytes(java.nio.charset.StandardCharsets.UTF_8));

  // set max results size : remaining size = max results size - ( current
  // pcaps response size + possible maximum row size)
  long remainingSize = maxResultSize
      - (pcapsResponse.getResponseSize() + ConfigurationUtil.getMaxRowSize());

  // a non-positive remainder leaves the scan's max result size untouched
  if (remainingSize > 0) {
    scan.setMaxResultSize(remainingSize);
  }
  // set max versions
  scan.setMaxVersions(ConfigurationUtil.getConfiguration().getInt(
      "hbase.table.column.maxVersions"));

  // set time range
  setTimeRangeOnScan(scan, startTime, endTime);
  return scan;
}
 
Example 18
Source File: TestPartialResultsFromClientSide.java    From hbase with Apache License 2.0 4 votes vote down vote up
/**
 * Compares, row by row, the cells gathered from the partial results of one scan with the cells
 * of the complete results of a second scan, and requires both count and order to match.
 *
 * @param basePartialScan scan whose settings are copied into both the partial and the one-shot
 *          scan
 * @throws Exception if a scan fails
 */
public void testOrderingOfCellsInPartialResults(final Scan basePartialScan) throws Exception {
  // Scan that retrieves results in pieces (partials). By setting allowPartialResults to be true
  // the results will NOT be reconstructed and instead the caller will see the partial results
  // returned by the server
  Scan partialScan = new Scan(basePartialScan);
  partialScan.setAllowPartialResults(true);

  // Scan that retrieves all table results in single RPC request
  Scan oneShotScan = new Scan(basePartialScan);
  oneShotScan.setMaxResultSize(Long.MAX_VALUE);
  oneShotScan.setCaching(ROWS.length);

  // try-with-resources: both scanners are released even when an assertion fails mid-comparison.
  try (ResultScanner partialScanner = TABLE.getScanner(partialScan);
      ResultScanner oneShotScanner = TABLE.getScanner(oneShotScan)) {
    Result oneShotResult = oneShotScanner.next();
    int iterationCount = 0;

    while (oneShotResult != null && oneShotResult.rawCells() != null) {
      // Collect all partial pieces that belong to the current row.
      List<Cell> aggregatePartialCells = new ArrayList<>();
      Result partialResult;
      do {
        partialResult = partialScanner.next();
        assertTrue("Partial Result is null. iteration: " + iterationCount,
            partialResult != null);
        assertTrue("Partial cells are null. iteration: " + iterationCount,
            partialResult.rawCells() != null);

        for (Cell c : partialResult.rawCells()) {
          aggregatePartialCells.add(c);
        }
      } while (partialResult.mayHaveMoreCellsInRow());

      assertTrue("Number of cells differs. iteration: " + iterationCount,
          oneShotResult.rawCells().length == aggregatePartialCells.size());
      final Cell[] oneShotCells = oneShotResult.rawCells();
      for (int cell = 0; cell < oneShotCells.length; cell++) {
        Cell oneShotCell = oneShotCells[cell];
        Cell partialCell = aggregatePartialCells.get(cell);

        assertTrue("One shot cell was null", oneShotCell != null);
        assertTrue("Partial cell was null", partialCell != null);
        assertTrue("Cell differs. oneShotCell:" + oneShotCell + " partialCell:" + partialCell,
            oneShotCell.equals(partialCell));
      }

      oneShotResult = oneShotScanner.next();
      iterationCount++;
    }

    // Once the one-shot scanner is exhausted, the partial scanner must be as well.
    assertTrue(partialScanner.next() == null);
  }
}
 
Example 19
Source File: SlicedRowFilterGTSDecoderIterator.java    From warp10-platform with Apache License 2.0 4 votes vote down vote up
/**
 * Builds an iterator backed by a single HBase scan restricted by a {@code SlicedRowFilter}.
 *
 * <p>One key range is computed per entry of {@code metadatas} via {@code getKeys}; the ranges
 * feed the filter, and the scan's start/stop rows are taken from the filter. If opening the
 * table or the scanner fails with an {@code IOException}, the error is logged and the internal
 * iterator is left {@code null}.
 *
 * @param now passed to {@code getKeys}; when {@code timespan} is negative it must be
 *          {@code Long.MAX_VALUE} or a multiple of {@code Constants.DEFAULT_MODULUS}
 * @param timespan passed to {@code getKeys}; when negative, its absolute value is handed to the
 *          filter as third argument, otherwise {@code Long.MAX_VALUE} is used
 * @param metadatas the metadata entries to build key ranges for
 * @param conn connection used to obtain the table
 * @param tableName name of the table to scan
 * @param colfam column family added to the scan
 * @param writeTimestamp stored on the instance
 * @param keystore stored on the instance and queried for {@code KeyStore.AES_HBASE_DATA}
 * @param useBlockCache value passed to {@code Scan#setCacheBlocks}
 * @throws RuntimeException if {@code timespan} and {@code now} are incompatible
 */
public SlicedRowFilterGTSDecoderIterator(long now, long timespan, List<Metadata> metadatas, Connection conn, TableName tableName, byte[] colfam, boolean writeTimestamp, KeyStore keystore, boolean useBlockCache) {

  this.keystore = keystore;
  this.now = now;
  this.timespan = timespan;
  this.hbaseAESKey = keystore.getKey(KeyStore.AES_HBASE_DATA);
  this.writeTimestamp = writeTimestamp;

  // A negative 'timespan' is only accepted when 'now' is Long.MAX_VALUE or congruent to 0
  // modulo DEFAULT_MODULUS.
  if (timespan < 0 && now != Long.MAX_VALUE && (now % Constants.DEFAULT_MODULUS) != 0) {
    throw new RuntimeException("Incompatible 'timespan' (" + timespan + ") and 'now' (" + now + ")");
  }

  // Slice handed to the SlicedRowFilter: prefix, class id, labels id and ts. The prefix is
  // included so the filter is exited early once the last matching row has been reached.
  // 128BITS
  final int[] sliceBounds = { 0, 24 };

  // One range per classId/labelsId combo.
  // TODO(hbs): we should really create multiple scanner, one per class Id for example,
  final List<Pair<byte[], byte[]>> keyRanges = new ArrayList<>();

  for (Metadata meta : metadatas) {
    final byte[][] bounds = getKeys(meta, now, timespan);
    final byte[] lowerKey = bounds[0];
    final byte[] upperKey = bounds[1];

    // Index the metadata by the 16 bytes that follow the prefix in the lower key.
    final byte[] idBytes = Arrays.copyOfRange(lowerKey, prefix.length, prefix.length + 16);
    this.metadatas.put(new String(idBytes, StandardCharsets.ISO_8859_1), meta);

    keyRanges.add(new Pair<>(lowerKey, upperKey));
  }

  final long filterLimit = timespan < 0 ? -timespan : Long.MAX_VALUE;
  final SlicedRowFilter rowFilter = new SlicedRowFilter(sliceBounds, keyRanges, filterLimit);

  // Configure the scan; start and stop rows are derived from the filter.
  final Scan filteredScan = new Scan();
  filteredScan.addFamily(colfam); // (HBaseStore.GTS_COLFAM, Longs.toByteArray(Long.MAX_VALUE - modulus));
  filteredScan.setStartRow(rowFilter.getStartKey());

  // Add one byte at the end (we can do that because we know the slice is the whole key)
  final byte[] rawStopKey = rowFilter.getStopKey();
  filteredScan.setStopRow(Arrays.copyOf(rawStopKey, rawStopKey.length + 1));
  filteredScan.setFilter(rowFilter);

  filteredScan.setMaxResultSize(1000000L);
  filteredScan.setBatch(50000);
  filteredScan.setCaching(50000);

  filteredScan.setCacheBlocks(useBlockCache);

  Sensision.update(SensisionConstants.SENSISION_CLASS_CONTINUUM_HBASE_CLIENT_FILTERED_SCANNERS, Sensision.EMPTY_LABELS, 1);
  Sensision.update(SensisionConstants.SENSISION_CLASS_CONTINUUM_HBASE_CLIENT_FILTERED_SCANNERS_RANGES, Sensision.EMPTY_LABELS, keyRanges.size());

  try {
    this.htable = conn.getTable(tableName);
    this.scanner = this.htable.getScanner(filteredScan);
    this.iter = this.scanner.iterator();
  } catch (IOException ioe) {
    LOG.error("",ioe);
    this.iter = null;
  }
}
 
Example 20
Source File: HBaseResourceStore.java    From kylin-on-parquet-v2 with Apache License 2.0 4 votes vote down vote up
/**
 * Applies the scan tuning read from {@code kylinConfig} to the given scan: the number of rows to
 * cache and the max result size. Block caching is always switched on.
 *
 * @param scan the scan to tune; modified in place
 */
private void tuneScanParameters(Scan scan) {
    // The Scan setters return the scan itself, so the three settings can be chained.
    scan.setCaching(kylinConfig.getHBaseScanCacheRows())
        .setMaxResultSize(kylinConfig.getHBaseScanMaxResultSize())
        .setCacheBlocks(true);
}