Java Code Examples for org.apache.hadoop.hbase.client.Scan#setStartRow()

The following examples show how to use org.apache.hadoop.hbase.client.Scan#setStartRow(). You can go to the original project or source file by following the links above each example.
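All of the snippets below follow the same half-open range convention: setStartRow() sets an inclusive lower bound and setStopRow() sets an exclusive upper bound on the row keys returned by the scan. The minimal sketch below shows that pattern in isolation; the table name and row keys are made up for illustration, and note that setStartRow()/setStopRow() are deprecated in HBase 2.0+ in favor of withStartRow()/withStopRow().

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class ScanRangeExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf("example_table"))) { // hypothetical table
            Scan scan = new Scan();
            scan.setStartRow(Bytes.toBytes("row-0010")); // inclusive start row
            scan.setStopRow(Bytes.toBytes("row-0020"));  // exclusive stop row
            try (ResultScanner scanner = table.getScanner(scan)) {
                for (Result result : scanner) {
                    System.out.println(Bytes.toStringBinary(result.getRow()));
                }
            }
        }
    }
}

On HBase 2.x the same range can be expressed as scan.withStartRow(startKey).withStopRow(stopKey).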
Example 1
Source File: HbaseMapResponseTimeDao.java    From pinpoint with Apache License 2.0
private Scan createScan(Application application, Range range, byte[] family) {
    range = rangeFactory.createStatisticsRange(range);
    if (logger.isDebugEnabled()) {
        logger.debug("scan time:{} ", range.prettyToString());
    }

    // start key is replaced by end key because timestamp has been reversed
    byte[] startKey = ApplicationMapStatisticsUtils.makeRowKey(application.getName(), application.getServiceTypeCode(), range.getTo());
    byte[] endKey = ApplicationMapStatisticsUtils.makeRowKey(application.getName(), application.getServiceTypeCode(), range.getFrom());

    final Scan scan = new Scan();
    scan.setCaching(this.scanCacheSize);
    scan.setStartRow(startKey);
    scan.setStopRow(endKey);
    scan.addFamily(family);
    scan.setId("ApplicationSelfScan");

    return scan;
}
 
Example 2
Source File: CubeHBaseRPC.java    From kylin with Apache License 2.0
public static Scan buildScan(RawScan rawScan) {
    Scan scan = new Scan();
    scan.setCaching(rawScan.hbaseCaching);
    scan.setMaxResultSize(rawScan.hbaseMaxResultSize);
    scan.setCacheBlocks(true);
    scan.setAttribute(Scan.SCAN_ATTRIBUTES_METRICS_ENABLE, Bytes.toBytes(Boolean.TRUE));

    if (rawScan.startKey != null) {
        scan.setStartRow(rawScan.startKey);
    }
    if (rawScan.endKey != null) {
        scan.setStopRow(rawScan.endKey);
    }
    if (rawScan.fuzzyKeys != null) {
        applyFuzzyFilter(scan, rawScan.fuzzyKeys);
    }
    if (rawScan.hbaseColumns != null) {
        applyHBaseColums(scan, rawScan.hbaseColumns);
    }

    return scan;
}
 
Example 3
Source File: HbaseAgentInfoDao.java    From pinpoint with Apache License 2.0
private Scan createScan(String agentId, long currentTime) {
    Scan scan = new Scan();

    byte[] agentIdBytes = Bytes.toBytes(agentId);
    long startTime = TimeUtils.reverseTimeMillis(currentTime);
    byte[] startKeyBytes = RowKeyUtils.concatFixedByteAndLong(agentIdBytes, HbaseTableConstatns.AGENT_NAME_MAX_LEN, startTime);
    byte[] endKeyBytes = RowKeyUtils.concatFixedByteAndLong(agentIdBytes, HbaseTableConstatns.AGENT_NAME_MAX_LEN, Long.MAX_VALUE);

    scan.setStartRow(startKeyBytes);
    scan.setStopRow(endKeyBytes);
    scan.addFamily(descriptor.getColumnFamilyName());

    scan.setMaxVersions(1);
    scan.setCaching(SCANNER_CACHING);

    return scan;
}
 
Example 4
Source File: HBaseReader.java    From geowave with Apache License 2.0
protected Scan getMultiScanner(final FilterList filterList) {
  // Single scan w/ multiple ranges
  final Scan multiScanner = scanProvider.get();
  final List<ByteArrayRange> ranges = readerParams.getQueryRanges().getCompositeQueryRanges();

  final MultiRowRangeFilter filter = operations.getMultiRowRangeFilter(ranges);
  if (filter != null) {
    filterList.addFilter(filter);

    final List<RowRange> rowRanges = filter.getRowRanges();
    multiScanner.setStartRow(rowRanges.get(0).getStartRow());

    final RowRange stopRowRange = rowRanges.get(rowRanges.size() - 1);
    byte[] stopRowExclusive;
    if (stopRowRange.isStopRowInclusive()) {
      // because the end is always exclusive, to make an inclusive
      // stop row exclusive all we need to do is add a trailing 0
      stopRowExclusive = HBaseUtils.getInclusiveEndKey(stopRowRange.getStopRow());
    } else {
      stopRowExclusive = stopRowRange.getStopRow();
    }
    multiScanner.setStopRow(stopRowExclusive);
  }
  return multiScanner;
}
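Example 4 converts an inclusive stop row into an exclusive one. Since setStopRow() is always exclusive, the usual trick the comment alludes to is to append a single 0x00 byte: the result is the smallest row key strictly greater than the original, so the original row is still covered by the scan. A minimal sketch of that idea follows; the helper name is made up and GeoWave's HBaseUtils.getInclusiveEndKey() may differ in detail.

// Sketch only: turn an inclusive stop row into an exclusive one by appending a
// 0x00 byte, producing the smallest row key strictly greater than the input.
static byte[] toExclusiveStopRow(byte[] inclusiveStopRow) {
    return org.apache.hadoop.hbase.util.Bytes.add(inclusiveStopRow, new byte[] { 0x00 });
}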
 
Example 5
Source File: DefaultParallelIteratorsRegionSplitterTest.java    From phoenix with BSD 3-Clause "New" or "Revised" License
@Test
public void testGetLowerUnboundSplits() throws Exception {
    long ts = nextTimestamp();
    initTableValues(ts);
    String url = getUrl() + ";" + PhoenixRuntime.CURRENT_SCN_ATTRIB + "=" + ts;
    Properties props = new Properties(TEST_PROPERTIES);
    Connection conn = DriverManager.getConnection(url, props);

    Scan scan = new Scan();
    
    ConnectionQueryServices services = driver.getConnectionQueryServices(getUrl(), TEST_PROPERTIES);
    TableRef table = getTableRef(conn,ts);
    services.getStatsManager().updateStats(table);
    scan.setStartRow(HConstants.EMPTY_START_ROW);
    scan.setStopRow(K1);
    List<KeyRange> keyRanges = getSplits(conn, ts, scan);
    assertEquals("Unexpected number of splits: " + keyRanges, 3, keyRanges.size());
    assertEquals(newKeyRange(KeyRange.UNBOUND, new byte[] {'7'}), keyRanges.get(0));
    assertEquals(newKeyRange(new byte[] {'7'}, new byte[] {'M'}), keyRanges.get(1));
    assertEquals(newKeyRange(new byte[] {'M'}, K3), keyRanges.get(2));
}
 
Example 6
Source File: JobHistoryService.java    From hraven with Apache License 2.0
/**
 * Returns the most recent {@link Flow} runs within that time range, up to
 * {@code limit} instances. If the {@code version} parameter is non-null, the
 * returned results will be restricted to those matching this app version.
 *
 * @param cluster the cluster where the jobs were run
 * @param user the user running the jobs
 * @param appId the application identifier for the jobs
 * @param version if non-null, only flows matching this application version
 *          will be returned
 * @param startTime the start time for the flows to be looked at
 * @param endTime the end time for the flows to be looked at
 * @param populateTasks if {@code true}, then TaskDetails will be populated
 *          for each job
 * @param limit the maximum number of flows to return
 * @return the {@link Flow} instances found, up to {@code limit}
 */
public List<Flow> getFlowSeries(String cluster, String user, String appId,
    String version, boolean populateTasks, long startTime, long endTime,
    int limit) throws IOException {
  // TODO: use RunMatchFilter to limit scan on the server side
  byte[] rowPrefix = Bytes.toBytes(
      cluster + Constants.SEP + user + Constants.SEP + appId + Constants.SEP);
  Scan scan = createFlowScan(rowPrefix, limit, version);

  // set the start and stop rows for scan so that it's time bound
  if (endTime != 0) {
    byte[] scanStartRow;
    // use end time in start row, if present
    long endRunId = FlowKey.encodeRunId(endTime);
    scanStartRow =
        Bytes.add(rowPrefix, Bytes.toBytes(endRunId), Constants.SEP_BYTES);
    scan.setStartRow(scanStartRow);
  }

  if (startTime != 0) {
    byte[] scanStopRow;
    // use start time in stop row, if present
    long stopRunId = FlowKey.encodeRunId(startTime);
    scanStopRow =
        Bytes.add(rowPrefix, Bytes.toBytes(stopRunId), Constants.SEP_BYTES);
    scan.setStopRow(scanStopRow);
  }
  return createFromResults(scan, populateTasks, limit);
}
 
Example 7
Source File: UngroupedAggregateRegionObserver.java    From phoenix with Apache License 2.0
@Override
public RegionScanner preScannerOpen(ObserverContext<RegionCoprocessorEnvironment> e, Scan scan, RegionScanner s)
        throws IOException {
    s = super.preScannerOpen(e, scan, s);
    if (ScanUtil.isAnalyzeTable(scan)) {
        // We are setting the start row and stop row such that it covers the entire region. As part
        // of PHOENIX-1263 we are storing the guideposts against the physical table rather than
        // individual tenant-specific tables.
        scan.setStartRow(HConstants.EMPTY_START_ROW);
        scan.setStopRow(HConstants.EMPTY_END_ROW);
        scan.setFilter(null);
    }
    return s;
}
 
Example 8
Source File: PcapScannerHBaseImpl.java    From opensoc-streaming with Apache License 2.0
/**
 * Creates the scan request.
 * 
 * @param cf
 *          the cf
 * @param cq
 *          the cq
 * @param startKey
 *          the start key
 * @param endKey
 *          the end key
 * @param maxResultSize
 *          the max result size
 * @param startTime
 *          the start time
 * @param endTime
 *          the end time
 * @return the scan
 * @throws IOException
 *           Signals that an I/O exception has occurred.
 */
@VisibleForTesting
Scan createScanRequest(byte[] cf, byte[] cq, String startKey, String endKey,
    long maxResultSize, long startTime, long endTime) throws IOException {
  Scan scan = new Scan();
  scan.addColumn(cf, cq);
  scan.setMaxVersions(ConfigurationUtil.getConfiguration().getInt(
      "hbase.table.column.maxVersions"));
  scan.setStartRow(startKey.getBytes());
  if (endKey != null) {
    scan.setStopRow(endKey.getBytes());
  }
  scan.setMaxResultSize(maxResultSize);
  boolean setTimeRange = true;
  if (startTime < 0 && endTime < 0) {
    setTimeRange = false;
  }
  if (setTimeRange) {
    if (startTime < 0) {
      startTime = 0;
    } else {
      startTime = PcapHelper.convertToDataCreationTimeUnit(startTime);
    }
    if (endTime < 0) {
      endTime = Long.MAX_VALUE;
    } else {
      endTime = PcapHelper.convertToDataCreationTimeUnit(endTime);
    }
    Assert.isTrue(startTime < endTime,
        "startTime value must be less than endTime value");
  }
  // create Scan request;
  if (setTimeRange) {
    scan.setTimeRange(startTime, endTime);
  }
  return scan;
}
 
Example 9
Source File: HBaseLogReader.java    From eagle with Apache License 2.0
/**
 * TODO If the required field is null for a row, then that row will not be fetched. That could be a
 * problem for counting. We need another version of read that strictly gets the number of rows, which
 * would return all the columns for a column family.
 */
@Override
public void open() throws IOException {
    if (isOpen) {
        return; // silently return
    }
    try {
        tbl = EagleConfigFactory.load().getHTable(schema.getTable());
    } catch (RuntimeException ex) {
        throw new IOException(ex);
    }

    String rowkeyRegex = buildRegex2(searchTags);
    RegexStringComparator regexStringComparator = new RegexStringComparator(rowkeyRegex);
    regexStringComparator.setCharset(Charset.forName("ISO-8859-1"));
    RowFilter filter = new RowFilter(CompareOp.EQUAL, regexStringComparator);
    FilterList filterList = new FilterList();
    filterList.addFilter(filter);
    Scan s1 = new Scan();
    // reverse timestamp, startRow is stopKey, and stopRow is startKey
    s1.setStartRow(stopKey);
    s1.setStopRow(startKey);
    s1.setFilter(filterList);
    // TODO the # of cached rows should be minimum of (pagesize and 100)
    s1.setCaching(100);
    // TODO not optimized for all applications
    s1.setCacheBlocks(true);
    // scan specified columnfamily and qualifiers
    for (byte[] qualifier : qualifiers) {
        s1.addColumn(schema.getColumnFamily().getBytes(), qualifier);
    }
    rs = tbl.getScanner(s1);
    isOpen = true;
}
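Examples 1 and 9 (and Example 13) scan tables whose row keys embed a reversed timestamp (Long.MAX_VALUE - timestamp), so newer entries sort first and the start/stop keys are swapped relative to the time window. A minimal sketch of that convention, assuming a made-up key layout of prefix + reversed timestamp and the usual Scan/Bytes imports:

// Sketch only: with row keys of the form prefix + (Long.MAX_VALUE - timestamp),
// rows sort newest-first, so a scan over the time window (from, to] starts at the
// key derived from 'to' and stops at the key derived from 'from'.
static Scan newestFirstScan(byte[] prefix, long from, long to) {
    byte[] startKey = Bytes.add(prefix, Bytes.toBytes(Long.MAX_VALUE - to));   // newest bound, inclusive
    byte[] stopKey  = Bytes.add(prefix, Bytes.toBytes(Long.MAX_VALUE - from)); // oldest bound, exclusive
    Scan scan = new Scan();
    scan.setStartRow(startKey);
    scan.setStopRow(stopKey);
    return scan;
}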
 
Example 10
Source File: FlappingLocalIndexIT.java    From phoenix with Apache License 2.0
@Test
public void testBuildIndexWhenUserTableAlreadyHasData() throws Exception {
    String tableName = schemaName + "." + generateUniqueName();
    String indexName = "IDX_" + generateUniqueName();
    String indexTableName = schemaName + "." + indexName;
    TableName physicalTableName = SchemaUtil.getPhysicalTableName(tableName.getBytes(), isNamespaceMapped);
    String indexPhysicalTableName = physicalTableName.getNameAsString();

    createBaseTable(tableName, null, "('e','i','o')");
    Connection conn1 = DriverManager.getConnection(getUrl());
    conn1.createStatement().execute("UPSERT INTO "+tableName+" values('b',1,2,4,'z')");
    conn1.createStatement().execute("UPSERT INTO "+tableName+" values('f',1,2,3,'z')");
    conn1.createStatement().execute("UPSERT INTO "+tableName+" values('j',2,4,2,'a')");
    conn1.createStatement().execute("UPSERT INTO "+tableName+" values('q',3,1,1,'c')");
    conn1.commit();
    conn1.createStatement().execute("CREATE LOCAL INDEX " + indexName + " ON " + tableName + "(v1)");
    ResultSet rs = conn1.createStatement().executeQuery("SELECT COUNT(*) FROM " + indexTableName);
    assertTrue(rs.next());
    assertEquals(4, rs.getInt(1));
    Admin admin = driver.getConnectionQueryServices(getUrl(), TestUtil.TEST_PROPERTIES).getAdmin();
    org.apache.hadoop.hbase.client.Connection hbaseConn = admin.getConnection();
    Table indexTable = hbaseConn.getTable(TableName.valueOf(indexPhysicalTableName));
    Pair<byte[][], byte[][]> startEndKeys = hbaseConn.getRegionLocator(TableName.valueOf(indexPhysicalTableName)).getStartEndKeys();
    byte[][] startKeys = startEndKeys.getFirst();
    byte[][] endKeys = startEndKeys.getSecond();
    for (int i = 0; i < startKeys.length; i++) {
        Scan s = new Scan();
        s.addFamily(QueryConstants.DEFAULT_LOCAL_INDEX_COLUMN_FAMILY_BYTES);
        s.setStartRow(startKeys[i]);
        s.setStopRow(endKeys[i]);
        ResultScanner scanner = indexTable.getScanner(s);
        int count = 0;
        for (Result r : scanner) {
            count++;
        }
        scanner.close();
        assertEquals(1, count);
    }
    indexTable.close();
}
 
Example 11
Source File: ScanUtil.java    From phoenix with Apache License 2.0
public static void setupReverseScan(Scan scan) {
    if (isReversed(scan)) {
        byte[] startRow = scan.getStartRow();
        byte[] stopRow = scan.getStopRow();
        byte[] newStartRow = startRow;
        byte[] newStopRow = stopRow;
        if (startRow.length != 0) {
            /*
             * Must get previous key because this is going from an inclusive start key to an exclusive stop key, and
             * we need the start key to be included. We get the previous key by decrementing the last byte by one.
             * However, with variable length data types, we need to fill with the max byte value, otherwise, if the
             * start key is 'ab', we lower it to 'aa' which would cause 'aab' to be included (which isn't correct).
             * So we fill with a 0xFF byte to prevent this. A single 0xFF would be enough for our primitive types (as
             * that byte wouldn't occur), but for an arbitrary VARBINARY key we can't know how many bytes to tack
             * on. It's lame of HBase to force us to do this.
             */
            newStartRow = Arrays.copyOf(startRow, startRow.length + MAX_FILL_LENGTH_FOR_PREVIOUS_KEY.length);
            if (ByteUtil.previousKey(newStartRow, startRow.length)) {
                System.arraycopy(MAX_FILL_LENGTH_FOR_PREVIOUS_KEY, 0, newStartRow, startRow.length, MAX_FILL_LENGTH_FOR_PREVIOUS_KEY.length);
            } else {
                newStartRow = HConstants.EMPTY_START_ROW;
            }
        }
        if (stopRow.length != 0) {
            // Must add null byte because we need the start to be exclusive while it was inclusive
            newStopRow = ByteUtil.concat(stopRow, QueryConstants.SEPARATOR_BYTE_ARRAY);
        }
        scan.setStartRow(newStopRow);
        scan.setStopRow(newStartRow);
        scan.setReversed(true);
    }
}
 
Example 12
Source File: BigtableStorage.java    From styx with Apache License 2.0
List<WorkflowInstanceExecutionData> executionData(WorkflowId workflowId, String offset, int limit)
    throws IOException {
  try (final Table eventsTable = connection.getTable(EVENTS_TABLE_NAME)) {
    final Scan scan = new Scan()
        .setRowPrefixFilter(Bytes.toBytes(workflowId.toKey() + '#'))
        .setFilter(new FirstKeyOnlyFilter());

    if (!Strings.isNullOrEmpty(offset)) {
      final WorkflowInstance offsetInstance = WorkflowInstance.create(workflowId, offset);
      scan.setStartRow(Bytes.toBytes(offsetInstance.toKey() + '#'));
    }

    final Set<WorkflowInstance> workflowInstancesSet = Sets.newHashSet();
    try (ResultScanner scanner = eventsTable.getScanner(scan)) {
      Result result = scanner.next();
      while (result != null) {
        final String key = new String(result.getRow());
        final int lastHash = key.lastIndexOf('#');

        final WorkflowInstance wfi = WorkflowInstance.parseKey(key.substring(0, lastHash));
        workflowInstancesSet.add(wfi);
        if (workflowInstancesSet.size() == limit) {
          break;
        }

        result = scanner.next();
      }
    }

    return executionData(workflowInstancesSet);
  }
}
 
Example 13
Source File: HbaseAgentInfoDao.java    From pinpoint with Apache License 2.0
private Scan createScanForInitialAgentInfo(String agentId) {
    Scan scan = new Scan();
    byte[] agentIdBytes = Bytes.toBytes(agentId);
    byte[] reverseStartKey = RowKeyUtils.concatFixedByteAndLong(agentIdBytes, HbaseTableConstatns.AGENT_NAME_MAX_LEN, Long.MAX_VALUE);
    scan.setStartRow(reverseStartKey);
    scan.setReversed(true);
    scan.setMaxVersions(1);
    scan.setCaching(SCANNER_CACHING);
    return scan;
}
 
Example 14
Source File: ValidateWordCount.java    From cloud-bigtable-examples with Apache License 2.0
@SuppressWarnings("unused")
public static void main(String[] args) throws IOException {
  Configuration conf = HBaseConfiguration.create();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length < 2) {
    System.err.println("Usage: java -cp <this jar>:<hbase classpath> "
        + ValidateWordCount.class.getName() + " <table-name> <expected count>");
    System.exit(2);
  }

  TableName tableName = TableName.valueOf(otherArgs[0]);
  int expectedCount = Integer.parseInt(otherArgs[1]);

  Scan scan = new Scan();
  scan.addFamily(Bytes.toBytes("cf"));
  scan.setStartRow(Bytes.toBytes(""));
  int count = 0;

  try (Connection conn = ConnectionFactory.createConnection(conf);
      Table table = conn.getTable(tableName);
      ResultScanner rs = table.getScanner(scan)) {
    for (Result result : rs) {
      count++;
    }
  }

  System.out.println("Count: " + count + ".  Expected: " + expectedCount);
  System.exit(count == expectedCount ? 0 : 1);
}
 
Example 15
Source File: ActiveUserRunner.java    From BigDataArchitect with Apache License 2.0
/**
 * Initialize the list of scans.
 *
 * @param job the MapReduce job whose configuration carries the run date
 * @return the list of scans
 */
private List<Scan> initScans(Job job) {
    Configuration conf = job.getConfiguration();
    // get the run date: yyyy-MM-dd
    String date = conf.get(GlobalConstants.RUNNING_DATE_PARAMES);
    long startDate = TimeUtil.parseString2Long(date);
    long endDate = startDate + GlobalConstants.DAY_OF_MILLISECONDS;

    Scan scan = new Scan();
    // define the start and stop row keys for the HBase scan
    scan.setStartRow(Bytes.toBytes("" + startDate));
    scan.setStopRow(Bytes.toBytes("" + endDate));

    FilterList filterList = new FilterList();
    // define the column names the mapper needs to fetch
    String[] columns = new String[] {
            EventLogConstants.LOG_COLUMN_NAME_UUID, // user id
            EventLogConstants.LOG_COLUMN_NAME_SERVER_TIME, // server time
            EventLogConstants.LOG_COLUMN_NAME_PLATFORM, // platform name
            EventLogConstants.LOG_COLUMN_NAME_BROWSER_NAME, // browser name
            EventLogConstants.LOG_COLUMN_NAME_BROWSER_VERSION // browser version
    };
    filterList.addFilter(this.getColumnFilter(columns));

    scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(EventLogConstants.HBASE_NAME_EVENT_LOGS));
    scan.setFilter(filterList);
    return Lists.newArrayList(scan);
}
 
Example 16
Source File: BaseResultIterators.java    From phoenix with Apache License 2.0
/**
 * Get parallel scans for the specified scan boundaries. This can be used for getting parallel
 * scans when there are splits/merges while scanning a chunk. In this case we need not go by all
 * the regions or guideposts.
 * @param scan the scan whose boundaries determine the parallel scans
 * @return the parallel scans to be executed
 * @throws SQLException
 */
private List<List<Scan>> getParallelScans(Scan scan) throws SQLException {
    List<HRegionLocation> regionLocations = getRegionBoundaries(scanGrouper);
    List<byte[]> regionBoundaries = toBoundaries(regionLocations);
    int regionIndex = 0;
    int stopIndex = regionBoundaries.size();
    if (scan.getStartRow().length > 0) {
        regionIndex = getIndexContainingInclusive(regionBoundaries, scan.getStartRow());
    }
    if (scan.getStopRow().length > 0) {
        stopIndex = Math.min(stopIndex, regionIndex + getIndexContainingExclusive(regionBoundaries.subList(regionIndex, stopIndex), scan.getStopRow()));
    }
    List<List<Scan>> parallelScans = Lists.newArrayListWithExpectedSize(stopIndex - regionIndex + 1);
    List<Scan> scans = Lists.newArrayListWithExpectedSize(2);
    while (regionIndex <= stopIndex) {
        HRegionLocation regionLocation = regionLocations.get(regionIndex);
        RegionInfo regionInfo = regionLocation.getRegion();
        Scan newScan = ScanUtil.newScan(scan);
        byte[] endKey;
        if (regionIndex == stopIndex) {
            endKey = scan.getStopRow();
        } else {
            endKey = regionBoundaries.get(regionIndex);
        }
        if(ScanUtil.isLocalIndex(scan)) {
            ScanUtil.setLocalIndexAttributes(newScan, 0, regionInfo.getStartKey(),
                regionInfo.getEndKey(), newScan.getAttribute(SCAN_START_ROW_SUFFIX),
                newScan.getAttribute(SCAN_STOP_ROW_SUFFIX));
        } else {
            if(Bytes.compareTo(scan.getStartRow(), regionInfo.getStartKey())<=0) {
                newScan.setAttribute(SCAN_ACTUAL_START_ROW, regionInfo.getStartKey());
                newScan.setStartRow(regionInfo.getStartKey());
            }
            if(scan.getStopRow().length == 0 || (regionInfo.getEndKey().length != 0 && Bytes.compareTo(scan.getStopRow(), regionInfo.getEndKey())>0)) {
                newScan.setStopRow(regionInfo.getEndKey());
            }
        }
        scans = addNewScan(parallelScans, scans, newScan, endKey, true, regionLocation);
        regionIndex++;
    }
    if (!scans.isEmpty()) { // Add any remaining scans
        parallelScans.add(scans);
    }
    return parallelScans;
}
 
Example 17
Source File: SlicedRowFilterGTSDecoderIterator.java    From warp10-platform with Apache License 2.0
public SlicedRowFilterGTSDecoderIterator(long now, long timespan, List<Metadata> metadatas, Connection conn, TableName tableName, byte[] colfam, boolean writeTimestamp, KeyStore keystore, boolean useBlockCache) {
    
  this.keystore = keystore;
  this.now = now;
  this.timespan = timespan;
  this.hbaseAESKey = keystore.getKey(KeyStore.AES_HBASE_DATA);
  this.writeTimestamp = writeTimestamp;
  
  //
  // Check that if 'timespan' is < 0 then 'now' is either Long.MAX_VALUE or congruent to 0 modulo DEFAULT_MODULUS
  //
  
  if (timespan < 0) {
    if (Long.MAX_VALUE != now && 0 != (now % Constants.DEFAULT_MODULUS)) {
      throw new RuntimeException("Incompatible 'timespan' (" + timespan + ") and 'now' (" + now + ")");
    }
  }
  
  //
  // Create a SlicedRowFilter for the prefix, class id, labels id and ts
  // We include the prefix so we exit the filter early when the last
  // matching row has been reached
  //
  
  // 128BITS
  
  int[] bounds = { 0, 24 };
  
  //
  // Create singleton for each classId/labelsId combo
  //
  // TODO(hbs): we should really create multiple scanners, one per class id for example,
  // 
  
  List<Pair<byte[], byte[]>> ranges = new ArrayList<Pair<byte[], byte[]>>();
  
  for (Metadata metadata: metadatas) {
    byte[][] keys = getKeys(metadata, now, timespan);
    byte[] lower = keys[0];
    byte[] upper = keys[1];
    
    this.metadatas.put(new String(Arrays.copyOfRange(lower, prefix.length, prefix.length + 16), StandardCharsets.ISO_8859_1), metadata);
    
    Pair<byte[],byte[]> range = new Pair<byte[],byte[]>(lower, upper);
    
    ranges.add(range);
  }
              
  SlicedRowFilter filter = new SlicedRowFilter(bounds, ranges, timespan < 0 ? -timespan : Long.MAX_VALUE);

  //
  // Create scanner. The start key is the lower bound of the first range
  //
  
  Scan scan = new Scan();
  scan.addFamily(colfam); // (HBaseStore.GTS_COLFAM, Longs.toByteArray(Long.MAX_VALUE - modulus));
  scan.setStartRow(filter.getStartKey());
  byte[] filterStopKey = filter.getStopKey();
  // Add one byte at the end (we can do that because we know the slice is the whole key)
  byte[] stopRow = Arrays.copyOf(filterStopKey, filterStopKey.length + 1);
  scan.setStopRow(stopRow);
  scan.setFilter(filter);
  
  scan.setMaxResultSize(1000000L);
  scan.setBatch(50000);
  scan.setCaching(50000);
  
  scan.setCacheBlocks(useBlockCache);

  Sensision.update(SensisionConstants.SENSISION_CLASS_CONTINUUM_HBASE_CLIENT_FILTERED_SCANNERS, Sensision.EMPTY_LABELS, 1);
  Sensision.update(SensisionConstants.SENSISION_CLASS_CONTINUUM_HBASE_CLIENT_FILTERED_SCANNERS_RANGES, Sensision.EMPTY_LABELS, ranges.size());

  try {
    this.htable = conn.getTable(tableName);
    this.scanner = this.htable.getScanner(scan);
    iter = scanner.iterator();          
  } catch (IOException ioe) {
    LOG.error("",ioe);
    this.iter = null;
  }
}
 
Example 18
Source File: RowCounterCLI.java    From kylin with Apache License 2.0
public static void main(String[] args) throws IOException {

        if (args == null || args.length != 3) {
            logger.info(
                    "Usage: hbase org.apache.hadoop.util.RunJar kylin-job-latest.jar org.apache.kylin.job.tools.RowCounterCLI [HTABLE_NAME] [STARTKEY] [ENDKEY]");
            return; // if not enough arguments are provided, return with the above message
        }

        logger.info(args[0]);
        String htableName = args[0];
        logger.info(args[1]);
        byte[] startKey = BytesUtil.fromReadableText(args[1]);
        logger.info(args[2]);
        byte[] endKey = BytesUtil.fromReadableText(args[2]);

        if (startKey == null) {
            logger.info("startkey is null ");
        } else {
            logger.info("startkey lenght: {}", startKey.length);
        }
        if(logger.isInfoEnabled()){
            logger.info("start key in binary: {}", Bytes.toStringBinary(startKey));
            logger.info("end key in binary: {}", Bytes.toStringBinary(endKey));
        }

        Configuration conf = HBaseConnection.getCurrentHBaseConfiguration();

        Scan scan = new Scan();
        scan.setCaching(512);
        scan.setCacheBlocks(true);
        scan.setStartRow(startKey);
        scan.setStopRow(endKey);

        logger.info("My Scan {}", scan);
        try (Connection conn = ConnectionFactory.createConnection(conf);
                Table tableInterface = conn.getTable(TableName.valueOf(htableName))) {
            Iterator<Result> iterator = tableInterface.getScanner(scan).iterator();
            int counter = 0;
            while (iterator.hasNext()) {
                iterator.next();
                counter++;
                if (counter % 1000 == 1) {
                    logger.info("number of rows: {}", counter);
                }
            }
            logger.info("number of rows: {}", counter);
        }
    }
 
Example 19
Source File: HfileBulkExporter.java    From super-cloudops with Apache License 2.0
/**
 * Set up scan conditions if necessary.
 * 
 * @param conf
 * @param line
 * @throws IOException
 */
public static void setScanIfNecessary(Configuration conf, CommandLine line) throws IOException {
	String startRow = line.getOptionValue("startRow");
	String endRow = line.getOptionValue("endRow");
	String startTime = line.getOptionValue("startTime");
	String endTime = line.getOptionValue("endTime");

	boolean enabledScan = false;
	Scan scan = new Scan();
	// Row
	if (isNotBlank(startRow)) {
		conf.set(TableInputFormat.SCAN_ROW_START, startRow);
		scan.setStartRow(Bytes.toBytes(startRow));
		enabledScan = true;
	}
	if (isNotBlank(endRow)) {
		Assert2.hasText(startRow, "Argument for startRow and endRow are used simultaneously");
		conf.set(TableInputFormat.SCAN_ROW_STOP, endRow);
		scan.setStopRow(Bytes.toBytes(endRow));
		enabledScan = true;
	}

	// Row TimeStamp
	if (isNotBlank(startTime) && isNotBlank(endTime)) {
		conf.set(TableInputFormat.SCAN_TIMERANGE_START, startTime);
		conf.set(TableInputFormat.SCAN_TIMERANGE_END, endTime);
		try {
			Timestamp stime = new Timestamp(Long.parseLong(startTime));
			Timestamp etime = new Timestamp(Long.parseLong(endTime));
			scan.setTimeRange(stime.getTime(), etime.getTime());
			enabledScan = true;
		} catch (Exception e) {
			throw new IllegalArgumentException(String.format("Illegal startTime(%s) and endTime(%s)", startTime, endTime), e);
		}
	}

	if (enabledScan) {
		ClientProtos.Scan proto = ProtobufUtil.toScan(scan);
		log.info("All other SCAN configuration are ignored if\n"
				+ "		 * this is specified.See TableMapReduceUtil.convertScanToString(Scan)\n"
				+ "		 * for more details.");
		conf.set(TableInputFormat.SCAN, Base64.encodeBytes(proto.toByteArray()));
	}
}
 
Example 20
Source File: GridTableHBaseBenchmark.java    From kylin-on-parquet-v2 with Apache License 2.0
private static void jumpScan(Connection conn, boolean[] hits, Stats stats) throws IOException {

        final int jumpThreshold = 6; // compensate for Scan() overhead, totally by experience

        Table table = conn.getTable(TableName.valueOf(TEST_TABLE));
        try {

            stats.markStart();

            int i = 0;
            while (i < N_ROWS) {
                // find the first hit
                int start = i;
                while (start + 1 < N_ROWS && !hits[start]) start++;

                // find the last hit within jumpThreshold
                int end = start + 1;
                int jump = end + 1;
                while (jump < N_ROWS && (end + jumpThreshold > jump)) {
                    if (hits[jump]) {
                        end = jump;
                    }
                    jump++;
                }

                if (start < N_ROWS) {
                    Scan scan = new Scan();
                    scan.setStartRow(Bytes.toBytes(start));
                    scan.setStopRow(Bytes.toBytes(end));
                    scan.addFamily(CF);
                    ResultScanner scanner = table.getScanner(scan);
                    i = start;
                    for (Result r : scanner) {
                        stats.consume(r);
                        dot(i, N_ROWS);
                        i++;
                    }
                }
                i = end;
            }

            stats.markEnd();

        } finally {
            IOUtils.closeQuietly(table);
        }
    }