Java Code Examples for org.apache.hadoop.hbase.client.Scan#setBatch()

The following examples show how to use org.apache.hadoop.hbase.client.Scan#setBatch(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
/**
 * Builds the list of scans that feed the job.
 *
 * <p>Exactly one scan is created. Its start row is the running date (read from the job
 * configuration and parsed to a long) rendered as a decimal string; its stop row is that
 * value plus {@code GlobalConstants.DAY_OF_MILLISECONDS}, rendered the same way.
 *
 * @param job the job whose configuration supplies the running date
 * @return a list holding the single configured scan
 */
protected List<Scan> initScans(Job job) {
    // Running date of the job, expected in yyyy-MM-dd form.
    String runningDate = job.getConfiguration().get(GlobalConstants.RUNNING_DATE_PARAMES);
    long rangeStart = TimeUtil.parseString2Long(runningDate);
    long rangeEnd = rangeStart + GlobalConstants.DAY_OF_MILLISECONDS;

    Scan eventScan = new Scan();
    // Row keys that bound the HBase scan.
    eventScan.setStartRow(Bytes.toBytes(String.valueOf(rangeStart)));
    eventScan.setStopRow(Bytes.toBytes(String.valueOf(rangeEnd)));

    // Tells the multi-table input format which table this scan targets.
    eventScan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME,
            Bytes.toBytes(EventLogConstants.HBASE_NAME_EVENT_LOGS));

    // A filter is optional; only attach one when the subclass hook supplies it.
    Filter rowFilter = this.fetchHbaseFilter();
    if (rowFilter != null) {
        eventScan.setFilter(rowFilter);
    }

    // Scan tuning.
    eventScan.setBatch(500);
    // Enable block caching for this scan.
    eventScan.setCacheBlocks(true);
    // Rows fetched per round trip. A larger value means fewer RPCs and a faster scan,
    // but too large a value can cause memory problems.
    eventScan.setCaching(1000);

    return Lists.newArrayList(eventScan);
}
 
Example 2
Source Project: hbase   File: TestFilterFromRegionSide.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Scans the region with a {@link FirstKeyOnlyFilter} and a batch size of 1 and checks that
 * each of the {@code NUM_ROWS} calls to {@code next} yields exactly one cell belonging to
 * the expected row, and that the scanner then reports no more data.
 *
 * @throws IOException if the region scanner fails
 */
@Test
public void testFirstKeyOnlyFilterAndBatch() throws IOException {
  Scan scan = new Scan();
  scan.setFilter(new FirstKeyOnlyFilter());
  scan.setBatch(1);
  List<Cell> results = new ArrayList<>();
  // try-with-resources: the scanner must be released even when an assertion fails mid-loop.
  try (InternalScanner scanner = REGION.getScanner(scan)) {
    for (int i = 0; i < NUM_ROWS; i++) {
      results.clear();
      scanner.next(results);
      assertEquals(1, results.size());
      Cell cell = results.get(0);
      assertArrayEquals(ROWS[i],
          Bytes.copy(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()));
    }
    // After NUM_ROWS batches the scanner must be exhausted.
    assertFalse(scanner.next(results));
  }
}
 
Example 3
Source Project: hbase   File: TestFilterFromRegionSide.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Scans the region with a {@code FirstSeveralCellsFilter} and a batch size of
 * {@code NUM_COLS} and checks that each of the {@code NUM_ROWS} calls to {@code next}
 * yields {@code NUM_COLS} cells whose first cell has the expected row, family and
 * qualifier, and that the scanner then reports no more data.
 *
 * @throws IOException if the region scanner fails
 */
@Test
public void testFirstSeveralCellsFilterAndBatch() throws IOException {
  Scan scan = new Scan();
  scan.setFilter(new FirstSeveralCellsFilter());
  scan.setBatch(NUM_COLS);
  List<Cell> results = new ArrayList<>();
  // try-with-resources: the scanner must be released even when an assertion fails mid-loop.
  try (InternalScanner scanner = REGION.getScanner(scan)) {
    for (int i = 0; i < NUM_ROWS; i++) {
      results.clear();
      scanner.next(results);
      assertEquals(NUM_COLS, results.size());
      // Only the first cell of each batch is inspected in detail.
      Cell cell = results.get(0);
      assertArrayEquals(ROWS[i],
          Bytes.copy(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()));
      assertArrayEquals(FAMILIES[0],
          Bytes.copy(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()));
      assertArrayEquals(QUALIFIERS[0], Bytes.copy(cell.getQualifierArray(),
          cell.getQualifierOffset(), cell.getQualifierLength()));
    }
    // After NUM_ROWS batches the scanner must be exhausted.
    assertFalse(scanner.next(results));
  }
}
 
Example 4
Source Project: hbase   File: HashTable.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link Scan} from this object's settings.
 *
 * <p>Block caching is always disabled. Every other setting is applied only when the
 * corresponding field carries a non-default value: a time range when either bound is
 * non-zero, a batch size when positive, a version count when non-negative, start/stop rows
 * when they are not the table's own start/end, and column families when a comma-separated
 * list was given.
 *
 * @return the configured scan
 * @throws IOException declared by the signature
 */
Scan initScan() throws IOException {
  Scan tableScan = new Scan();
  tableScan.setCacheBlocks(false);

  boolean timeBounded = startTime != 0 || endTime != 0;
  if (timeBounded) {
    // An unset (zero) end time means "up to the latest timestamp".
    tableScan.setTimeRange(startTime, endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime);
  }

  if (scanBatch > 0) {
    tableScan.setBatch(scanBatch);
  }

  if (versions >= 0) {
    tableScan.readVersions(versions);
  }

  if (!isTableStartRow(startRow)) {
    tableScan.withStartRow(startRow);
  }

  if (!isTableEndRow(stopRow)) {
    tableScan.withStopRow(stopRow);
  }

  if (families == null) {
    return tableScan;
  }
  for (String familyName : families.split(",")) {
    tableScan.addFamily(Bytes.toBytes(familyName));
  }
  return tableScan;
}
 
Example 5
Source Project: hbase   File: IntegrationTestMTTR.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Opens a key-only, single-version scan over {@code FAMILY} with a batch size of 2 and
 * reports whether the first result returned by the scanner contains at least one cell.
 *
 * @return {@code true} when the first result is non-null and non-empty
 * @throws Exception if opening or reading the scanner fails
 */
@Override
protected boolean doAction() throws Exception {
  ResultScanner scanner = null;
  try {
    Scan probe = new Scan();
    probe.setBatch(2);
    probe.addFamily(FAMILY);
    probe.setFilter(new KeyOnlyFilter());
    probe.readVersions(1);

    scanner = table.getScanner(probe);
    Result first = scanner.next();
    if (first == null) {
      return false;
    }
    return first.size() > 0;
  } finally {
    // The scanner is null when opening it failed; nothing to release in that case.
    if (scanner != null) {
      scanner.close();
    }
  }
}
 
Example 6
/**
 * Finds the first node at or after {@code startKey} by opening a scan on the
 * {@code FAMILY_NAME:COLUMN_PREV} column and reading a single result.
 *
 * <p>The time taken to open the scanner and fetch the first result is printed to standard
 * output with an {@code FSR} prefix, together with the node key when a node was found.
 *
 * @param table the table to scan
 * @param startKey the row key at which the scan starts
 * @return the node built from the first result, or {@code null} when the scan is empty
 * @throws IOException if opening or reading the scanner fails
 */
protected static CINode findStartNode(Table table, byte[] startKey) throws IOException {
  Scan scan = new Scan();
  scan.withStartRow(startKey);
  scan.setBatch(1);
  scan.addColumn(FAMILY_NAME, COLUMN_PREV);

  long t1 = System.currentTimeMillis();
  Result result;
  long t2;
  // try-with-resources: previously the scanner leaked when next() threw.
  try (ResultScanner scanner = table.getScanner(scan)) {
    result = scanner.next();
    // Stop the clock before close() so the measurement matches open + first fetch only.
    t2 = System.currentTimeMillis();
  }

  if (result != null) {
    CINode node = getCINode(result, new CINode());
    System.out.printf("FSR %d %s\n", t2 - t1, Bytes.toStringBinary(node.key));
    return node;
  }

  System.out.println("FSR " + (t2 - t1));

  return null;
}
 
Example 7
Source Project: presto-connectors   File: HbaseClient.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Applies the session's scan tuning properties to {@code scan} and restricts it to the
 * requested columns.
 *
 * @param scan the scan to configure in place
 * @param session the connector session supplying the scan properties
 * @param columnHandles the columns to read; handles without a family are skipped
 */
private static void buildScan(Scan scan, ConnectorSession session, List<HbaseColumnHandle> columnHandles)
{
    // Number of versions to return per cell (the original author notes the default is 1,
    // i.e. only the newest version).
    scan.setMaxVersions(HbaseSessionProperties.getScanMaxVersions(session));
    // Caps the number of cells returned per Result so that a very wide row cannot cause an
    // OutOfMemoryError; a row with more cells than this is split across several Results.
    scan.setBatch(HbaseSessionProperties.getScanBatchSize(session));
    scan.setCaching(HbaseSessionProperties.getScanBatchCaching(session));
    // Upper bound on the size of a scan response. HBase measures this in bytes, not rows.
    scan.setMaxResultSize(HbaseSessionProperties.getScanMaxResultSize(session));

    for (HbaseColumnHandle handle : columnHandles) {
        handle.getFamily().ifPresent(family ->
                scan.addColumn(Bytes.toBytes(family), Bytes.toBytes(handle.getQualifier().get())));
    }
}
 
Example 8
Source Project: hbase   File: TestFilterWithScanLimits.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Combines a batch size of 2 with a {@link SingleColumnValueFilter} set after the batch and
 * verifies that no cells are returned to the client.
 *
 * <p>Any exception raised while scanning is tolerated; the test only requires that the
 * number of cells seen stays at zero.
 */
@Test
public void testScanWithLimit() {
  int kv_number = 0;
  try {
    Scan scan = new Scan();
    // set batch number as 2, which means each Result should contain 2 KVs at most
    scan.setBatch(2);
    SingleColumnValueFilter filter = new SingleColumnValueFilter(
        Bytes.toBytes(columnFamily), Bytes.toBytes("c5"),
        CompareOperator.EQUAL, new SubstringComparator("2_c5"));

    // add filter after batch defined
    scan.setFilter(filter);
    // try-with-resources: the scan is expected to fail, so the table and scanner must be
    // released on the exception path too (previously they were only closed on success).
    try (Table table = openTable(tableName);
        ResultScanner scanner = table.getScanner(scan)) {
      // Expect to get following row
      // row2 => <f1:c1, 2_c1>, <f1:c2, 2_c2>,
      // row2 => <f1:c3, 2_c3>, <f1:c4, 2_c4>,
      // row2 => <f1:c5, 2_c5>

      for (Result result : scanner) {
        for (Cell kv : result.listCells()) {
          kv_number++;
          LOG.debug(kv_number + ". kv: " + kv);
        }
      }
    }
  } catch (Exception e) {
    // no correct result is expected
    assertNotNull("No IncompatibleFilterException catched", e);
  }
  LOG.debug("check the fetched kv number");
  assertEquals("We should not get result(s) returned.", 0, kv_number);
}
 
Example 9
/**
 * Scans {@code TABLE} with the given batch size and a max result size derived from
 * {@code cellsPerPartialResult}, and checks the size of every returned {@link Result}:
 * a result that may have more cells in its row must contain exactly {@code batch} cells,
 * any other result at most {@code batch} cells.
 *
 * @param batch the batch size set on the scan
 * @param cellsPerPartialResult passed to {@code getResultSizeForNumberOfCells} to obtain the
 *        scan's max result size
 * @throws Exception if scanning fails
 */
public void testPartialResultsAndBatch(final int batch, final int cellsPerPartialResult)
    throws Exception {
  if (LOG.isInfoEnabled()) {
    LOG.info("batch: " + batch + " cellsPerPartialResult: " + cellsPerPartialResult);
  }

  Scan scan = new Scan();
  scan.setMaxResultSize(getResultSizeForNumberOfCells(cellsPerPartialResult));
  scan.setBatch(batch);

  // try-with-resources: release the scanner even when an assertion fails.
  try (ResultScanner scanner = TABLE.getScanner(scan)) {
    int repCount = 0;
    Result result;

    // Every result is validated, including the first one. The earlier version fetched one
    // result before the loop and discarded it without checking it.
    while ((result = scanner.next()) != null) {
      assertTrue(result.rawCells() != null);

      if (result.mayHaveMoreCellsInRow()) {
        final String error =
            "Cells:" + result.rawCells().length + " Batch size:" + batch
                + " cellsPerPartialResult:" + cellsPerPartialResult + " rep:" + repCount;
        assertTrue(error, result.rawCells().length == batch);
      } else {
        assertTrue(result.rawCells().length <= batch);
      }
      repCount++;
    }
  }
}
 
Example 10
/**
 * Creates a test table, scans it with a batch size of 1 and a {@link FirstKeyOnlyFilter},
 * and checks that every returned {@link Result} holds exactly one cell.
 *
 * @throws IOException if creating the table or scanning fails
 */
@Test
public void testMayHaveMoreCellsInRowReturnsTrueAndSetBatch() throws IOException {
  Scan scan = new Scan();
  scan.setBatch(1);
  scan.setFilter(new FirstKeyOnlyFilter());
  // try-with-resources: previously neither the table nor the scanner was ever closed.
  try (Table table = createTestTable(TableName.valueOf(name.getMethodName()), ROWS, FAMILIES,
          QUALIFIERS, VALUE);
      ResultScanner scanner = table.getScanner(scan)) {
    Result result;
    while ((result = scanner.next()) != null) {
      assertTrue(result.rawCells() != null);
      assertEquals(1, result.rawCells().length);
    }
  }
}
 
Example 11
/**
 * Reads table {@code SCHEMA_NAME.A} through a client-side region scanner and checks that
 * {@code nextRaw} reports more data exactly 500 times.
 *
 * <p>The scan uses caching and batch of 50, all versions, and carries the
 * {@code SPLICE_SCAN_MEMSTORE_ONLY} attribute. The region to scan is the one located for
 * the scan's start row.
 */
@Test
@Ignore
public void validateAccurateRecordsWithStoreFileAndMemstore() throws SQLException, IOException, InterruptedException{
    int i = 0;
    TableName tableName = TableName.valueOf(sqlUtil.getConglomID(SCHEMA_NAME + ".A"));
    // The Table joins the try-with-resources header; previously it was never closed.
    try (Admin admin = connection.getAdmin();
         Table table = connection.getTable(tableName)) {
        Scan scan = new Scan();
        scan.setCaching(50);
        scan.setBatch(50);
        scan.setMaxVersions();
        scan.setAttribute(MRConstants.SPLICE_SCAN_MEMSTORE_ONLY, HConstants.EMPTY_BYTE_ARRAY);
        try (SkeletonClientSideRegionScanner clientSideRegionScanner =
                    new HBaseClientSideRegionScanner(table,
                          table.getConfiguration(), FSUtils.getCurrentFileSystem(table.getConfiguration()),
                          FSUtils.getRootDir(table.getConfiguration()),
                          table.getTableDescriptor(),
                          connection.getRegionLocator(tableName).getRegionLocation(scan.getStartRow()).getRegionInfo(),
                          scan,
                          connection.getRegionLocator(tableName).getRegionLocation(scan.getStartRow()).getHostnamePort())) {
            List results = new ArrayList();
            // Count iterations only; the buffer is emptied after each call.
            while (clientSideRegionScanner.nextRaw(results)) {
                i++;
                results.clear();
            }
        }
        Assert.assertEquals("Results Returned Are Not Accurate", 500, i);
    }
}
 
Example 12
/**
 * Reads table {@code SCHEMA_NAME.A} through a client-side region scanner, flushes the table
 * once the 100th iteration is reached, and checks that {@code nextRaw} still reports more
 * data exactly 500 times in total.
 *
 * <p>The scan uses caching and batch of 50, all versions, and carries the
 * {@code SPLICE_SCAN_MEMSTORE_ONLY} attribute. The region to scan is the one located for
 * the scan's start row.
 */
@Test
@Ignore
public void validateAccurateRecordsWithRegionFlush() throws SQLException, IOException, InterruptedException{
    int i = 0;
    TableName tableName = TableName.valueOf(sqlUtil.getConglomID(SCHEMA_NAME + ".A"));
    // The Table joins the try-with-resources header; previously it was never closed.
    try (Admin admin = connection.getAdmin();
         Table table = connection.getTable(tableName)) {
        Scan scan = new Scan();
        scan.setCaching(50);
        scan.setBatch(50);
        scan.setMaxVersions();
        scan.setAttribute(MRConstants.SPLICE_SCAN_MEMSTORE_ONLY, HConstants.EMPTY_BYTE_ARRAY);

        try (SkeletonClientSideRegionScanner clientSideRegionScanner =
                   new HBaseClientSideRegionScanner(table,
                         table.getConfiguration(), FSUtils.getCurrentFileSystem(table.getConfiguration()),
                         FSUtils.getRootDir(table.getConfiguration()),
                         table.getTableDescriptor(),
                         connection.getRegionLocator(tableName).getRegionLocation(scan.getStartRow()).getRegionInfo(),
                         scan,
                         connection.getRegionLocator(tableName).getRegionLocation(scan.getStartRow()).getHostnamePort())) {
            List results = new ArrayList();
            while (clientSideRegionScanner.nextRaw(results)) {
                i++;
                // Flush in the middle of the scan to exercise reading across a flush.
                if (i == 100) {
                    admin.flush(tableName);
                }
                results.clear();
            }
        }
        Assert.assertEquals("Results Returned Are Not Accurate", 500, i);
    }
}
 
Example 13
/**
 * Opens a single HBase scan, restricted by a {@code SlicedRowFilter} built from one row-key
 * range per entry of {@code metadatas}, and keeps the scanner's iterator in {@code iter}.
 *
 * <p>If opening the table or the scanner fails with an {@link IOException}, the error is
 * logged and {@code iter} is set to {@code null}; the exception is not propagated.
 *
 * @param now stored in the instance and passed to {@code getKeys} for each metadata
 * @param timespan stored in the instance and passed to {@code getKeys}; when negative, its
 *        absolute value is handed to the filter as its third argument
 * @param metadatas the metadata entries for which row-key ranges are computed
 * @param conn connection used to obtain the table
 * @param tableName name of the table to scan
 * @param colfam the column family added to the scan
 * @param writeTimestamp stored in the instance
 * @param keystore stored in the instance; also supplies the {@code AES_HBASE_DATA} key
 * @param useBlockCache whether the scan should cache blocks
 * @throws RuntimeException if {@code timespan} is negative and {@code now} is neither
 *         {@code Long.MAX_VALUE} nor a multiple of {@code Constants.DEFAULT_MODULUS}
 */
public SlicedRowFilterGTSDecoderIterator(long now, long timespan, List<Metadata> metadatas, Connection conn, TableName tableName, byte[] colfam, boolean writeTimestamp, KeyStore keystore, boolean useBlockCache) {
    
  this.keystore = keystore;
  this.now = now;
  this.timespan = timespan;
  this.hbaseAESKey = keystore.getKey(KeyStore.AES_HBASE_DATA);
  this.writeTimestamp = writeTimestamp;
  
  //
  // Check that if 'timespan' is < 0 then 'now' is either Long.MAX_VALUE or congruent to 0 modulo DEFAULT_MODULUS
  //
  
  if (timespan < 0) {
    if (Long.MAX_VALUE != now && 0 != (now % Constants.DEFAULT_MODULUS)) {
      throw new RuntimeException("Incompatible 'timespan' (" + timespan + ") and 'now' (" + now + ")");
    }
  }
  
  //
  // Create a SlicedRowFilter for the prefix, class id, labels id and ts
  // We include the prefix so we exit the filter early when the last
  // matching row has been reached
  //
  
  // 128BITS
  
  // Single slice covering byte offsets 0 through 24 of the row key.
  int[] bounds = { 0, 24 };
  
  //
  // Create singleton for each classId/labelsId combo
  //
  // TODO(hbs): we should really create multiple scanner, one per class Id for example,
  // 
  
  List<Pair<byte[], byte[]>> ranges = new ArrayList<Pair<byte[], byte[]>>();
  
  for (Metadata metadata: metadatas) {
    byte[][] keys = getKeys(metadata, now, timespan);
    byte[] lower = keys[0];
    byte[] upper = keys[1];
    
    // Index the metadata by the 16 bytes that follow the prefix in its lower bound
    // (presumably the class id and labels id -- confirm against getKeys).
    this.metadatas.put(new String(Arrays.copyOfRange(lower, prefix.length, prefix.length + 16), StandardCharsets.ISO_8859_1), metadata);
    
    Pair<byte[],byte[]> range = new Pair<byte[],byte[]>(lower, upper);
    
    ranges.add(range);
  }
              
  // NOTE(review): the third argument looks like a per-range count limit (|timespan| when
  // timespan is negative, otherwise unbounded) -- confirm against SlicedRowFilter.
  SlicedRowFilter filter = new SlicedRowFilter(bounds, ranges, timespan < 0 ? -timespan : Long.MAX_VALUE);

  //
  // Create scanner. The start key is the lower bound of the first range
  //
  
  Scan scan = new Scan();
  scan.addFamily(colfam); // (HBaseStore.GTS_COLFAM, Longs.toByteArray(Long.MAX_VALUE - modulus));
  scan.setStartRow(filter.getStartKey());
  byte[] filterStopKey = filter.getStopKey();
  // Add one byte at the end (we can do that because we know the slice is the whole key)
  byte[] stopRow = Arrays.copyOf(filterStopKey, filterStopKey.length + 1);
  scan.setStopRow(stopRow);
  scan.setFilter(filter);
  
  scan.setMaxResultSize(1000000L);
  scan.setBatch(50000);
  scan.setCaching(50000);
  
  scan.setCacheBlocks(useBlockCache);

  // Record one filtered scanner and the number of ranges it covers.
  Sensision.update(SensisionConstants.SENSISION_CLASS_CONTINUUM_HBASE_CLIENT_FILTERED_SCANNERS, Sensision.EMPTY_LABELS, 1);
  Sensision.update(SensisionConstants.SENSISION_CLASS_CONTINUUM_HBASE_CLIENT_FILTERED_SCANNERS_RANGES, Sensision.EMPTY_LABELS, ranges.size());

  try {
    this.htable = conn.getTable(tableName);
    this.scanner = this.htable.getScanner(scan);
    iter = scanner.iterator();          
  } catch (IOException ioe) {
    // NOTE(review): if getScanner fails after getTable succeeded, htable is not closed
    // here -- confirm that the owner closes it elsewhere.
    LOG.error("",ioe);
    this.iter = null;
  }
}
 
Example 14
Source Project: hbase   File: ThriftUtilities.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Converts a Thrift {@code TScan} into an HBase client {@link Scan}.
 *
 * <p>Only the fields that are set on {@code in} are copied. The time range is applied only
 * when both its min and max stamps are set. When both a filter string and filter bytes are
 * set, the filter built from the bytes is applied last and so replaces the one parsed from
 * the string.
 *
 * @param in the Thrift scan description
 * @return a new {@link Scan} carrying the settings present on {@code in}
 * @throws IOException declared by the signature; presumably raised by filter parsing or the
 *         conversion helpers
 */
public static Scan scanFromThrift(TScan in) throws IOException {
  Scan scan = new Scan();

  // Row range, caching and versions.
  if (in.isSetStartRow()) {
    scan.withStartRow(in.getStartRow());
  }
  if (in.isSetStopRow()) {
    scan.withStopRow(in.getStopRow());
  }
  if (in.isSetCaching()) {
    scan.setCaching(in.getCaching());
  }
  if (in.isSetMaxVersions()) {
    scan.readVersions(in.getMaxVersions());
  }

  // A column without a qualifier selects the whole family.
  if (in.isSetColumns()) {
    for (TColumn col : in.getColumns()) {
      if (!col.isSetQualifier()) {
        scan.addFamily(col.getFamily());
      } else {
        scan.addColumn(col.getFamily(), col.getQualifier());
      }
    }
  }

  TTimeRange range = in.getTimeRange();
  boolean rangeComplete = range != null && range.isSetMinStamp() && range.isSetMaxStamp();
  if (rangeComplete) {
    scan.setTimeRange(range.getMinStamp(), range.getMaxStamp());
  }

  if (in.isSetBatchSize()) {
    scan.setBatch(in.getBatchSize());
  }

  if (in.isSetFilterString()) {
    scan.setFilter(new ParseFilter().parseFilterString(in.getFilterString()));
  }

  if (in.isSetAttributes()) {
    addAttributes(scan, in.getAttributes());
  }

  if (in.isSetAuthorizations()) {
    scan.setAuthorizations(new Authorizations(in.getAuthorizations().getLabels()));
  }

  if (in.isSetReversed()) {
    scan.setReversed(in.isReversed());
  }

  if (in.isSetCacheBlocks()) {
    scan.setCacheBlocks(in.isCacheBlocks());
  }

  // Per-family time ranges.
  if (in.isSetColFamTimeRangeMap()) {
    Map<ByteBuffer, TTimeRange> familyRanges = in.getColFamTimeRangeMap();
    if (MapUtils.isNotEmpty(familyRanges)) {
      for (Map.Entry<ByteBuffer, TTimeRange> familyRange : familyRanges.entrySet()) {
        scan.setColumnFamilyTimeRange(Bytes.toBytes(familyRange.getKey()),
            familyRange.getValue().getMinStamp(), familyRange.getValue().getMaxStamp());
      }
    }
  }

  if (in.isSetReadType()) {
    scan.setReadType(readTypeFromThrift(in.getReadType()));
  }

  if (in.isSetLimit()) {
    scan.setLimit(in.getLimit());
  }

  if (in.isSetConsistency()) {
    scan.setConsistency(consistencyFromThrift(in.getConsistency()));
  }

  if (in.isSetTargetReplicaId()) {
    scan.setReplicaId(in.getTargetReplicaId());
  }

  // Applied after the filter string on purpose: filter bytes take precedence when both are set.
  if (in.isSetFilterBytes()) {
    scan.setFilter(filterFromThrift(in.getFilterBytes()));
  }

  return scan;
}
 
Example 15
Source Project: hbase   File: ThriftHBaseServiceHandler.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Opens a scanner on {@code tableName} configured from the Thrift scan description and
 * registers it, returning the value produced by {@code addScanner}.
 *
 * <p>The table obtained for the scan is handed to {@code closeTable} in all cases, whether
 * the scanner was opened or not.
 *
 * @param tableName name of the table to scan
 * @param tScan Thrift scan description; only fields that are set are applied
 * @param attributes attributes added to the scan before any other setting
 * @return the value returned by {@code addScanner} for the opened scanner
 * @throws IOError when an {@link IOException} occurs; the exception is logged first
 */
@Override
public int scannerOpenWithScan(ByteBuffer tableName, TScan tScan,
    Map<ByteBuffer, ByteBuffer> attributes)
    throws IOError {

  Table table = null;
  try {
    table = getTable(tableName);
    Scan hbaseScan = new Scan();
    addAttributes(hbaseScan, attributes);

    if (tScan.isSetStartRow()) {
      hbaseScan.withStartRow(tScan.getStartRow());
    }
    if (tScan.isSetStopRow()) {
      hbaseScan.withStopRow(tScan.getStopRow());
    }
    if (tScan.isSetTimestamp()) {
      // The timestamp is used as the upper bound of a range starting at 0.
      hbaseScan.setTimeRange(0, tScan.getTimestamp());
    }
    if (tScan.isSetCaching()) {
      hbaseScan.setCaching(tScan.getCaching());
    }
    if (tScan.isSetBatchSize()) {
      hbaseScan.setBatch(tScan.getBatchSize());
    }

    if (tScan.isSetColumns() && !tScan.getColumns().isEmpty()) {
      for (ByteBuffer columnSpec : tScan.getColumns()) {
        byte[][] familyAndQualifier = CellUtil.parseColumn(getBytes(columnSpec));
        // A single element means the spec named a family only.
        if (familyAndQualifier.length == 1) {
          hbaseScan.addFamily(familyAndQualifier[0]);
        } else {
          hbaseScan.addColumn(familyAndQualifier[0], familyAndQualifier[1]);
        }
      }
    }

    if (tScan.isSetFilterString()) {
      hbaseScan.setFilter(new ParseFilter().parseFilterString(tScan.getFilterString()));
    }
    if (tScan.isSetReversed()) {
      hbaseScan.setReversed(tScan.isReversed());
    }
    if (tScan.isSetCacheBlocks()) {
      hbaseScan.setCacheBlocks(tScan.isCacheBlocks());
    }

    return addScanner(table.getScanner(hbaseScan), tScan.sortColumns);
  } catch (IOException e) {
    LOG.warn(e.getMessage(), e);
    throw getIOError(e);
  } finally {
    closeTable(table);
  }
}
 
Example 16
Source Project: hbase   File: TestWideScanner.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Inserts wide content into the region and scans families A, B and C (up to 100 versions)
 * with a batch size of 256, checking that:
 * <ul>
 *   <li>no batch holds more than {@code batch} cells,</li>
 *   <li>all cells of a batch belong to the same row,</li>
 *   <li>the total number of cells scanned equals the number inserted.</li>
 * </ul>
 * After every batch, {@code updateReaders} is invoked on each store scanner in the region
 * scanner's heap so the scan has to cope with a reader change between batches.
 *
 * @throws IOException if inserting or scanning fails
 */
@Test
public void testWideScanBatching() throws IOException {
  final int batch = 256;
  int inserted = addWideContent(REGION);
  List<Cell> results = new ArrayList<>();
  Scan scan = new Scan();
  scan.addFamily(A);
  scan.addFamily(B);
  scan.addFamily(C);
  scan.readVersions(100);
  scan.setBatch(batch);
  try (InternalScanner s = REGION.getScanner(scan)) {
    int total = 0;
    int i = 0;
    boolean more;
    do {
      more = s.next(results);
      i++;
      LOG.info("iteration #" + i + ", results.size=" + results.size());

      // assert that the result set is no larger
      assertTrue(results.size() <= batch);

      total += results.size();

      if (results.size() > 0) {
        // assert that all results are from the same row
        byte[] row = CellUtil.cloneRow(results.get(0));
        for (Cell kv : results) {
          assertTrue(Bytes.equals(row, CellUtil.cloneRow(kv)));
        }
      }

      results.clear();

      // trigger ChangedReadersObservers
      Iterator<KeyValueScanner> scanners =
        ((HRegion.RegionScannerImpl) s).storeHeap.getHeap().iterator();
      while (scanners.hasNext()) {
        StoreScanner ss = (StoreScanner) scanners.next();
        ss.updateReaders(Collections.emptyList(), Collections.emptyList());
      }
    } while (more);

    // assert that the scanner returned all values
    LOG.info("inserted " + inserted + ", scanned " + total);
    // JUnit order is (expected, actual): the inserted count is the expectation. The
    // arguments were previously swapped, which produced a misleading failure message.
    assertEquals(inserted, total);
  }
}
 
Example 17
Source Project: hbase   File: CopyTable.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Builds the copy job from the command line arguments.
 *
 * <p>The job scans the source with the configured batch size, caching, time range, version
 * count, row range and column families, and runs map-only. In bulk-load mode the mapper
 * output is written as HFiles into a freshly generated temporary directory; otherwise the
 * output is written to the destination table through a table reducer job set-up.
 *
 * @param args  The command line parameters.
 * @return The newly created job, or {@code null} when the command line could not be handled.
 * @throws IOException When setting up the job fails.
 */
public Job createSubmittableJob(String[] args) throws IOException {
  if (!doCommandLine(args)) {
    return null;
  }

  String defaultJobName = NAME + "_" + (tableName == null ? snapshot : tableName);
  Job job = Job.getInstance(getConf(), getConf().get(JOB_NAME_CONF_KEY, defaultJobName));
  job.setJarByClass(CopyTable.class);

  Scan scan = new Scan();
  scan.setBatch(batch);
  scan.setCacheBlocks(false);

  // Fall back to the client's configured scanner caching when none was given explicitly.
  int caching = cacheRow > 0
      ? cacheRow
      : getConf().getInt(HConstants.HBASE_CLIENT_SCANNER_CACHING, 100);
  scan.setCaching(caching);

  scan.setTimeRange(startTime, endTime);

  if (allCells) {
    scan.setRaw(true);
  }
  if (shuffle) {
    job.getConfiguration().set(TableInputFormat.SHUFFLE_MAPS, "true");
  }
  if (versions >= 0) {
    scan.readVersions(versions);
  }

  if (startRow != null) {
    scan.withStartRow(Bytes.toBytesBinary(startRow));
  }
  if (stopRow != null) {
    scan.withStopRow(Bytes.toBytesBinary(stopRow));
  }

  if (families != null) {
    Map<String,String> cfRenameMap = new HashMap<>();
    for (String familySpec : families.split(",")) {
      // A spec is either "sourceCf" or "sourceCfName:destCfName".
      String sourceCf = familySpec;
      if (familySpec.contains(":")) {
        String[] sourceAndDest = familySpec.split(":", 2);
        sourceCf = sourceAndDest[0];
        cfRenameMap.put(sourceCf, sourceAndDest[1]);
      }
      scan.addFamily(Bytes.toBytes(sourceCf));
    }
    Import.configureCfRenaming(job.getConfiguration(), cfRenameMap);
  }
  job.setNumReduceTasks(0);

  // Both modes start from the same mapper set-up.
  initCopyTableMapperReducerJob(job, scan);

  if (!bulkload) {
    TableMapReduceUtil.initTableReducerJob(dstTableName, null, job, null, peerAddress, null,
      null);
    return job;
  }

  // We need to split the inputs by destination tables so that output of Map can be bulk-loaded.
  TableInputFormat.configureSplitTable(job, TableName.valueOf(dstTableName));

  bulkloadDir = generateUniqTempDir(false);
  LOG.info("HFiles will be stored at " + this.bulkloadDir);
  HFileOutputFormat2.setOutputPath(job, bulkloadDir);
  try (Connection conn = ConnectionFactory.createConnection(getConf());
      Admin admin = conn.getAdmin()) {
    HFileOutputFormat2.configureIncrementalLoadMap(job,
      admin.getDescriptor((TableName.valueOf(dstTableName))));
  }

  return job;
}
 
Example 18
Source Project: hbase   File: TableInputFormat.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@link Scan} and configures it from the {@code TableInputFormat} configuration
 * properties. A property that is absent from {@code conf} leaves the corresponding scan
 * setting untouched. Supported settings:
 * <ul>
 *   <li>start and stop rows</li>
 *   <li>columns and column families</li>
 *   <li>a single timestamp, or a time range when both of its bounds are present</li>
 *   <li>max versions, scanner caching and batch size</li>
 *   <li>block caching, which is off unless explicitly enabled</li>
 * </ul>
 *
 * @param conf the configuration to read the scan properties from
 * @return the configured scan
 * @throws IOException declared by the signature
 */
public static Scan createScanFromConfiguration(Configuration conf) throws IOException {
  Scan scan = new Scan();

  String startRow = conf.get(SCAN_ROW_START);
  if (startRow != null) {
    scan.withStartRow(Bytes.toBytesBinary(startRow));
  }

  String stopRow = conf.get(SCAN_ROW_STOP);
  if (stopRow != null) {
    scan.withStopRow(Bytes.toBytesBinary(stopRow));
  }

  String columns = conf.get(SCAN_COLUMNS);
  if (columns != null) {
    addColumns(scan, columns);
  }

  for (String family : conf.getTrimmedStrings(SCAN_COLUMN_FAMILY)) {
    scan.addFamily(Bytes.toBytes(family));
  }

  String timestamp = conf.get(SCAN_TIMESTAMP);
  if (timestamp != null) {
    scan.setTimestamp(Long.parseLong(timestamp));
  }

  // A time range needs both bounds; one bound alone is ignored.
  String rangeStart = conf.get(SCAN_TIMERANGE_START);
  String rangeEnd = conf.get(SCAN_TIMERANGE_END);
  if (rangeStart != null && rangeEnd != null) {
    scan.setTimeRange(Long.parseLong(rangeStart), Long.parseLong(rangeEnd));
  }

  String maxVersions = conf.get(SCAN_MAXVERSIONS);
  if (maxVersions != null) {
    scan.readVersions(Integer.parseInt(maxVersions));
  }

  String cachedRows = conf.get(SCAN_CACHEDROWS);
  if (cachedRows != null) {
    scan.setCaching(Integer.parseInt(cachedRows));
  }

  String batchSize = conf.get(SCAN_BATCHSIZE);
  if (batchSize != null) {
    scan.setBatch(Integer.parseInt(batchSize));
  }

  // false by default, full table scans generate too much BC churn
  boolean cacheBlocks = conf.getBoolean(SCAN_CACHEBLOCKS, false);
  scan.setCacheBlocks(cacheBlocks);

  return scan;
}
 
Example 19
Source Project: hbase   File: IntegrationTestBulkLoad.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Runs a MapReduce job over the test table to check the previously loaded data.
 *
 * <p>The job scans the {@code CHAIN_FAM} and {@code SORT_FAM} families (one version, batch
 * of 1000, block cache off), maps with {@code LinkedListCheckingMapper} and reduces with
 * {@code LinkedListCheckingReducer}. When the configured replica count differs from the
 * default, the scan uses timeline consistency. The method asserts that the job completes
 * successfully and then deletes the job's output directory.
 *
 * @throws IOException if setting up or running the job, or deleting its output, fails
 * @throws ClassNotFoundException if a job class cannot be found while the job runs
 * @throws InterruptedException if waiting for the job is interrupted
 */
private void runCheck() throws IOException, ClassNotFoundException, InterruptedException {
  LOG.info("Running check");
  Configuration conf = getConf();
  String jobName = getTablename() + "_check" + EnvironmentEdgeManager.currentTime();
  Path outputDir = util.getDataTestDirOnTestFS(jobName);

  Job checkJob = new Job(conf);
  checkJob.setJarByClass(getClass());
  checkJob.setJobName(jobName);

  checkJob.setPartitionerClass(NaturalKeyPartitioner.class);
  checkJob.setGroupingComparatorClass(NaturalKeyGroupingComparator.class);
  checkJob.setSortComparatorClass(CompositeKeyComparator.class);

  Scan chainScan = new Scan();
  chainScan.addFamily(CHAIN_FAM);
  chainScan.addFamily(SORT_FAM);
  chainScan.readVersions(1);
  chainScan.setCacheBlocks(false);
  chainScan.setBatch(1000);

  // A non-default replica count switches the scan to timeline consistency.
  boolean customReplicaCount =
      conf.getInt(NUM_REPLICA_COUNT_KEY, NUM_REPLICA_COUNT_DEFAULT) != NUM_REPLICA_COUNT_DEFAULT;
  if (customReplicaCount) {
    chainScan.setConsistency(Consistency.TIMELINE);
  }

  TableMapReduceUtil.initTableMapperJob(
      getTablename().getName(),
      chainScan,
      LinkedListCheckingMapper.class,
      LinkKey.class,
      LinkChain.class,
      checkJob
  );

  checkJob.setReducerClass(LinkedListCheckingReducer.class);
  checkJob.setOutputKeyClass(NullWritable.class);
  checkJob.setOutputValueClass(NullWritable.class);

  FileOutputFormat.setOutputPath(checkJob, outputDir);

  assertEquals(true, checkJob.waitForCompletion(true));

  // Delete the files.
  util.getTestFileSystem().delete(outputDir, true);
}
 
Example 20
/**
 * Prints nodes of the table to standard output, one per line.
 *
 * <p>Recognised options: {@code -s/--start} (start key), {@code -e/--end} (end key) and
 * {@code -l/--limit} (maximum number of nodes to print, 100 when omitted). Positional
 * arguments are rejected. When the command line cannot be parsed, usage is printed and the
 * JVM exits with status -1.
 *
 * @param args the command line arguments
 * @return always 0
 * @throws Exception if connecting to or scanning the table fails, or the limit is not a
 *         valid integer
 */
@Override
public int run(String[] args) throws Exception {
  Options options = new Options();
  options.addOption("s", "start", true, "start key");
  options.addOption("e", "end", true, "end key");
  options.addOption("l", "limit", true, "number to print");

  GnuParser parser = new GnuParser();
  CommandLine cmd = null;
  try {
    cmd = parser.parse(options, args);
    if (cmd.getArgs().length != 0) {
      throw new ParseException("Command takes no arguments");
    }
  } catch (ParseException e) {
    System.err.println("Failed to parse command line " + e.getMessage());
    System.err.println();
    HelpFormatter formatter = new HelpFormatter();
    formatter.printHelp(getClass().getSimpleName(), options);
    System.exit(-1);
  }

  // try-with-resources: previously the connection, table and scanner were only closed on
  // the success path and leaked whenever anything in between threw.
  try (Connection connection = ConnectionFactory.createConnection(getConf());
      Table table = connection.getTable(getTableName(getConf()))) {

    Scan scan = new Scan();
    scan.setBatch(10000);

    if (cmd.hasOption("s")) {
      scan.withStartRow(Bytes.toBytesBinary(cmd.getOptionValue("s")));
    }

    if (cmd.hasOption("e")) {
      scan.withStopRow(Bytes.toBytesBinary(cmd.getOptionValue("e")));
    }

    int limit = cmd.hasOption("l") ? Integer.parseInt(cmd.getOptionValue("l")) : 100;

    try (ResultScanner scanner = table.getScanner(scan)) {
      // A single CINode instance is reused for every row.
      CINode node = new CINode();
      Result result = scanner.next();
      int count = 0;
      while (result != null && count++ < limit) {
        node = getCINode(result, node);
        System.out.printf("%s:%s:%012d:%s\n", Bytes.toStringBinary(node.key),
            Bytes.toStringBinary(node.prev), node.count, node.client);
        result = scanner.next();
      }
    }
  }

  return 0;
}