org.apache.hadoop.hbase.io.hfile.HFileScanner Java Examples

The following examples show how to use org.apache.hadoop.hbase.io.hfile.HFileScanner. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: HFileSortedOplog.java    From gemfirexd-oss with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an iterator over {@code scan} limited by the optional {@code from} and {@code to} keys.
 *
 * <p>The scan statistic is started before anything else happens; when no {@code stats}
 * object is available, a {@code ScanOperation} created with all-zero arguments on a
 * fresh {@code SortedOplogStatistics("", "")} is used instead. When {@code scan} is
 * {@code null} the constructor stops there and {@code initIterator()} is never called.
 *
 * @param scan the underlying scanner, or {@code null}
 * @param from lower bound key, or {@code null}
 * @param fromInclusive stored alongside {@code from}
 * @param to upper bound key, or {@code null}
 * @param toInclusive stored alongside {@code to}
 * @throws IOException declared for the work done by {@code initIterator()}
 */
public HFileSortedIterator(HFileScanner scan, byte[] from, boolean fromInclusive, byte[] to,
    boolean toInclusive) throws IOException {
  this.scan = scan;
  this.from = from;
  this.to = to;
  this.fromInclusive = fromInclusive;
  this.toInclusive = toInclusive;

  if (stats != null) {
    scanStat = stats.getScan();
  } else {
    scanStat = new SortedOplogStatistics("", "").new ScanOperation(0, 0, 0, 0, 0, 0, 0);
  }
  scanStart = scanStat.begin();

  if (scan != null) {
    // With assertions enabled, reject a range whose lower bound sorts after its upper bound.
    assert from == null || to == null
        || scan.getReader().getComparator().compare(from, to) <= 0;

    initIterator();
  }
}
 
Example #2
Source File: HalfStoreFileReader.java    From hbase with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the cached first key, looking it up on the first call.
 *
 * <p>The lookup is only marked as done when the seek finishes without an
 * {@link IOException}; a failed attempt is logged and retried on the next call.
 */
@Override
public Optional<Cell> getFirstKey() {
  if (firstKeySeeked) {
    return this.firstKey;
  }

  HFileScanner scanner = getScanner(true, true, false);
  try {
    boolean positioned = scanner.seekTo();
    if (positioned) {
      this.firstKey = Optional.ofNullable(scanner.getKey());
    }
    firstKeySeeked = true;
  } catch (IOException e) {
    LOG.warn("Failed seekTo first KV in the file", e);
  } finally {
    if (scanner != null) {
      scanner.close();
    }
  }
  return this.firstKey;
}
 
Example #3
Source File: HalfStoreFileReader.java    From hbase with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the last key visible through this reader.
 *
 * <p>When {@code top} is set the parent implementation answers. Otherwise the key
 * positioned by {@code seekBefore(splitCell)} is returned, or an empty
 * {@link Optional} when the seek finds nothing or fails with an {@link IOException}
 * (which is logged, not rethrown).
 */
@Override
public Optional<Cell> getLastKey() {
  if (top) {
    return super.getLastKey();
  }

  Optional<Cell> lastKey = Optional.empty();
  HFileScanner scanner = getScanner(true, true);
  try {
    if (scanner.seekBefore(this.splitCell)) {
      lastKey = Optional.ofNullable(scanner.getKey());
    }
  } catch (IOException e) {
    LOG.warn("Failed seekBefore " + Bytes.toStringBinary(this.splitkey), e);
  } finally {
    if (scanner != null) {
      scanner.close();
    }
  }
  return lastKey;
}
 
Example #4
Source File: StoreFileScanner.java    From hbase with Apache License 2.0 6 votes vote down vote up
/**
 * Re-seeks {@code s} to {@code k} or to the entry that follows it.
 *
 * @param s scanner to reposition
 * @param k key to look for
 * @return the result of {@code s.next()} when {@code reseekTo} reports a positive value,
 *         the result of {@code s.seekTo()} when the scanner had not been seeked yet,
 *         and {@code true} otherwise
 * @throws IOException if the underlying scanner fails
 */
static boolean reseekAtOrAfter(HFileScanner s, Cell k)
throws IOException {
  final int outcome = s.reseekTo(k);

  if (outcome > 0) {
    // The scanner stopped on an entry before k: the following entry, if any, is the answer.
    return s.next();
  }
  if (outcome == HConstants.INDEX_KEY_MAGIC) {
    // using faked key
    return true;
  }
  // An unseeked scanner at this point is positioned by seeking to the start of the file.
  return s.isSeeked() || s.seekTo();
}
 
Example #5
Source File: StoreFileScanner.java    From hbase with Apache License 2.0 6 votes vote down vote up
/**
 * Seeks {@code s} to {@code k} or to the entry that follows it.
 *
 * @param s scanner to position
 * @param k key to look for
 * @return false if not found or if k is after the end.
 * @throws IOException if the underlying scanner fails
 */
public static boolean seekAtOrAfter(HFileScanner s, Cell k)
throws IOException {
  final int outcome = s.seekTo(k);

  if (outcome == 0) {
    // Landed exactly on the requested key.
    return true;
  }
  if (outcome > 0) {
    // The scanner sits on an entry before k: the next entry, if present, is the "after".
    return s.next();
  }
  if (outcome == HConstants.INDEX_KEY_MAGIC) {
    // using faked key
    return true;
  }
  // k sorts before the first entry in the file, so start from the beginning.
  return s.seekTo();
}
 
Example #6
Source File: HFilePerformanceEvaluation.java    From hbase with Apache License 2.0 6 votes vote down vote up
/**
 * Seeks to a random row and, when it is found exactly, checks it and reads up to 30
 * following cells, asserting the value size of each.
 *
 * @param i not used by this implementation
 */
@Override
void doRow(int i) throws Exception {
  HFileScanner scanner = this.reader.getScanner(false, false);
  byte[] row = getRandomRow();
  Cell current = createCell(row);
  int seekResult = scanner.seekTo(current);
  if (seekResult != 0) {
    LOG.info("Nonexistent row: " + new String(row));
    return;
  }

  // TODO: HFileScanner doesn't do Cells yet. Temporary fix.
  current = scanner.getCell();
  PerformanceEvaluationCommons.assertKey(row, current);

  int remaining = 30;
  while (remaining-- > 0) {
    if (!scanner.next()) {
      LOG.info("NOTHING FOLLOWS");
      return;
    }
    current = scanner.getCell();
    PerformanceEvaluationCommons.assertValueSize(current.getValueLength(), ROW_LENGTH);
  }
}
 
Example #7
Source File: TestMajorCompaction.java    From hbase with Apache License 2.0 6 votes vote down vote up
/**
 * Counts the cells belonging to {@code STARTROW} and {@code secondRowBytes} across all
 * store files of {@code COLUMN_FAMILY_TEXT} and asserts both totals.
 *
 * @param countRow1 expected number of cells whose row equals {@code STARTROW}
 * @param countRow2 expected number of cells whose row equals {@code secondRowBytes}
 */
private void verifyCounts(int countRow1, int countRow2) throws Exception {
  int count1 = 0;
  int count2 = 0;
  for (HStoreFile f : r.getStore(COLUMN_FAMILY_TEXT).getStorefiles()) {
    HFileScanner scanner = f.getReader().getScanner(false, false);
    try {
      if (!scanner.seekTo()) {
        // Nothing to read in this file; getCell() would not return a usable cell.
        continue;
      }
      do {
        byte[] row = CellUtil.cloneRow(scanner.getCell());
        if (Bytes.equals(row, STARTROW)) {
          count1++;
        } else if (Bytes.equals(row, secondRowBytes)) {
          count2++;
        }
      } while (scanner.next());
    } finally {
      // Release the scanner even when an assertion or read error interrupts the loop.
      scanner.close();
    }
  }
  assertEquals(countRow1, count1);
  assertEquals(countRow2, count2);
}
 
Example #8
Source File: HFileSortedOplog.java    From gemfirexd-oss with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an iterator over {@code scan} that orders keys with {@code comparator}.
 *
 * <p>After storing its arguments the constructor starts the scan statistic obtained
 * from {@code sopConfig} and records the result of {@code evalFrom()} in
 * {@code foundNext}.
 *
 * @param scan the underlying scanner
 * @param comparator comparator used to check the bounds
 * @param from lower bound key, or {@code null}
 * @param fromInclusive stored alongside {@code from}
 * @param to upper bound key, or {@code null}
 * @param toInclusive stored alongside {@code to}
 * @throws IOException declared for the work done by {@code evalFrom()}
 */
public HFileSortedIterator(HFileScanner scan, SerializedComparator comparator,
    byte[] from, boolean fromInclusive,
    byte[] to, boolean toInclusive) throws IOException {
  this.scan = scan;
  this.comparator = comparator;
  this.from = from;
  this.to = to;
  this.fromInclusive = fromInclusive;
  this.toInclusive = toInclusive;

  // With assertions enabled, reject a range whose lower bound sorts after its upper bound.
  assert !(from != null
      && to != null
      && comparator.compare(from, 0, from.length, to, 0, to.length) > 0);

  start = sopConfig.getStatistics().getScan().begin();
  foundNext = evalFrom();
}
 
Example #9
Source File: HFileReader.java    From terrapin with Apache License 2.0 6 votes vote down vote up
/**
 * Issues an HFile lookup on the underlying HFile.Reader. This is protected
 * for testing.
 *
 * <p>Any {@link Throwable} raised during the lookup is captured and returned inside
 * the result instead of being propagated.
 *
 * @param key the key to look up; its bytes are read without consuming the buffer
 * @return a pair of the key and an inner pair holding the value (or {@code null} when
 *         the key is not found or the lookup failed) and the failure (or {@code null})
 */
protected Pair<ByteBuffer, Pair<ByteBuffer, Throwable>> getValueFromHFile(ByteBuffer key) {
  try {
    HFileScanner scanner = reader.getScanner(true, true, false);
    KeyValue kv = buildKeyValueForLookup(
        BytesUtil.readBytesFromByteBufferWithoutConsume(key));
    int code = scanner.seekTo(kv.getKey());
    ByteBuffer value = null;
    if (code == 0) {
      value = ByteBuffer.wrap(scanner.getKeyValue().getValue());
      if (this.sizeStatsKey != null) {
        Stats.addMetric(this.sizeStatsKey, value.remaining());
      }
      Stats.addMetric("value-size", value.remaining());
    } else {
      Stats.incr("not-found");
      if (this.notFoundStatsKey != null) {
        Stats.incr(this.notFoundStatsKey);
      }
    }
    // Explicit type arguments avoid the raw-type/unchecked conversions of the original.
    return new ImmutablePair<ByteBuffer, Pair<ByteBuffer, Throwable>>(
        key, new ImmutablePair<ByteBuffer, Throwable>(value, null));
  } catch (Throwable t) {
    return new ImmutablePair<ByteBuffer, Pair<ByteBuffer, Throwable>>(
        key, new ImmutablePair<ByteBuffer, Throwable>(null, t));
  }
}
 
Example #10
Source File: TestInLineFileSystemHFileInLining.java    From hudi with Apache License 2.0 6 votes vote down vote up
/**
 * Reads entries from {@code scanner}, starting at its current position, and checks each
 * key and value against the content expected for indexes {@code start} onward.
 *
 * <p>The loop stops early when the scanner has no further entry; the final assertion
 * then requires that this happened exactly on index {@code start + n - 1}.
 *
 * @param scanner scanner already positioned on the entry for {@code start}
 * @param start index of the first expected entry
 * @param n number of entries expected
 * @return {@code start + n}
 * @throws IOException if the scanner fails
 */
private int readAndCheckbytes(HFileScanner scanner, int start, int n)
    throws IOException {
  String value = "value";
  int i = start;
  for (; i < (start + n); i++) {
    // Convert each buffer once and reuse the bytes for the comparison and the message.
    byte[] rawKey = Bytes.toBytes(scanner.getKey());
    byte[] valBytes = Bytes.toBytes(scanner.getValue());
    String keyStr = String.format(LOCAL_FORMATTER, Integer.valueOf(i));
    String valStr = value + keyStr;
    KeyValue kv = new KeyValue(Bytes.toBytes(keyStr), Bytes.toBytes("family"),
        Bytes.toBytes("qual"), Bytes.toBytes(valStr));
    byte[] keyBytes = new KeyValue.KeyOnlyKeyValue(rawKey, 0, rawKey.length).getKey();
    assertArrayEquals(kv.getKey(), keyBytes,
        "bytes for keys do not match " + keyStr + " " + Bytes.toString(rawKey));
    assertArrayEquals(Bytes.toBytes(valStr), valBytes,
        "bytes for vals do not match " + valStr + " " + Bytes.toString(valBytes));
    if (!scanner.next()) {
      break;
    }
  }
  // Expected value first, so a failure reports expected/actual the right way round.
  assertEquals(start + n - 1, i);
  return (start + n);
}
 
Example #11
Source File: HFileSortedOplog.java    From gemfirexd-oss with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an iterator over {@code scan} limited by the optional {@code from} and {@code to} keys.
 *
 * <p>The scan statistic is started before anything else happens; when no {@code stats}
 * object is available, a {@code ScanOperation} created with all-zero arguments on a
 * fresh {@code SortedOplogStatistics("", "")} is used instead. When {@code scan} is
 * {@code null} the constructor stops there and {@code initIterator()} is never called.
 *
 * @param scan the underlying scanner, or {@code null}
 * @param from lower bound key, or {@code null}
 * @param fromInclusive stored alongside {@code from}
 * @param to upper bound key, or {@code null}
 * @param toInclusive stored alongside {@code to}
 * @throws IOException declared for the work done by {@code initIterator()}
 */
public HFileSortedIterator(HFileScanner scan, byte[] from, boolean fromInclusive, byte[] to,
    boolean toInclusive) throws IOException {
  this.scan = scan;
  this.from = from;
  this.to = to;
  this.fromInclusive = fromInclusive;
  this.toInclusive = toInclusive;

  if (stats != null) {
    scanStat = stats.getScan();
  } else {
    scanStat = new SortedOplogStatistics("", "").new ScanOperation(0, 0, 0, 0, 0, 0, 0);
  }
  scanStart = scanStat.begin();

  if (scan != null) {
    // With assertions enabled, reject a range whose lower bound sorts after its upper bound.
    assert from == null || to == null
        || scan.getReader().getComparator().compare(from, to) <= 0;

    initIterator();
  }
}
 
Example #12
Source File: HFileSortedOplog.java    From gemfirexd-oss with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an iterator over {@code scan} that orders keys with {@code comparator}.
 *
 * <p>After storing its arguments the constructor starts the scan statistic obtained
 * from {@code sopConfig} and records the result of {@code evalFrom()} in
 * {@code foundNext}.
 *
 * @param scan the underlying scanner
 * @param comparator comparator used to check the bounds
 * @param from lower bound key, or {@code null}
 * @param fromInclusive stored alongside {@code from}
 * @param to upper bound key, or {@code null}
 * @param toInclusive stored alongside {@code to}
 * @throws IOException declared for the work done by {@code evalFrom()}
 */
public HFileSortedIterator(HFileScanner scan, SerializedComparator comparator,
    byte[] from, boolean fromInclusive,
    byte[] to, boolean toInclusive) throws IOException {
  this.scan = scan;
  this.comparator = comparator;
  this.from = from;
  this.to = to;
  this.fromInclusive = fromInclusive;
  this.toInclusive = toInclusive;

  // With assertions enabled, reject a range whose lower bound sorts after its upper bound.
  assert !(from != null
      && to != null
      && comparator.compare(from, 0, from.length, to, 0, to.length) > 0);

  start = sopConfig.getStatistics().getScan().begin();
  foundNext = evalFrom();
}
 
Example #13
Source File: TestHStoreFile.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a store file, creates an HFileLink to it under a second directory, opens the
 * link as an {@code HStoreFile} and checks that every entry can be read through it.
 */
@Test
public void testHFileLink() throws IOException {
  final RegionInfo hri =
    RegionInfoBuilder.newBuilder(TableName.valueOf("testHFileLinkTb")).build();
  // force temp data in hbase/target/test-data instead of /tmp/hbase-xxxx/
  Configuration testConf = new Configuration(this.conf);
  CommonFSUtils.setRootDir(testConf, testDir);
  Path tableDir = CommonFSUtils.getTableDir(testDir, hri.getTable());
  HRegionFileSystem regionFs =
    HRegionFileSystem.createRegionOnFileSystem(testConf, fs, tableDir, hri);
  HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();

  // Create the source store file and fill it.
  StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
    .withFilePath(regionFs.createTempName())
    .withFileContext(meta)
    .build();
  writeStoreFile(writer);

  // Commit it, then create a link to it from a separate "test-region" directory.
  Path storeFilePath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
  Path dstPath = new Path(regionFs.getTableDir(), new Path("test-region", TEST_FAMILY));
  HFileLink.create(testConf, this.fs, dstPath, hri, storeFilePath.getName());
  String linkName = HFileLink.createHFileLinkName(hri, storeFilePath.getName());
  Path linkFilePath = new Path(dstPath, linkName);

  // Open the store file through the link.
  StoreFileInfo storeFileInfo = new StoreFileInfo(testConf, this.fs, linkFilePath, true);
  HStoreFile hsf = new HStoreFile(storeFileInfo, BloomType.NONE, cacheConf);
  assertTrue(storeFileInfo.isLink());
  hsf.initReader();

  // Count the entry the scanner starts on plus every entry reached with next().
  HFileScanner s = hsf.getReader().getScanner(false, false);
  s.seekTo();
  int count = 0;
  do {
    count++;
  } while (s.next());
  assertEquals((LAST_CHAR - FIRST_CHAR + 1) * (LAST_CHAR - FIRST_CHAR + 1), count);
}
 
Example #14
Source File: TestHStoreFile.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a {@code StoreFileScanner} built on mocked readers reports
 * {@code shouldUseScanner == false} for a scan restricted to the column family
 * time range {@code [0, 1)} arguments.
 */
@Test
public void testEmptyStoreFileRestrictKeyRanges() throws Exception {
  byte[] family = Bytes.toBytes("ty");
  StoreFileReader mockedReader = mock(StoreFileReader.class);
  HStore mockedStore = mock(HStore.class);
  ColumnFamilyDescriptor descriptor = ColumnFamilyDescriptorBuilder.of(family);
  when(mockedStore.getColumnFamilyDescriptor()).thenReturn(descriptor);

  try (StoreFileScanner scanner = new StoreFileScanner(
      mockedReader, mock(HFileScanner.class), false, false, 0, 0, true)) {
    Scan timeRangeScan = new Scan();
    timeRangeScan.setColumnFamilyTimeRange(family, 0, 1);
    assertFalse(scanner.shouldUseScanner(timeRangeScan, mockedStore, 0));
  }
}
 
Example #15
Source File: TestImportTSVWithVisibilityLabels.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Method returns the total KVs in given hfile
 * @param fs File System
 * @param p HFile path
 * @return KV count in the given hfile; {@code 0} when the initial seek reports no entry
 * @throws IOException if the file cannot be opened or read
 */
private static int getKVCountFromHfile(FileSystem fs, Path p) throws IOException {
  Configuration conf = util.getConfiguration();
  HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
  try {
    HFileScanner scanner = reader.getScanner(false, false);
    if (!scanner.seekTo()) {
      // No first entry to stand on: counting it would report 1 for an empty file.
      return 0;
    }
    int count = 0;
    do {
      count++;
    } while (scanner.next());
    return count;
  } finally {
    // Close the reader even when scanning fails part-way through.
    reader.close();
  }
}
 
Example #16
Source File: TestHStoreFile.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that creating a {@code StoreFileScanner} marks the store file as referenced in
 * reads and that closing the scanner clears that state again.
 */
@Test
public void testStoreFileReference() throws Exception {
  final RegionInfo hri =
    RegionInfoBuilder.newBuilder(TableName.valueOf("testStoreFileReference")).build();
  Path tableDir = new Path(testDir, hri.getTable().getNameAsString());
  HRegionFileSystem regionFs =
    HRegionFileSystem.createRegionOnFileSystem(conf, fs, tableDir, hri);
  HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();

  // Write a store file and commit it into the region.
  StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
    .withFilePath(regionFs.createTempName())
    .withFileContext(meta)
    .build();
  writeStoreFile(writer);
  Path hsfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
  writer.close();

  HStoreFile file = new HStoreFile(this.fs, hsfPath, conf, cacheConf, BloomType.NONE, true);
  file.initReader();
  StoreFileReader storeReader = file.getReader();
  assertNotNull(storeReader);
  HFileScanner mockedHfs = mock(HFileScanner.class);
  StoreFileScanner scanner =
    new StoreFileScanner(storeReader, mockedHfs, false, false, 0, 0, false);

  // While the scanner is open the file must be reported as referenced.
  assertTrue("Verify file is being referenced", file.isReferencedInReads());
  scanner.close();
  // Once the scanner is closed the reference must be gone.
  assertFalse("Verify file is not being referenced", file.isReferencedInReads());
}
 
Example #17
Source File: HFilePerformanceEvaluation.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Seeks to a row produced by {@code getGaussianRandomRowBytes()} and then reads up to
 * 30 following cells, stopping early when the scanner runs out of entries.
 *
 * @param i not used by this implementation
 */
@Override
void doRow(int i) throws Exception {
  HFileScanner scanner = this.reader.getScanner(false, true);
  byte[] rowBytes = getGaussianRandomRowBytes();
  scanner.seekTo(createCell(rowBytes));

  int remaining = 30;
  while (remaining-- > 0) {
    boolean advanced = scanner.next();
    if (!advanced) {
      LOG.info("NOTHING FOLLOWS");
      return;
    }
    // TODO: Fix. Make scanner do Cells.
    scanner.getCell();
  }
}
 
Example #18
Source File: TestCacheOnWriteInSchema.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Walks every block of the store file at {@code path} up to the load-on-open section and
 * fails when a block of one of the two types tracked by {@code cowType} is cached although
 * it should not be, or is missing from the cache although it should be there.
 *
 * @param path store file to inspect
 * @throws IOException if the file cannot be opened or read
 */
private void readStoreFile(Path path) throws IOException {
  CacheConfig cacheConf = store.getCacheConfig();
  BlockCache cache = cacheConf.getBlockCache().get();
  HStoreFile storeFile = new HStoreFile(fs, path, conf, cacheConf, BloomType.ROWCOL, true);
  storeFile.initReader();
  HFile.Reader reader = storeFile.getReader().getHFileReader();
  try {
    // Open a scanner with (on read) caching disabled
    HFileScanner scanner = reader.getScanner(false, false);
    assertTrue(testDescription, scanner.seekTo());
    // Cribbed from io.hfile.TestCacheOnWrite
    long position = 0;
    while (position < reader.getTrailer().getLoadOnOpenDataOffset()) {
      // Flags: don't cache the block, use pread, this is not a compaction.
      // Also, pass null for expected block type to avoid checking it.
      HFileBlock block =
        reader.readBlock(position, -1, false, true, false, true, null, DataBlockEncoding.NONE);
      BlockCacheKey blockCacheKey = new BlockCacheKey(reader.getName(), position);
      boolean isCached = cache.getBlock(blockCacheKey, true, false, true) != null;
      boolean shouldBeCached = cowType.shouldBeCached(block.getBlockType());
      final BlockType blockType = block.getBlockType();

      if (shouldBeCached != isCached) {
        // Only the two block types tracked by cowType are enforced.
        if (cowType.blockType1.equals(blockType) || cowType.blockType2.equals(blockType)) {
          String details =
            "shouldBeCached: " + shouldBeCached + "\n" +
            "isCached: " + isCached + "\n" +
            "Test description: " + testDescription + "\n" +
            "block: " + block + "\n" +
            "blockCacheKey: " + blockCacheKey;
          throw new AssertionError(details);
        }
      }
      position += block.getOnDiskSizeWithHeader();
    }
  } finally {
    reader.close();
  }
}
 
Example #19
Source File: HFilePerformanceEvaluation.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Seeks to a random row and, unless the seek reports a negative result, checks the key
 * and value size of the cell the scanner is positioned on.
 *
 * @param i not used by this implementation
 */
@Override
void doRow(int i) throws Exception {
  HFileScanner scanner = this.reader.getScanner(false, true);
  byte[] row = getRandomRow();
  int seekResult = scanner.seekTo(createCell(row));
  if (seekResult < 0) {
    LOG.info("Not able to seekTo " + new String(row));
    return;
  }
  // TODO: Fix scanner so it does Cells
  Cell found = scanner.getCell();
  PerformanceEvaluationCommons.assertKey(row, found);
  PerformanceEvaluationCommons.assertValueSize(found.getValueLength(), ROW_LENGTH);
}
 
Example #20
Source File: TestHalfStoreFileReader.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Opens {@code p} through a {@code HalfStoreFileReader} for the reference {@code bottom},
 * calls {@code seekBefore(seekBefore)} on a fresh scanner and returns the cell the
 * scanner reports afterwards.
 *
 * @param p path of the file to open
 * @param fs file system holding {@code p}
 * @param bottom reference handed to the half reader
 * @param seekBefore cell passed to {@code seekBefore}
 * @param cacheConfig cache configuration handed to the half reader
 * @return the result of {@code scanner.getCell()} after the seek
 * @throws IOException if opening or reading the file fails
 */
private Cell doTestOfSeekBefore(Path p, FileSystem fs, Reference bottom, Cell seekBefore,
    CacheConfig cacheConfig) throws IOException {
  ReaderContext readerContext = new ReaderContextBuilder().withFileSystemAndPath(fs, p).build();
  HFileInfo info = new HFileInfo(readerContext, TEST_UTIL.getConfiguration());
  final HalfStoreFileReader halfReader = new HalfStoreFileReader(
      readerContext, info, cacheConfig, bottom, new AtomicInteger(0),
      TEST_UTIL.getConfiguration());
  info.initMetaAndIndex(halfReader.getHFileReader());
  halfReader.loadFileInfo();

  final HFileScanner halfScanner = halfReader.getScanner(false, false);
  halfScanner.seekBefore(seekBefore);
  return halfScanner.getCell();
}
 
Example #21
Source File: TestBulkLoadHFiles.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the HFile at {@code p}, asserts that it holds at least one entry and counts
 * all entries.
 *
 * @param p HFile path
 * @return number of entries read from the file
 * @throws IOException if the file cannot be opened or read
 */
private int verifyHFile(Path p) throws IOException {
  Configuration conf = util.getConfiguration();
  HFile.Reader reader =
    HFile.createReader(p.getFileSystem(conf), p, new CacheConfig(conf), true, conf);
  try {
    HFileScanner scanner = reader.getScanner(false, false);
    // Assert on the seek itself: a count taken from a do-while can never be zero.
    assertTrue(scanner.seekTo());
    int count = 0;
    do {
      count++;
    } while (scanner.next());
    return count;
  } finally {
    // Close the reader even when the assertion or a read fails.
    reader.close();
  }
}
 
Example #22
Source File: TestImportTsv.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Method returns the total KVs in given hfile
 * @param fs File System
 * @param p HFile path
 * @return KV count in the given hfile; {@code 0} when the initial seek reports no entry
 * @throws IOException if the file cannot be opened or read
 */
private static int getKVCountFromHfile(FileSystem fs, Path p) throws IOException {
  Configuration conf = util.getConfiguration();
  HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
  try {
    HFileScanner scanner = reader.getScanner(false, false);
    if (!scanner.seekTo()) {
      // No first entry to stand on: counting it would report 1 for an empty file.
      return 0;
    }
    int count = 0;
    do {
      count++;
    } while (scanner.next());
    return count;
  } finally {
    // Close the reader even when scanning fails part-way through.
    reader.close();
  }
}
 
Example #23
Source File: StoreFileScanner.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Implements a {@link KeyValueScanner} on top of the specified {@link HFileScanner}.
 * Constructing the scanner increments the reference count of {@code reader}.
 * @param reader store file reader this scanner belongs to
 * @param hfs the HFile scanner to wrap
 * @param useMVCC If true, scanner will filter out updates with MVCC larger than {@code readPt}.
 * @param hasMVCC Set to true if underlying store file reader has MVCC info.
 * @param readPt MVCC value to use to filter out the updates newer than this scanner.
 * @param scannerOrder Order of the scanner relative to other scanners. See
 *          {@link KeyValueScanner#getScannerOrder()}.
 * @param canOptimizeForNonNullColumn {@code true} if we can make sure there is no null column,
 *          otherwise {@code false}. This is a hint for optimization.
 */
public StoreFileScanner(StoreFileReader reader, HFileScanner hfs, boolean useMVCC,
    boolean hasMVCC, long readPt, long scannerOrder, boolean canOptimizeForNonNullColumn) {
  // Wrapped readers.
  this.reader = reader;
  this.hfs = hfs;

  // MVCC settings.
  this.enforceMVCC = useMVCC;
  this.hasMVCCInfo = hasMVCC;
  this.readPt = readPt;

  // Ordering and optimization hints.
  this.scannerOrder = scannerOrder;
  this.canOptimizeForNonNullColumn = canOptimizeForNonNullColumn;

  this.reader.incrementRefCount();
}
 
Example #24
Source File: MizoHFileIterator.java    From mizo with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the HFile at {@code path} and returns a scanner on which {@code seekTo()} has
 * already been called.
 *
 * @param fs file system holding the HFile; also supplies the configuration
 * @param path location of the HFile
 * @return the scanner obtained from the newly created reader
 * @throws IOException if the reader cannot be created or the seek fails
 */
public static HFileScanner createScanner(FileSystem fs, Path path) throws IOException {
    Configuration config = fs.getConf();
    HFileScanner scanner =
        HFile.createReader(fs, path, getCacheConfig(config), config).getScanner(false, false);
    scanner.seekTo();
    return scanner;
}
 
Example #25
Source File: CompressionTest.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a single cell to an HFile at {@code path} using the compression named by
 * {@code codec}, reads the first cell back and fails when its row differs from the row
 * that was written.
 *
 * @param fs file system to write to and read from
 * @param path location of the HFile to create
 * @param codec compression name handed to {@code HFileWriterImpl.compressionByName}
 * @throws Exception if writing or reading fails, or the row read back does not match
 */
public static void doSmokeTest(FileSystem fs, Path path, String codec)
throws Exception {
  Configuration conf = HBaseConfiguration.create();
  HFileContext context = new HFileContextBuilder()
    .withCompression(HFileWriterImpl.compressionByName(codec))
    .build();
  HFile.Writer writer = HFile.getWriterFactoryNoCache(conf)
    .withPath(fs, path)
    .withFileContext(context)
    .create();

  // Write any-old Cell...
  final byte[] rowKey = Bytes.toBytes("compressiontestkey");
  Cell written = ExtendedCellBuilderFactory.create(CellBuilderType.DEEP_COPY)
    .setRow(rowKey)
    .setFamily(HConstants.EMPTY_BYTE_ARRAY)
    .setQualifier(HConstants.EMPTY_BYTE_ARRAY)
    .setTimestamp(HConstants.LATEST_TIMESTAMP)
    .setType(KeyValue.Type.Maximum.getCode())
    .setValue(Bytes.toBytes("compressiontestval"))
    .build();
  writer.append(written);
  writer.appendFileInfo(Bytes.toBytes("compressioninfokey"), Bytes.toBytes("compressioninfoval"));
  writer.close();

  HFile.Reader reader = HFile.createReader(fs, path, CacheConfig.DISABLED, true, conf);
  try {
    HFileScanner scanner = reader.getScanner(false, true);
    scanner.seekTo(); // position to the start of file
    // Scanner does not do Cells yet. Do below for now till fixed.
    Cell readBack = scanner.getCell();
    boolean sameRow = CellComparator.getInstance().compareRows(written, readBack) == 0;
    if (!sameRow) {
      throw new Exception(
        "Read back incorrect result: " + written.toString() + " vs " + readBack.toString());
    }
  } finally {
    reader.close();
  }
}
 
Example #26
Source File: MizoHFileIterator.java    From mizo with Apache License 2.0 4 votes vote down vote up
/**
 * Creates an iterator backed by the given scanner.
 *
 * @param scanner scanner stored in {@code hfileScanner}; no seek is performed here
 */
public MizoHFileIterator(HFileScanner scanner) {
    this.hfileScanner = scanner;
}
 
Example #27
Source File: HStore.java    From hbase with Apache License 2.0 4 votes vote down vote up
/**
 * This throws a WrongRegionException if the HFile does not fit in this region, or an
 * InvalidHFileException if the HFile is not valid.
 *
 * <p>The checks run in this order: read/write access to {@code srcPath}, presence of a
 * first and last key, containment of the file's row range in the region, a warning (not
 * a failure) when the file is larger than the configured maximum region file size, and,
 * only when {@code verifyBulkLoads} is set, a full scan checking row order and that all
 * cells share one family. The reader opened here is always closed before returning.
 *
 * @param srcPath path of the HFile to validate
 * @throws IOException if the file cannot be accessed or read, or fails validation
 */
public void assertBulkLoadHFileOk(Path srcPath) throws IOException {
  HFile.Reader reader  = null;
  try {
    LOG.info("Validating hfile at " + srcPath + " for inclusion in " + this);
    FileSystem srcFs = srcPath.getFileSystem(conf);
    // Requires both read and write permission on the source file.
    srcFs.access(srcPath, FsAction.READ_WRITE);
    reader = HFile.createReader(srcFs, srcPath, cacheConf, isPrimaryReplicaStore(), conf);

    // A file without a first or last key is rejected with an IllegalStateException.
    Optional<byte[]> firstKey = reader.getFirstRowKey();
    Preconditions.checkState(firstKey.isPresent(), "First key can not be null");
    Optional<Cell> lk = reader.getLastKey();
    Preconditions.checkState(lk.isPresent(), "Last key can not be null");
    byte[] lastKey =  CellUtil.cloneRow(lk.get());

    if (LOG.isDebugEnabled()) {
      LOG.debug("HFile bounds: first=" + Bytes.toStringBinary(firstKey.get()) +
          " last=" + Bytes.toStringBinary(lastKey));
      LOG.debug("Region bounds: first=" +
          Bytes.toStringBinary(getRegionInfo().getStartKey()) +
          " last=" + Bytes.toStringBinary(getRegionInfo().getEndKey()));
    }

    // The whole row range of the file must fall inside this region.
    if (!this.getRegionInfo().containsRange(firstKey.get(), lastKey)) {
      throw new WrongRegionException(
          "Bulk load file " + srcPath.toString() + " does not fit inside region "
          + this.getRegionInfo().getRegionNameAsString());
    }

    // Oversized files are only reported; the load is not rejected for size.
    if(reader.length() > conf.getLong(HConstants.HREGION_MAX_FILESIZE,
        HConstants.DEFAULT_MAX_FILE_SIZE)) {
      LOG.warn("Trying to bulk load hfile " + srcPath + " with size: " +
          reader.length() + " bytes can be problematic as it may lead to oversplitting.");
    }

    if (verifyBulkLoads) {
      long verificationStartTime = EnvironmentEdgeManager.currentTime();
      LOG.info("Full verification started for bulk load hfile: {}", srcPath);
      Cell prevCell = null;
      HFileScanner scanner = reader.getScanner(false, false, false);
      scanner.seekTo();
      // Compare each cell with its predecessor: rows must not decrease and the family
      // must stay the same throughout the file.
      do {
        Cell cell = scanner.getCell();
        if (prevCell != null) {
          if (comparator.compareRows(prevCell, cell) > 0) {
            throw new InvalidHFileException("Previous row is greater than"
                + " current row: path=" + srcPath + " previous="
                + CellUtil.getCellKeyAsString(prevCell) + " current="
                + CellUtil.getCellKeyAsString(cell));
          }
          if (CellComparator.getInstance().compareFamilies(prevCell, cell) != 0) {
            throw new InvalidHFileException("Previous key had different"
                + " family compared to current key: path=" + srcPath
                + " previous="
                + Bytes.toStringBinary(prevCell.getFamilyArray(), prevCell.getFamilyOffset(),
                    prevCell.getFamilyLength())
                + " current="
                + Bytes.toStringBinary(cell.getFamilyArray(), cell.getFamilyOffset(),
                    cell.getFamilyLength()));
          }
        }
        prevCell = cell;
      } while (scanner.next());
      LOG.info("Full verification complete for bulk load hfile: " + srcPath.toString() +
        " took " + (EnvironmentEdgeManager.currentTime() - verificationStartTime) + " ms");
    }
  } finally {
    // reader stays null when an earlier step failed before it was created.
    if (reader != null) {
      reader.close();
    }
  }
}
 
Example #28
Source File: TestHFileOutputFormat2.java    From hbase with Apache License 2.0 4 votes vote down vote up
/**
 * Test that {@link HFileOutputFormat2} RecordWriter writes tags such as ttl into
 * hfile.
 *
 * <p>A single KeyValue carrying a TTL tag is written; every file found under the output
 * directory is then opened and its first cell must carry at least one tag, all of them
 * of type {@code TagType.TTL_TAG_TYPE}.
 */
@Test
public void test_WritingTagData()
    throws Exception {
  Configuration conf = new Configuration(this.util.getConfiguration());
  final String HFILE_FORMAT_VERSION_CONF_KEY = "hfile.format.version";
  conf.setInt(HFILE_FORMAT_VERSION_CONF_KEY, HFile.MIN_FORMAT_VERSION_WITH_TAGS);
  RecordWriter<ImmutableBytesWritable, Cell> writer = null;
  TaskAttemptContext context = null;
  Path dir =
      util.getDataTestDir("WritingTagData");
  try {
    conf.set(HFileOutputFormat2.OUTPUT_TABLE_NAME_CONF_KEY, TABLE_NAMES[0].getNameAsString());
    // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
    conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);
    Job job = new Job(conf);
    FileOutputFormat.setOutputPath(job, dir);
    context = createTestTaskAttemptContext(job);
    HFileOutputFormat2 hof = new HFileOutputFormat2();
    writer = hof.getRecordWriter(context);
    final byte [] b = Bytes.toBytes("b");

    List<Tag> tags = new ArrayList<>();
    tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(978670)));
    KeyValue kv = new KeyValue(b, b, b, HConstants.LATEST_TIMESTAMP, b, tags);
    writer.write(new ImmutableBytesWritable(), kv);
    writer.close(context);
    // Mark the writer as closed so the finally block does not close it a second time.
    writer = null;
    FileSystem fs = dir.getFileSystem(conf);
    RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(dir, true);
    while (iterator.hasNext()) {
      LocatedFileStatus keyFileStatus = iterator.next();
      HFile.Reader reader =
          HFile.createReader(fs, keyFileStatus.getPath(), new CacheConfig(conf), true, conf);
      try {
        HFileScanner scanner = reader.getScanner(false, false, false);
        scanner.seekTo();
        Cell cell = scanner.getCell();
        List<Tag> tagsFromCell = PrivateCellUtil.getTags(cell);
        assertTrue(tagsFromCell.size() > 0);
        for (Tag tag : tagsFromCell) {
          assertTrue(tag.getType() == TagType.TTL_TAG_TYPE);
        }
      } finally {
        // Each file gets its own reader; close it before moving on to the next file.
        reader.close();
      }
    }
  } finally {
    if (writer != null && context != null) {
      writer.close(context);
    }
    dir.getFileSystem(conf).delete(dir, true);
  }
}
 
Example #29
Source File: TestHFileOutputFormat2.java    From hbase with Apache License 2.0 4 votes vote down vote up
/**
 * Run small MR job.
 *
 * <p>After the job completes, every output file of each family is opened: its entry
 * count multiplied by the serialized size of its first cell must not exceed the
 * configured maximum region file size, and the entry counts of a family must add up to
 * {@code ROWSPERSPLIT}.
 */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
public void testWritingPEData() throws Exception {
  Configuration conf = util.getConfiguration();
  Path testDir = util.getDataTestDirOnTestFS("testWritingPEData");
  FileSystem fs = testDir.getFileSystem(conf);

  // Set down this value or we OOME in eclipse.
  conf.setInt("mapreduce.task.io.sort.mb", 20);
  // Write a few files.
  long hregionMaxFilesize = 10 * 1024;
  conf.setLong(HConstants.HREGION_MAX_FILESIZE, hregionMaxFilesize);

  Job job = new Job(conf, "testWritingPEData");
  setupRandomGeneratorMapper(job, false);
  // This partitioner doesn't work well for number keys but using it anyways
  // just to demonstrate how to configure it.
  byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
  byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];

  Arrays.fill(startKey, (byte)0);
  Arrays.fill(endKey, (byte)0xff);

  job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
  // Set start and end rows for partitioner.
  SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
  SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
  job.setReducerClass(CellSortReducer.class);
  job.setOutputFormatClass(HFileOutputFormat2.class);
  job.setNumReduceTasks(4);
  job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName(),
      CellSerialization.class.getName());

  FileOutputFormat.setOutputPath(job, testDir);
  assertTrue(job.waitForCompletion(false));
  FileStatus [] files = fs.listStatus(testDir);
  assertTrue(files.length > 0);

  //check output file num and size.
  for (byte[] family : FAMILIES) {
    long kvCount = 0;
    RemoteIterator<LocatedFileStatus> iterator =
            fs.listFiles(testDir.suffix("/" + new String(family)), true);
    while (iterator.hasNext()) {
      LocatedFileStatus keyFileStatus = iterator.next();
      HFile.Reader reader =
              HFile.createReader(fs, keyFileStatus.getPath(), new CacheConfig(conf), true, conf);
      try {
        HFileScanner scanner = reader.getScanner(false, false, false);

        kvCount += reader.getEntries();
        scanner.seekTo();
        long perKVSize = scanner.getCell().getSerializedSize();
        assertTrue("Data size of each file should not be too large.",
                perKVSize * reader.getEntries() <= hregionMaxFilesize);
      } finally {
        // Each file gets its own reader; close it before moving on to the next file.
        reader.close();
      }
    }
    assertEquals("Should write expected data in output file.", ROWSPERSPLIT, kvCount);
  }
}
 
Example #30
Source File: HFileSortedOplog.java    From gemfirexd-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Returns an iterator over the part of the file selected by {@code offset} and
 * {@code length}, using the root block index to turn the byte range into a key range.
 *
 * <p>The from key is the key of the first root block whose offset is not below
 * {@code offset}; the to key is the key of the first root block, searching on from
 * there, whose offset is not below {@code offset + length}. When no from key is found
 * an iterator created with a {@code null} scanner is returned. The from key is passed
 * as inclusive and the to key as exclusive; the to key is {@code null} when no block
 * starts at or beyond the end of the range.
 *
 * @param offset start of the byte range
 * @param length size of the byte range
 * @return iterator over the selected key range
 * @throws IOException if the block index or the scanner cannot be obtained
 */
@Override
public HoplogIterator<byte[], byte[]> scan(long offset, long length)
    throws IOException {
  // load block indexes in memory
  BlockIndexReader bir = reader.getDataBlockIndexReader();
  int blockCount = bir.getRootBlockCount();

  // Skip every block that starts before the requested offset.
  int idx = 0;
  while (idx < blockCount && bir.getRootBlockOffset(idx) < offset) {
    idx++;
  }
  byte[] fromKey = (idx < blockCount) ? bir.getRootBlockKey(idx) : null;
  if (fromKey == null) {
    // seems no block starts after the offset. return no-op scanner
    return new HFileSortedIterator(null, null, false, null, false);
  }

  // Continue from the same block and skip everything starting inside the range.
  final long rangeEnd = offset + length;
  while (idx < blockCount && bir.getRootBlockOffset(idx) < rangeEnd) {
    idx++;
  }
  byte[] toKey = (idx < blockCount) ? bir.getRootBlockKey(idx) : null;

  // from key is included in scan and to key is excluded
  HFileScanner scanner = reader.getScanner(true, false);
  return new HFileSortedIterator(scanner, fromKey, true, toKey, false);
}