Java Code Examples for org.apache.hadoop.hbase.io.hfile.HFile#Writer

The following examples show how to use org.apache.hadoop.hbase.io.hfile.HFile#Writer. You can go to the original project or source file by following the links above each example.
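
Before diving into the examples, here is a minimal, self-contained sketch of the typical HFile.Writer lifecycle: obtain a writer factory, configure a path and HFileContext, append cells in ascending key order, and close the writer. The output path and cell contents below are illustrative assumptions, not taken from any particular example.

Configuration conf = HBaseConfiguration.create();
FileSystem fs = FileSystem.get(conf);
Path path = new Path("/tmp/example.hfile"); // hypothetical output location
HFileContext context = new HFileContextBuilder().build();
HFile.Writer writer = HFile.getWriterFactory(conf, new CacheConfig(conf))
    .withPath(fs, path)
    .withFileContext(context)
    .create();
try {
  // HFiles store cells sorted by key, so appends must be in ascending order.
  writer.append(new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("f"),
      Bytes.toBytes("q"), System.currentTimeMillis(), Bytes.toBytes("value1")));
} finally {
  writer.close();
}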
Example 1
Source File: BloomFilterFactory.java    From hbase with Apache License 2.0
/**
 * Creates a new Delete Family Bloom filter at the time of
 * {@link org.apache.hadoop.hbase.regionserver.HStoreFile} writing.
 * @param conf the configuration to read Bloom filter settings from
 * @param cacheConf the cache configuration for the store
 * @param maxKeys an estimate of the number of keys we expect to insert.
 *        Irrelevant if compound Bloom filters are enabled.
 * @param writer the HFile writer
 * @return the new Bloom filter, or null if Bloom filters are disabled
 *         or the filter could not be created
 */
public static BloomFilterWriter createDeleteBloomAtWrite(Configuration conf,
    CacheConfig cacheConf, int maxKeys, HFile.Writer writer) {
  if (!isDeleteFamilyBloomEnabled(conf)) {
    LOG.info("Delete Bloom filters are disabled by configuration for "
        + writer.getPath()
        + (conf == null ? " (configuration is null)" : ""));
    return null;
  }

  float err = getErrorRate(conf);

  int maxFold = getMaxFold(conf);
  // In case of compound Bloom filters we ignore the maxKeys hint.
  CompoundBloomFilterWriter bloomWriter = new CompoundBloomFilterWriter(getBloomBlockSize(conf),
      err, Hash.getHashType(conf), maxFold, cacheConf.shouldCacheBloomsOnWrite(),
      null, BloomType.ROW);
  writer.addInlineBlockWriter(bloomWriter);
  return bloomWriter;
}
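
As a hedged usage sketch (the surrounding loop and variable names are assumptions, not part of the example above): the returned BloomFilterWriter is registered as an inline block writer, and the store writer then feeds it the same delete-family cells that go into the HFile. In recent HBase versions BloomFilterWriter extends CellSink, which is what makes append(Cell) available.

BloomFilterWriter deleteBloom =
    BloomFilterFactory.createDeleteBloomAtWrite(conf, cacheConf, maxKeys, writer);
for (Cell cell : deleteFamilyCells) { // hypothetical source of cells
  writer.append(cell);
  if (deleteBloom != null) { // null means Delete Family blooms are disabled
    deleteBloom.append(cell);
  }
}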
 
Example 2
Source File: TestBulkLoad.java    From hbase with Apache License 2.0
private String createHFileForFamilies(byte[] family) throws IOException {
  HFile.WriterFactory hFileFactory = HFile.getWriterFactoryNoCache(conf);
  // TODO We need a way to do this without creating files
  File hFileLocation = testFolder.newFile();
  FSDataOutputStream out = new FSDataOutputStream(new FileOutputStream(hFileLocation), null);
  try {
    hFileFactory.withOutputStream(out);
    hFileFactory.withFileContext(new HFileContextBuilder().build());
    HFile.Writer writer = hFileFactory.create();
    try {
      writer.append(new KeyValue(ExtendedCellBuilderFactory.create(CellBuilderType.DEEP_COPY)
        .setRow(randomBytes)
        .setFamily(family)
        .setQualifier(randomBytes)
        .setTimestamp(0L)
        .setType(KeyValue.Type.Put.getCode())
        .setValue(randomBytes)
        .build()));
    } finally {
      writer.close();
    }
  } finally {
    out.close();
  }
  return hFileLocation.getAbsoluteFile().getAbsolutePath();
}
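
The returned string is the absolute path of the freshly written HFile; the surrounding test hands it to the bulk-load machinery. As a rough sketch under stated assumptions (the staging directory layout and table name are hypothetical), loading such a file with HBase's BulkLoadHFiles tool looks like this:

// BulkLoadHFiles expects a directory with one subdirectory per column
// family, each containing the HFiles to load.
Path stagingDir = new Path("/tmp/staging"); // assumed layout: stagingDir/<family>/<hfile>
BulkLoadHFiles.create(conf).bulkLoad(TableName.valueOf("myTable"), stagingDir);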
 
Example 3
Source File: TestAccessController.java    From hbase with Apache License 2.0
private void createHFile(Path path,
    byte[] family, byte[] qualifier,
    byte[] startKey, byte[] endKey, int numRows) throws IOException {
  HFile.Writer writer = null;
  long now = System.currentTimeMillis();
  try {
    HFileContext context = new HFileContextBuilder().build();
    writer = HFile.getWriterFactory(conf, new CacheConfig(conf)).withPath(fs, path)
        .withFileContext(context).create();
    // numRows - 2 split points plus the two boundary keys yields numRows rows in total
    for (byte[] key : Bytes.iterateOnSplits(startKey, endKey, true, numRows - 2)) {
      KeyValue kv = new KeyValue(key, family, qualifier, now, key);
      writer.append(kv);
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }
}
 
Example 4
Source File: TestHRegionServerBulkLoad.java    From hbase with Apache License 2.0
/**
 * Create an HFile with the given number of rows, each holding the specified value.
 */
public static void createHFile(FileSystem fs, Path path, byte[] family,
    byte[] qualifier, byte[] value, int numRows) throws IOException {
  HFileContext context = new HFileContextBuilder().withBlockSize(BLOCKSIZE)
                          .withCompression(COMPRESSION)
                          .build();
  HFile.Writer writer = HFile
      .getWriterFactory(conf, new CacheConfig(conf))
      .withPath(fs, path)
      .withFileContext(context)
      .create();
  long now = System.currentTimeMillis();
  try {
    // write numRows rows with sequentially generated row keys and the same value
    for (int i = 0; i < numRows; i++) {
      KeyValue kv = new KeyValue(rowkey(i), family, qualifier, now, value);
      writer.append(kv);
    }
    writer.appendFileInfo(BULKLOAD_TIME_KEY, Bytes.toBytes(now));
  } finally {
    writer.close();
  }
}
 
Example 5
Source File: TestRegionObserverInterface.java    From hbase with Apache License 2.0
private static void createHFile(Configuration conf, FileSystem fs, Path path, byte[] family,
    byte[] qualifier) throws IOException {
  HFileContext context = new HFileContextBuilder().build();
  HFile.Writer writer = HFile.getWriterFactory(conf, new CacheConfig(conf)).withPath(fs, path)
      .withFileContext(context).create();
  long now = System.currentTimeMillis();
  try {
    for (int i = 1; i <= 9; i++) {
      KeyValue kv =
          new KeyValue(Bytes.toBytes(i + ""), family, qualifier, now, Bytes.toBytes(i + ""));
      writer.append(kv);
    }
  } finally {
    writer.close();
  }
}
 
Example 6
Source File: CompressionTest.java    From hbase with Apache License 2.0
public static void doSmokeTest(FileSystem fs, Path path, String codec)
throws Exception {
  Configuration conf = HBaseConfiguration.create();
  HFileContext context = new HFileContextBuilder()
                         .withCompression(HFileWriterImpl.compressionByName(codec)).build();
  HFile.Writer writer = HFile.getWriterFactoryNoCache(conf)
      .withPath(fs, path)
      .withFileContext(context)
      .create();
  // Write any-old Cell...
  final byte [] rowKey = Bytes.toBytes("compressiontestkey");
  Cell c = ExtendedCellBuilderFactory.create(CellBuilderType.DEEP_COPY)
    .setRow(rowKey)
    .setFamily(HConstants.EMPTY_BYTE_ARRAY)
    .setQualifier(HConstants.EMPTY_BYTE_ARRAY)
    .setTimestamp(HConstants.LATEST_TIMESTAMP)
    .setType(KeyValue.Type.Maximum.getCode())
    .setValue(Bytes.toBytes("compressiontestval"))
    .build();
  writer.append(c);
  writer.appendFileInfo(Bytes.toBytes("compressioninfokey"), Bytes.toBytes("compressioninfoval"));
  writer.close();
  Cell cc = null;
  HFile.Reader reader = HFile.createReader(fs, path, CacheConfig.DISABLED, true, conf);
  try {
    HFileScanner scanner = reader.getScanner(false, true);
    scanner.seekTo(); // position to the start of file
    // Scanner does not do Cells yet. Do below for now till fixed.
    cc = scanner.getCell();
    if (CellComparator.getInstance().compareRows(c, cc) != 0) {
      throw new Exception("Read back incorrect result: " + c.toString() + " vs " + cc.toString());
    }
  } finally {
    reader.close();
  }
}
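
Calling the smoke test is a one-liner: it writes a single cell with the named codec, reads it back, and throws if the round trip fails. A hedged usage sketch (the path and codec name are illustrative):

FileSystem fs = FileSystem.get(HBaseConfiguration.create());
CompressionTest.doSmokeTest(fs, new Path("/tmp/compression-smoke"), "gz"); // throws on failure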
 
Example 7
Source File: TestHalfStoreFileReader.java    From hbase with Apache License 2.0
/**
 * Test the scanner and reseek of a half hfile scanner. The scanner API demands that seekTo and
 * reseekTo() return < 0 only if the key lies before the start of the file (with no position on
 * the scanner), 0 on a perfect match (rare), and > 0 on an imperfect match. The latter case is
 * the most common, so we should generally return 1, and when we do, there may or may not be a
 * 'next' in the scanner/file. A bug in the half file scanner returned -1 at the end of the
 * bottom half, which left the infrastructure above with a null and caused NPEs and other
 * problems. This test reproduces that failure, and also exercises both the bottom and top
 * halves of the file.
 */
@Test
public void testHalfScanAndReseek() throws IOException {
  String root_dir = TEST_UTIL.getDataTestDir().toString();
  Path p = new Path(root_dir, "test");

  Configuration conf = TEST_UTIL.getConfiguration();
  FileSystem fs = FileSystem.get(conf);
  CacheConfig cacheConf = new CacheConfig(conf);
  HFileContext meta = new HFileContextBuilder().withBlockSize(1024).build();
  HFile.Writer w =
      HFile.getWriterFactory(conf, cacheConf).withPath(fs, p).withFileContext(meta).create();

  // write some things.
  List<KeyValue> items = genSomeKeys();
  for (KeyValue kv : items) {
    w.append(kv);
  }
  w.close();

  HFile.Reader r = HFile.createReader(fs, p, cacheConf, true, conf);
  Cell midKV = r.midKey().get();
  byte[] midkey = CellUtil.cloneRow(midKV);

  // System.out.println("midkey: " + midKV + " or: " + Bytes.toStringBinary(midkey));

  Reference bottom = new Reference(midkey, Reference.Range.bottom);
  doTestOfScanAndReseek(p, fs, bottom, cacheConf);

  Reference top = new Reference(midkey, Reference.Range.top);
  doTestOfScanAndReseek(p, fs, top, cacheConf);

  r.close();
}
 
Example 8
Source File: TestBulkLoadReplication.java    From hbase with Apache License 2.0
private String createHFileForFamilies(byte[] row, byte[] value,
    Configuration clusterConfig) throws IOException {
  CellBuilder cellBuilder = CellBuilderFactory.create(CellBuilderType.DEEP_COPY);
  cellBuilder.setRow(row)
    .setFamily(TestReplicationBase.famName)
    .setQualifier(Bytes.toBytes("1"))
    .setValue(value)
    .setType(Cell.Type.Put);

  HFile.WriterFactory hFileFactory = HFile.getWriterFactoryNoCache(clusterConfig);
  // TODO We need a way to do this without creating files
  File hFileLocation = testFolder.newFile();
  FSDataOutputStream out =
    new FSDataOutputStream(new FileOutputStream(hFileLocation), null);
  try {
    hFileFactory.withOutputStream(out);
    hFileFactory.withFileContext(new HFileContextBuilder().build());
    HFile.Writer writer = hFileFactory.create();
    try {
      writer.append(new KeyValue(cellBuilder.build()));
    } finally {
      writer.close();
    }
  } finally {
    out.close();
  }
  return hFileLocation.getAbsoluteFile().getAbsolutePath();
}
 
Example 9
Source File: TestHRegionReplayEvents.java    From hbase with Apache License 2.0
private String createHFileForFamilies(Path testPath, byte[] family,
    byte[] valueBytes) throws IOException {
  HFile.WriterFactory hFileFactory = HFile.getWriterFactoryNoCache(TEST_UTIL.getConfiguration());
  // TODO We need a way to do this without creating files
  Path testFile = new Path(testPath, TEST_UTIL.getRandomUUID().toString());
  FSDataOutputStream out = TEST_UTIL.getTestFileSystem().create(testFile);
  try {
    hFileFactory.withOutputStream(out);
    hFileFactory.withFileContext(new HFileContextBuilder().build());
    HFile.Writer writer = hFileFactory.create();
    try {
      writer.append(new KeyValue(ExtendedCellBuilderFactory.create(CellBuilderType.DEEP_COPY)
        .setRow(valueBytes)
        .setFamily(family)
        .setQualifier(valueBytes)
        .setTimestamp(0L)
        .setType(KeyValue.Type.Put.getCode())
        .setValue(valueBytes)
        .build()));
    } finally {
      writer.close();
    }
  } finally {
    out.close();
  }
  return testFile.toString();
}
 
Example 10
Source File: TestScannerWithBulkload.java    From hbase with Apache License 2.0
private Path writeToHFile(long l, String hFilePath, String pathStr, boolean nativeHFile)
    throws IOException {
  FileSystem fs = FileSystem.get(TEST_UTIL.getConfiguration());
  final Path hfilePath = new Path(hFilePath);
  fs.mkdirs(hfilePath);
  Path path = new Path(pathStr);
  HFile.WriterFactory wf = HFile.getWriterFactoryNoCache(TEST_UTIL.getConfiguration());
  Assert.assertNotNull(wf);
  HFileContext context = new HFileContextBuilder().build();
  HFile.Writer writer = wf.withPath(fs, path).withFileContext(context).create();
  KeyValue kv = new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("col"), Bytes.toBytes("q"), l,
      Bytes.toBytes("version2"));

  // Set cell seq id to test bulk load native hfiles.
  if (nativeHFile) {
    // Set a big seq id. Scan should not look at this seq id in a bulk loaded file.
    // Scan should only look at the seq id appended at the bulk load time, and not skip
    // this kv.
    kv.setSequenceId(9999999);
  }

  writer.append(kv);

  if (nativeHFile) {
    // Set a big MAX_SEQ_ID_KEY. Scan should not look at this seq id in a bulk loaded file.
    // Scan should only look at the seq id appended at the bulk load time, and not skip its
    // kv.
    writer.appendFileInfo(MAX_SEQ_ID_KEY, Bytes.toBytes(9999999L));
  } else {
    writer.appendFileInfo(BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
  }
  writer.close();
  return hfilePath;
}
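
As a hedged follow-up sketch: the file-info entries written above can be read back through an HFile.Reader. The getHFileInfo() accessor is an assumption based on recent HFile.Reader versions (older releases exposed the same map via loadFileInfo()).

HFile.Reader reader = HFile.createReader(fs, path, CacheConfig.DISABLED, true, conf);
try {
  // HFileInfo behaves as a sorted map of the file-info block's key/value pairs.
  byte[] bulkloadTime = reader.getHFileInfo().get(BULKLOAD_TIME_KEY); // assumed accessor
  System.out.println("bulk load time: " + Bytes.toLong(bulkloadTime));
} finally {
  reader.close();
}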
 
Example 11
Source File: HFileTestUtil.java    From hbase with Apache License 2.0
/**
 * Create an HFile with the given number of rows between a given
 * start key and end key @ family:qualifier.
 * If withTag is true, the rowKey is added as the tag value for
 * tag type MOB_TABLE_NAME_TAG_TYPE.
 */
public static void createHFile(
    Configuration configuration,
    FileSystem fs, Path path, DataBlockEncoding encoding,
    byte[] family, byte[] qualifier,
    byte[] startKey, byte[] endKey, int numRows, boolean withTag) throws IOException {
  HFileContext meta = new HFileContextBuilder()
      .withIncludesTags(withTag)
      .withDataBlockEncoding(encoding)
      .withColumnFamily(family)
      .build();
  HFile.Writer writer = HFile.getWriterFactory(configuration, new CacheConfig(configuration))
      .withPath(fs, path)
      .withFileContext(meta)
      .create();
  long now = System.currentTimeMillis();
  try {
    // numRows - 2 split points plus the two boundary keys yields numRows rows in total
    for (byte[] key : Bytes.iterateOnSplits(startKey, endKey, numRows - 2)) {
      Cell kv = new KeyValue(key, family, qualifier, now, key);
      if (withTag) {
        // add a tag.  Arbitrarily chose mob tag since we have a helper already.
        Tag tableNameTag = new ArrayBackedTag(TagType.MOB_TABLE_NAME_TAG_TYPE, key);
        kv = MobUtils.createMobRefCell(kv, key, tableNameTag);

        // verify that the kv has the tag.
        Optional<Tag> tag = PrivateCellUtil.getTag(kv, TagType.MOB_TABLE_NAME_TAG_TYPE);
        if (!tag.isPresent()) {
          throw new IllegalStateException("Tag didn't stick to KV " + kv.toString());
        }
      }
      writer.append(kv);
    }
  } finally {
    writer.appendFileInfo(BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
    writer.close();
  }
}
 
Example 12
Source File: TestMobStoreCompaction.java    From hbase with Apache License 2.0
/**
 * Create an HFile with the given number of bytes
 */
private void createHFile(Path path, int rowIdx, byte[] dummyData) throws IOException {
  HFileContext meta = new HFileContextBuilder().build();
  HFile.Writer writer = HFile.getWriterFactory(conf, new CacheConfig(conf)).withPath(fs, path)
      .withFileContext(meta).create();
  long now = System.currentTimeMillis();
  try {
    KeyValue kv = new KeyValue(Bytes.add(STARTROW, Bytes.toBytes(rowIdx)), COLUMN_FAMILY,
        Bytes.toBytes("colX"), now, dummyData);
    writer.append(kv);
  } finally {
    writer.appendFileInfo(BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
    writer.close();
  }
}
 
Example 13
Source File: BloomFilterFactory.java    From hbase with Apache License 2.0
/**
 * Creates a new general (Row or RowCol) Bloom filter at the time of
 * {@link org.apache.hadoop.hbase.regionserver.HStoreFile} writing.
 *
 * @param conf the configuration to read Bloom filter settings from
 * @param cacheConf the cache configuration for the store
 * @param bloomType the type of Bloom filter to create
 * @param maxKeys an estimate of the number of keys we expect to insert.
 *        Irrelevant if compound Bloom filters are enabled.
 * @param writer the HFile writer
 * @return the new Bloom filter, or null if Bloom filters are disabled
 *         or the filter could not be created
 */
public static BloomFilterWriter createGeneralBloomAtWrite(Configuration conf,
    CacheConfig cacheConf, BloomType bloomType, int maxKeys,
    HFile.Writer writer) {
  if (!isGeneralBloomEnabled(conf)) {
    LOG.trace("Bloom filters are disabled by configuration for "
        + writer.getPath()
        + (conf == null ? " (configuration is null)" : ""));
    return null;
  } else if (bloomType == BloomType.NONE) {
    LOG.trace("Bloom filter is turned off for the column family");
    return null;
  }

  float err = getErrorRate(conf);

  // In case of row/column Bloom filter lookups, each lookup is an OR of two
  // separate lookups. Therefore, if each lookup's false positive rate is p,
  // the resulting false positive rate is err = 1 - (1 - p)^2, and
  // p = 1 - sqrt(1 - err).
  if (bloomType == BloomType.ROWCOL) {
    err = (float) (1 - Math.sqrt(1 - err));
  }

  int maxFold = conf.getInt(IO_STOREFILE_BLOOM_MAX_FOLD,
      MAX_ALLOWED_FOLD_FACTOR);

  // Do we support compound bloom filters?
  // In case of compound Bloom filters we ignore the maxKeys hint.
  CompoundBloomFilterWriter bloomWriter = new CompoundBloomFilterWriter(getBloomBlockSize(conf),
      err, Hash.getHashType(conf), maxFold, cacheConf.shouldCacheBloomsOnWrite(),
      bloomType == BloomType.ROWCOL ? CellComparatorImpl.COMPARATOR : null, bloomType);
  writer.addInlineBlockWriter(bloomWriter);
  return bloomWriter;
}
 
Example 14
Source File: StoreFileWriter.java    From hbase with Apache License 2.0
/** For use in testing. */
HFile.Writer getHFileWriter() {
  return writer;
}
 
Example 15
Source File: TestInLineFileSystemHFileInLining.java    From hudi with Apache License 2.0
private void writeRecords(HFile.Writer writer) throws IOException {
  writeSomeRecords(writer);
  writer.close();
}
 
Example 16
Source File: TestHalfStoreFileReader.java    From hbase with Apache License 2.0
@Test
public void testHalfScanner() throws IOException {
  String root_dir = TEST_UTIL.getDataTestDir().toString();
  Path p = new Path(root_dir, "test");
  Configuration conf = TEST_UTIL.getConfiguration();
  FileSystem fs = FileSystem.get(conf);
  CacheConfig cacheConf = new CacheConfig(conf);
  HFileContext meta = new HFileContextBuilder().withBlockSize(1024).build();
  HFile.Writer w =
      HFile.getWriterFactory(conf, cacheConf).withPath(fs, p).withFileContext(meta).create();

  // write some things.
  List<KeyValue> items = genSomeKeys();
  for (KeyValue kv : items) {
    w.append(kv);
  }
  w.close();

  HFile.Reader r = HFile.createReader(fs, p, cacheConf, true, conf);
  Cell midKV = r.midKey().get();
  byte[] midkey = CellUtil.cloneRow(midKV);

  Reference bottom = new Reference(midkey, Reference.Range.bottom);
  Reference top = new Reference(midkey, Reference.Range.top);

  // Ugly code to get the item before the midkey
  KeyValue beforeMidKey = null;
  for (KeyValue item : items) {
    if (CellComparatorImpl.COMPARATOR.compare(item, midKV) >= 0) {
      break;
    }
    beforeMidKey = item;
  }
  System.out.println("midkey: " + midKV + " or: " + Bytes.toStringBinary(midkey));
  System.out.println("beforeMidKey: " + beforeMidKey);

  // Seek on the splitKey, should be in top, not in bottom
  Cell foundKeyValue = doTestOfSeekBefore(p, fs, bottom, midKV, cacheConf);
  assertEquals(beforeMidKey, foundKeyValue);

  // Seek to the last thing: it should be the penultimate item in the top half, and the one
  // before the midkey in the bottom half.
  foundKeyValue = doTestOfSeekBefore(p, fs, top, items.get(items.size() - 1), cacheConf);
  assertEquals(items.get(items.size() - 2), foundKeyValue);

  foundKeyValue = doTestOfSeekBefore(p, fs, bottom, items.get(items.size() - 1), cacheConf);
  assertEquals(beforeMidKey, foundKeyValue);

  // Try and seek before something that is in the bottom.
  foundKeyValue = doTestOfSeekBefore(p, fs, top, items.get(0), cacheConf);
  assertNull(foundKeyValue);

  // Try and seek before the first thing.
  foundKeyValue = doTestOfSeekBefore(p, fs, bottom, items.get(0), cacheConf);
  assertNull(foundKeyValue);

  // Try and seek before the second thing in the top and bottom.
  foundKeyValue = doTestOfSeekBefore(p, fs, top, items.get(1), cacheConf);
  assertNull(foundKeyValue);

  foundKeyValue = doTestOfSeekBefore(p, fs, bottom, items.get(1), cacheConf);
  assertEquals(items.get(0), foundKeyValue);

  // Try to seek before the splitKey in the top file
  foundKeyValue = doTestOfSeekBefore(p, fs, top, midKV, cacheConf);
  assertNull(foundKeyValue);
}
 
Example 17
Source File: BloomContext.java    From hbase with Apache License 2.0
/**
 * Adds the last bloom key to the HFile Writer as part of StorefileWriter close.
 * @param writer the HFile writer to append the last bloom key to
 * @throws IOException if the bloom key cannot be written
 */
public abstract void addLastBloomKey(HFile.Writer writer) throws IOException;
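
Example 17 declares only the abstract hook. Below is a hedged sketch of what a row-bloom implementation might look like, modeled loosely on HBase's row bloom context; the LAST_BLOOM_KEY constant, the superclass constructor signature, and the getLastCell() accessor are all assumptions rather than verbatim API.

// Hypothetical subclass: persist the last row key seen by the bloom filter
// into the HFile's file-info block when the store file writer closes.
public class RowBloomContextSketch extends BloomContext {
  // assumed file-info key; HBase uses a similar constant for row blooms
  private static final byte[] LAST_BLOOM_KEY = Bytes.toBytes("LAST_BLOOM_KEY");

  public RowBloomContextSketch(BloomFilterWriter bloomFilterWriter, CellComparator comparator) {
    super(bloomFilterWriter, comparator); // assumed superclass constructor
  }

  @Override
  public void addLastBloomKey(HFile.Writer writer) throws IOException {
    Cell last = getLastCell(); // assumed accessor for the last cell added to the bloom
    if (last != null) {
      writer.appendFileInfo(LAST_BLOOM_KEY, CellUtil.cloneRow(last));
    }
  }
}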