Java Code Examples for org.apache.hadoop.hbase.regionserver.BloomType

The following examples show how to use org.apache.hadoop.hbase.regionserver.BloomType. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: atlas   Source File: HBaseBasedAuditRepository.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates the table identified by {@code tableName} with a single column family when the
 * table is not present yet; otherwise only logs that it exists.
 *
 * @throws AtlasException wrapping any {@link IOException} thrown while obtaining the admin,
 *         checking for the table or creating it
 */
private void createTableIfNotExists() throws AtlasException {
    Admin hbaseAdmin = null;
    try {
        hbaseAdmin = connection.getAdmin();
        final String name = tableName.getNameAsString();
        LOG.info("Checking if table {} exists", name);

        if (admin_tableExists(hbaseAdmin)) {
            LOG.info("Table {} exists", name);
            return; // the finally block still closes the admin
        }

        LOG.info("Creating table {}", name);
        HTableDescriptor descriptor = new HTableDescriptor(tableName);
        HColumnDescriptor family = new HColumnDescriptor(COLUMN_FAMILY);
        family.setMaxVersions(1);
        family.setDataBlockEncoding(DataBlockEncoding.FAST_DIFF);
        family.setCompressionType(Compression.Algorithm.GZ);
        family.setBloomFilterType(BloomType.ROW);
        descriptor.addFamily(family);
        hbaseAdmin.createTable(descriptor);
    } catch (IOException ioe) {
        throw new AtlasException(ioe);
    } finally {
        close(hbaseAdmin);
    }
}

/** Returns whether the table identified by {@code tableName} exists according to the given admin. */
private boolean admin_tableExists(Admin hbaseAdmin) throws IOException {
    return hbaseAdmin.tableExists(tableName);
}
 
Example 2
Source Project: incubator-atlas   Source File: HBaseBasedAuditRepository.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates the table identified by {@code tableName} with a single column family when the
 * table is not present yet; otherwise only logs that it exists.
 *
 * @throws AtlasException wrapping any {@link IOException} thrown while obtaining the admin,
 *         checking for the table or creating it
 */
private void createTableIfNotExists() throws AtlasException {
    Admin hbaseAdmin = null;
    try {
        hbaseAdmin = connection.getAdmin();
        final String name = tableName.getNameAsString();
        LOG.info("Checking if table {} exists", name);

        final boolean alreadyThere = hbaseAdmin.tableExists(tableName);
        if (alreadyThere) {
            LOG.info("Table {} exists", name);
            return; // the finally block still closes the admin
        }

        LOG.info("Creating table {}", name);
        HTableDescriptor descriptor = new HTableDescriptor(tableName);
        HColumnDescriptor family = new HColumnDescriptor(COLUMN_FAMILY);
        family.setMaxVersions(1);
        family.setDataBlockEncoding(DataBlockEncoding.FAST_DIFF);
        family.setCompressionType(Compression.Algorithm.GZ);
        family.setBloomFilterType(BloomType.ROW);
        descriptor.addFamily(family);
        hbaseAdmin.createTable(descriptor);
    } catch (IOException ioe) {
        throw new AtlasException(ioe);
    } finally {
        close(hbaseAdmin);
    }
}
 
Example 3
Source Project: eagle   Source File: HBaseEntitySchemaManager.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates the table named by the entity definition, with one column family taken from the
 * same definition, unless a table of that name already exists.
 *
 * @param entityDefinition supplies the table name and the column family name
 * @throws IOException if the existence check or the table creation fails
 */
private void createTable(EntityDefinition entityDefinition) throws IOException {
    final String tableName = entityDefinition.getTable();
    if (admin.tableExists(tableName)) {
        LOG.info("Table {} already exists", tableName);
        return;
    }

    HTableDescriptor descriptor = new HTableDescriptor(TableName.valueOf(tableName));

    // Single column family: row-level Bloom filter, bounded number of versions
    HColumnDescriptor family = new HColumnDescriptor(entityDefinition.getColumnFamily());
    family.setBloomFilterType(BloomType.ROW);
    // family.setCompressionType(Compression.Algorithm.SNAPPY);  // compression is currently not set
    family.setMaxVersions(DEFAULT_MAX_VERSIONS);
    descriptor.addFamily(family);

    // Create the table through admin
    admin.createTable(descriptor);
    LOG.info("Successfully create Table {}", tableName);
}
 
Example 4
Source Project: phoenix   Source File: CreateTableIT.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that {@code BLOOMFILTER = 'ROW'} given in the DDL of a salted table ends up as
 * the Bloom filter type of the table's first column family.
 */
@Test
public void testCreateTableColumnFamilyHBaseAttribs8() throws Exception {
    String tableName = generateUniqueName();
    String ddl =
            "create table IF NOT EXISTS  " + tableName + "  (" + " id char(1) NOT NULL,"
                    + " col1 integer NOT NULL," + " col2 bigint NOT NULL,"
                    + " CONSTRAINT NAME_PK PRIMARY KEY (id, col1, col2)"
                    + " ) BLOOMFILTER = 'ROW', SALT_BUCKETS = 4";
    Properties props = new Properties();
    // try-with-resources: the JDBC connection and the HBase Admin were previously never
    // closed, leaking them even when the assertion failed.
    try (Connection conn = DriverManager.getConnection(getUrl(), props)) {
        conn.createStatement().execute(ddl);
        try (Admin admin = driver.getConnectionQueryServices(getUrl(), props).getAdmin()) {
            ColumnFamilyDescriptor[] columnFamilies =
                    admin.getDescriptor(TableName.valueOf(tableName)).getColumnFamilies();
            assertEquals(BloomType.ROW, columnFamilies[0].getBloomFilterType());
        }
    }
}
 
Example 5
Source Project: hbase   Source File: AccessController.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates the ACL table with a single column family
 * ({@code PermissionStorage.ACL_LIST_FAMILY}).
 *
 * @param admin the admin used to create the table
 * @throws IOException if the table creation fails
 */
private static void createACLTable(Admin admin) throws IOException {
  // Column family of the ACL table: one version, in memory, block cache on, 8 KB blocks,
  // no Bloom filter, local replication scope.
  ColumnFamilyDescriptorBuilder aclFamily =
      ColumnFamilyDescriptorBuilder.newBuilder(PermissionStorage.ACL_LIST_FAMILY);
  aclFamily.setMaxVersions(1);
  aclFamily.setInMemory(true);
  aclFamily.setBlockCacheEnabled(true);
  aclFamily.setBlocksize(8 * 1024);
  aclFamily.setBloomFilterType(BloomType.NONE);
  aclFamily.setScope(HConstants.REPLICATION_SCOPE_LOCAL);

  TableDescriptorBuilder aclTable =
      TableDescriptorBuilder.newBuilder(PermissionStorage.ACL_TABLE_NAME);
  aclTable.setColumnFamily(aclFamily.build());
  admin.createTable(aclTable.build());
}
 
Example 6
Source Project: hbase   Source File: BloomFilterUtil.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a Bloom filter chunk of the given size.
 *
 * @param byteSizeHint the desired number of bytes for the Bloom filter bit
 *          array. Will be increased so that folding is possible.
 * @param errorRate target false positive rate of the Bloom filter
 * @param hashType Bloom filter hash function type
 * @param foldFactor passed, together with the size hint converted to bits, to
 *          {@code computeFoldableByteSize} to obtain the actual byte size
 * @param bloomType the Bloom type handed to the {@code BloomFilterChunk} constructor
 * @return the new Bloom filter of the desired size
 */
public static BloomFilterChunk createBySize(int byteSizeHint,
    double errorRate, int hashType, int foldFactor, BloomType bloomType) {
  BloomFilterChunk bbf = new BloomFilterChunk(hashType, bloomType);

  // The hint is in bytes; multiply with a long literal so the bit count cannot overflow int.
  bbf.byteSize = computeFoldableByteSize(byteSizeHint * 8L, foldFactor);
  long bitSize = bbf.byteSize * 8;
  // First estimate of the key capacity, then the hash count derived from that estimate.
  bbf.maxKeys = (int) idealMaxKeys(bitSize, errorRate);
  bbf.hashCount = optimalFunctionCount(bbf.maxKeys, bitSize);

  // Adjust max keys to bring error rate closer to what was requested,
  // because byteSize was adjusted to allow for folding, and hashCount was
  // rounded.
  bbf.maxKeys = (int) computeMaxKeys(bitSize, errorRate, bbf.hashCount);

  return bbf;
}
 
Example 7
Source Project: hbase   Source File: BloomFilterChunk.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Adds a cell to this chunk by hashing it twice and recording the resulting hash locations.
 * The hash key is built from row and column for {@code BloomType.ROWCOL} and from the row
 * only for every other Bloom type.
 *
 * @param cell the cell to add
 */
public void add(Cell cell) {
  /*
   * For faster hashing, use combinatorial generation
   * http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/esa06.pdf
   */
  final HashKey<Cell> key;
  if (this.bloomType == BloomType.ROWCOL) {
    key = new RowColBloomHashKey(cell);
  } else {
    key = new RowBloomHashKey(cell);
  }
  // The second hash is seeded with the result of the first one.
  final int firstHash = this.hash.hash(key, 0);
  final int secondHash = this.hash.hash(key, firstHash);
  setHashLoc(firstHash, secondHash);
}
 
Example 8
Source Project: hbase   Source File: CompoundBloomFilterWriter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Adds a cell to the chunk currently being built, opening a new chunk first when none is
 * open, and records the cell as the previous cell.
 *
 * @param cell the cell to add; must not be null
 * @throws IOException declared by the signature; presumably raised by the chunk helper
 *         methods called here (confirm against their declarations)
 * @throws IllegalStateException if a new chunk must be opened while a first key is still set
 */
@Override
public void append(Cell cell) throws IOException {
  Objects.requireNonNull(cell);

  enqueueReadyChunk(false);

  if (chunk == null) {
    if (firstKeyInChunk != null) {
      throw new IllegalStateException("First key in chunk already set: "
          + Bytes.toStringBinary(firstKeyInChunk));
    }
    // Remember the first key of the new chunk; this happens only once per chunk.
    if (bloomType != BloomType.ROWCOL) {
      firstKeyInChunk = CellUtil.copyRow(cell);
    } else {
      Cell firstOnRowCol = PrivateCellUtil.createFirstOnRowCol(cell);
      firstKeyInChunk = PrivateCellUtil.getCellKeySerializedAsKeyValueKey(firstOnRowCol);
    }
    allocateNewChunk();
  }

  chunk.add(cell);
  this.prevCell = cell;
  totalKeyCount++;
}
 
Example 9
Source Project: hbase   Source File: CompoundBloomFilter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks whether the given key may be present, by locating the Bloom block that covers the
 * key in the root index and testing the key against that block.
 *
 * @param keyCell the key to look up
 * @param bloom not referenced in this method body
 * @param type the Bloom type forwarded to {@code BloomFilterUtil.contains}
 * @return {@code false} if no block covers the key or the block test is negative,
 *         {@code true} otherwise
 */
@Override
public boolean contains(Cell keyCell, ByteBuff bloom, BloomType type) {
  final int blockIndex = index.rootBlockContainingKey(keyCell);
  if (blockIndex < 0) {
    // This key is not in the file.
    return false;
  }

  final HFileBlock bloomBlock = getBloomBlock(blockIndex);
  final boolean found;
  try {
    ByteBuff blockBuffer = bloomBlock.getBufferReadOnly();
    int headerSize = bloomBlock.headerSize();
    int dataSize = bloomBlock.getUncompressedSizeWithoutHeader();
    found = BloomFilterUtil.contains(keyCell, blockBuffer, headerSize, dataSize, hash,
      hashCount, type);
  } finally {
    // After the use, should release the block to deallocate the byte buffers.
    bloomBlock.release();
  }

  // Update statistics. Only used in unit tests.
  if (found && numPositivesPerChunk != null) {
    numPositivesPerChunk[blockIndex]++;
  }
  return found;
}
 
Example 10
Source Project: hbase   Source File: TestMobFile.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes a store file into the test data directory, wraps it in a {@code MobFile} and checks
 * that the MobFile hands out a non-null {@code StoreFileScanner}.
 */
@Test
public void testGetScanner() throws Exception {
  Path dataDir = TEST_UTIL.getDataTestDir();
  FileSystem fileSystem = dataDir.getFileSystem(conf);
  HFileContext fileContext = new HFileContextBuilder().withBlockSize(8*1024).build();

  StoreFileWriter storeFileWriter =
      new StoreFileWriter.Builder(conf, cacheConf, fileSystem)
          .withOutputDir(dataDir)
          .withFileContext(fileContext)
          .build();
  MobTestUtil.writeStoreFile(storeFileWriter, testName.getMethodName());

  HStoreFile storeFile =
      new HStoreFile(fileSystem, storeFileWriter.getPath(), conf, cacheConf, BloomType.NONE, true);
  MobFile mobFile = new MobFile(storeFile);

  assertNotNull(mobFile.getScanner());
  assertTrue(mobFile.getScanner() instanceof StoreFileScanner);
}
 
Example 11
Source Project: hbase   Source File: TestMobStoreCompaction.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Sums the {@code MOB_CELLS_COUNT} file-info value over all files in the MOB family
 * directory of the test table.
 *
 * @return the summed cell count, or 0 when the MOB family directory does not exist
 * @throws IOException if listing the directory or reading a file fails
 */
private long countMobCellsInMetadata() throws IOException {
  Path mobDirPath = MobUtils.getMobFamilyPath(conf, tableDescriptor.getTableName(),
    familyDescriptor.getNameAsString());

  // Cache configuration built from a copy of the conf with the block cache size set to 0.
  Configuration copyOfConf = new Configuration(conf);
  copyOfConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f);
  CacheConfig cacheConfig = new CacheConfig(copyOfConf);

  if (!fs.exists(mobDirPath)) {
    return 0;
  }

  long total = 0;
  for (FileStatus status : UTIL.getTestFileSystem().listStatus(mobDirPath)) {
    HStoreFile storeFile =
        new HStoreFile(fs, status.getPath(), conf, cacheConfig, BloomType.NONE, true);
    storeFile.initReader();
    byte[] count = storeFile.getReader().loadFileInfo().get(MOB_CELLS_COUNT);
    assertTrue(count != null);
    total += Bytes.toLong(count);
  }
  return total;
}
 
Example 12
Source Project: hbase   Source File: TestBulkLoadHFiles.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs the bulk-load scenario up to three times: against a table in the default namespace,
 * again from a depth-3 directory layout when the table is pre-created, and finally against
 * a table in {@code NAMESPACE}.
 *
 * @param testName used to derive the table name ({@code "mytable_" + testName})
 * @param bloomType Bloom type forwarded to the inner runs
 * @param tableSplitKeys split keys; a non-null value means the table is pre-created
 * @param hfileRanges key ranges forwarded to the inner runs
 * @param useMap forwarded to the inner runs (the depth-3 run always passes {@code false})
 */
private void runTest(String testName, BloomType bloomType, byte[][] tableSplitKeys,
    byte[][][] hfileRanges, boolean useMap) throws Exception {
  final boolean preCreateTable = tableSplitKeys != null;
  final byte[] rawTableName = Bytes.toBytes("mytable_" + testName);

  // Bulk load into the default namespace
  final TableName defaultNsTable = TableName.valueOf(rawTableName);
  runTest(testName, defaultNsTable, bloomType, preCreateTable, tableSplitKeys, hfileRanges,
    useMap, 2);

  // Bulk load from a directory structure of depth 3:
  // baseDirectory -- regionDir -- familyDir -- storeFileDir
  if (preCreateTable) {
    runTest(testName + 2, defaultNsTable, bloomType, true, tableSplitKeys, hfileRanges, false,
      3);
  }

  // Bulk load into the specified namespace
  final TableName namespacedTable =
      TableName.valueOf(Bytes.toBytes(NAMESPACE), rawTableName);
  runTest(testName, namespacedTable, bloomType, preCreateTable, tableSplitKeys, hfileRanges,
    useMap, 2);
}
 
Example 13
Source Project: hbase   Source File: HBaseTestingUtility.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a table from the given descriptor plus one column family per entry of
 * {@code families}, then waits until all of its regions are assigned.
 *
 * @param htd table descriptor used as the starting point
 * @param families array of column families to add
 * @param splitKeys array of split keys, or {@code null} to create the table without splits
 * @param type Bloom type set on every added family
 * @param blockSize block size set on every added family
 * @param c Configuration to use (not referenced in this method body)
 * @return A Table instance for the created table.
 * @throws IOException if getAdmin or createTable fails
 */
public Table createTable(TableDescriptor htd, byte[][] families, byte[][] splitKeys,
    BloomType type, int blockSize, Configuration c) throws IOException {
  TableDescriptorBuilder tableBuilder = TableDescriptorBuilder.newBuilder(htd);
  for (byte[] family : families) {
    ColumnFamilyDescriptorBuilder familyBuilder =
      ColumnFamilyDescriptorBuilder.newBuilder(family);
    familyBuilder.setBloomFilterType(type);
    familyBuilder.setBlocksize(blockSize);
    if (isNewVersionBehaviorEnabled()) {
      familyBuilder.setNewVersionBehavior(true);
    }
    tableBuilder.setColumnFamily(familyBuilder.build());
  }

  TableDescriptor finalDescriptor = tableBuilder.build();
  if (splitKeys == null) {
    getAdmin().createTable(finalDescriptor);
  } else {
    getAdmin().createTable(finalDescriptor, splitKeys);
  }

  // HBaseAdmin only waits for regions to appear in hbase:meta
  // we should wait until they are assigned
  waitUntilAllRegionsAssigned(finalDescriptor.getTableName());
  return getConnection().getTable(finalDescriptor.getTableName());
}
 
Example 14
Source Project: hbase   Source File: HBaseTestingUtility.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Create a set of column descriptors with the combination of compression,
 * encoding, bloom codecs available.
 * <p>
 * One descriptor is produced per (compression, encoding, bloom type) triple; each family
 * name is built from the prefix and a running id and contains special characters.
 *
 * @param prefix family names prefix
 * @return the list of column descriptors
 */
public static List<ColumnFamilyDescriptor> generateColumnDescriptors(final String prefix) {
  List<ColumnFamilyDescriptor> columnFamilyDescriptors = new ArrayList<>();
  long familyId = 0;
  for (Compression.Algorithm compressionType: getSupportedCompressionAlgorithms()) {
    for (DataBlockEncoding encodingType: DataBlockEncoding.values()) {
      for (BloomType bloomType: BloomType.values()) {
        // The previous literal was corrupted into an invalid format string ("%[" is not a
        // conversion), which makes String.format throw; %s takes the prefix, %d the id.
        String name = String.format("%s-cf-!@#&-%d!@#", prefix, familyId);
        ColumnFamilyDescriptorBuilder columnFamilyDescriptorBuilder =
          ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(name));
        columnFamilyDescriptorBuilder.setCompressionType(compressionType);
        columnFamilyDescriptorBuilder.setDataBlockEncoding(encodingType);
        columnFamilyDescriptorBuilder.setBloomFilterType(bloomType);
        columnFamilyDescriptors.add(columnFamilyDescriptorBuilder.build());
        familyId++;
      }
    }
  }
  return columnFamilyDescriptors;
}
 
Example 15
Source Project: hbase   Source File: MobSnapshotTestingUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a table whose column families are all MOB-enabled (threshold 0) with Bloom filters
 * turned off, waits for its regions to be assigned and returns a handle to it.
 *
 * @param util testing utility providing the admin and the configuration
 * @param tableName name of the table to create
 * @param families column family names
 * @return A Table instance for the created table.
 * @throws IOException if creating the table or opening the connection fails
 */
public static Table createMobTable(final HBaseTestingUtility util,
    final TableName tableName, final byte[]... families) throws IOException {
  TableDescriptorBuilder tableBuilder = TableDescriptorBuilder.newBuilder(tableName);
  for (byte[] family : families) {
    ColumnFamilyDescriptorBuilder mobFamily = ColumnFamilyDescriptorBuilder.newBuilder(family);
    // Blooms are on by default (as of 0.95); they are disabled here because tests have
    // hard coded counts of what to expect in block cache, etc., and blooms being on
    // is interfering.
    mobFamily.setBloomFilterType(BloomType.NONE);
    mobFamily.setMobEnabled(true);
    mobFamily.setMobThreshold(0L);
    tableBuilder.setColumnFamily(mobFamily.build());
  }
  util.getAdmin().createTable(tableBuilder.build());

  // HBaseAdmin only waits for regions to appear in hbase:meta we should wait
  // until they are assigned
  util.waitUntilAllRegionsAssigned(tableName);

  // NOTE(review): the Connection created here is not closed in this method; only the
  // Table is handed to the caller.
  return ConnectionFactory.createConnection(util.getConfiguration()).getTable(tableName);
}
 
Example 16
Source Project: hbase   Source File: TestHFileOutputFormat2.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds up to three mock column families, each mapped to a different Bloom type.
 *
 * @param numCfs how many families to return; values above 3 yield 3 entries, values
 *        of 0 or less yield an empty map
 * @return a map from column family names to Bloom types for
 *         testing column family Bloom type settings. Column family names have special
 *         characters
 */
private Map<String, BloomType>
getMockColumnFamiliesForBloomType (int numCfs) {
  Map<String, BloomType> familyToBloomType = new HashMap<>();
  // use column family names having special characters
  if (numCfs-- > 0) {
    // Literal restored: the special characters had been replaced by an
    // e-mail-obfuscation placeholder.
    familyToBloomType.put("Family1!@#!@#&", BloomType.ROW);
  }
  if (numCfs-- > 0) {
    familyToBloomType.put("Family2=asdads&!AASD",
        BloomType.ROWCOL);
  }
  if (numCfs-- > 0) {
    familyToBloomType.put("Family3", BloomType.NONE);
  }
  return familyToBloomType;
}
 
Example 17
Source Project: examples   Source File: CreateTable.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates the {@code documents} table with one GZ-compressed column family {@code c}
 * (Bloom filter off), split into regions by {@code UniformSplit.split(8)}.
 *
 * @param args not used
 */
public static void main(String[] args) throws MasterNotRunningException,
    ZooKeeperConnectionException, IOException {
  try (Connection connection = ConnectionFactory.createConnection();
      Admin admin = connection.getAdmin()) {
    LOG.info("Starting table creation");
    // tag::CREATE[]
    TableName documents = TableName.valueOf("documents");
    HTableDescriptor descriptor = new HTableDescriptor(documents);
    HColumnDescriptor columnFamily = new HColumnDescriptor("c");
    columnFamily.setCompressionType(Algorithm.GZ);
    columnFamily.setBloomFilterType(BloomType.NONE);
    descriptor.addFamily(columnFamily);
    byte[][] splitKeys = new UniformSplit().split(8);
    admin.createTable(descriptor, splitKeys);
    // end::CREATE[]
    LOG.info("Table successfuly created");
  }
}
 
Example 18
Source Project: examples   Source File: Create2.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates the {@code pages} table with explicit split keys and one GZ-compressed column
 * family {@code c} (52 versions, row Bloom filter).
 *
 * @param args not used
 * @throws IOException if creating the table or closing the admin fails
 */
public static void main(String[] args) throws MasterNotRunningException, ZooKeeperConnectionException, IOException {
  Configuration conf = HBaseConfiguration.create();
  // try-with-resources so the admin is closed even when createTable throws; previously
  // close() was only reached on the success path.
  try (HBaseAdmin admin = new HBaseAdmin(conf)) {
    // tag::CREATE2[]
    HTableDescriptor desc = new HTableDescriptor(TableName.valueOf("pages"));
    byte[][] splits = {Bytes.toBytes("b"), Bytes.toBytes("f"),
      Bytes.toBytes("k"), Bytes.toBytes("n"), Bytes.toBytes("t")};
    desc.setValue(Bytes.toBytes("comment"), Bytes.toBytes("Create 10012014"));
    HColumnDescriptor family = new HColumnDescriptor("c");
    family.setCompressionType(Algorithm.GZ);
    family.setMaxVersions(52);
    family.setBloomFilterType(BloomType.ROW);
    desc.addFamily(family);
    admin.createTable(desc, splits);
    // end::CREATE2[]
  }
}
 
Example 19
Source Project: kylin-on-parquet-v2   Source File: HFileOutputFormat3.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs inside the task to rebuild the column-family-to-Bloom-type map that was serialized
 * into the configuration.
 *
 * @param conf to read the serialized values from
 * @return a map, ordered by {@code Bytes.BYTES_COMPARATOR}, from column family to the
 *         configured bloom filter type
 */
@VisibleForTesting
static Map<byte[], BloomType> createFamilyBloomTypeMap(Configuration conf) {
    Map<byte[], String> namesByFamily =
            createFamilyConfValueMap(conf, BLOOM_TYPE_FAMILIES_CONF_KEY);
    Map<byte[], BloomType> typesByFamily =
            new TreeMap<byte[], BloomType>(Bytes.BYTES_COMPARATOR);
    for (Map.Entry<byte[], String> entry : namesByFamily.entrySet()) {
        // valueOf rejects any name that is not a BloomType constant
        typesByFamily.put(entry.getKey(), BloomType.valueOf(entry.getValue()));
    }
    return typesByFamily;
}
 
Example 20
Source Project: Halyard   Source File: HalyardTableUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds the descriptor of the {@code CF_NAME} column family: one version, row Bloom
 * filter, block cache and cache-on-write enabled, deleted cells not kept, plus the region
 * max file size and split policy values.
 *
 * @return the configured column family descriptor
 */
private static HColumnDescriptor createColumnFamily() {
    HColumnDescriptor family = new HColumnDescriptor(CF_NAME);
    family.setMaxVersions(1);
    family.setBlockCacheEnabled(true);
    family.setBloomFilterType(BloomType.ROW);
    family.setCompressionType(DEFAULT_COMPRESSION_ALGORITHM);
    family.setDataBlockEncoding(DEFAULT_DATABLOCK_ENCODING);
    family.setCacheBloomsOnWrite(true);
    family.setCacheDataOnWrite(true);
    family.setCacheIndexesOnWrite(true);
    family.setKeepDeletedCells(KeepDeletedCells.FALSE);
    family.setValue(HTableDescriptor.MAX_FILESIZE, REGION_MAX_FILESIZE);
    // The last fluent call's result is what the original chain returned.
    return family.setValue(HTableDescriptor.SPLIT_POLICY, REGION_SPLIT_POLICY);
}
 
Example 21
Source Project: hgraphdb   Source File: HBaseGraphUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates the table for {@code name} with a single {@code DEFAULT_FAMILY} column family,
 * unless it already exists. When the configured region count is greater than 1 the table is
 * pre-split between {@code getStartKey} and {@code getEndKey}.
 *
 * @param config supplies the WAL durability, compression algorithm and region count
 * @param admin the admin used to check for and create the table
 * @param name logical table name, resolved through {@code getTableName}
 * @param ttl time-to-live set on the column family
 * @throws IOException if the existence check or the creation fails (creation failures are
 *         logged before being rethrown)
 */
private static void createTable(HBaseGraphConfiguration config, Admin admin, String name, int ttl) throws IOException {
    TableName tableName = getTableName(config, name);
    if (admin.tableExists(tableName)) {
        return;
    }
    try {
        HTableDescriptor tableDescriptor = new HTableDescriptor(tableName);
        tableDescriptor.setDurability(config.getTableAsyncWAL() ? Durability.ASYNC_WAL : Durability.USE_DEFAULT);
        HColumnDescriptor columnDescriptor = new HColumnDescriptor(DEFAULT_FAMILY)
                // Locale.ROOT: the default-locale toUpperCase() maps 'i' to a dotted capital
                // under e.g. a Turkish locale, so a name such as "bzip2" would not match its
                // enum constant.
                .setCompressionType(Compression.Algorithm.valueOf(
                        config.getCompressionAlgorithm().toUpperCase(java.util.Locale.ROOT)))
                .setBloomFilterType(BloomType.ROW)
                .setDataBlockEncoding(DataBlockEncoding.FAST_DIFF)
                .setMaxVersions(1)
                .setMinVersions(0)
                .setBlocksize(32768)
                .setBlockCacheEnabled(true)
                .setTimeToLive(ttl);
        tableDescriptor.addFamily(columnDescriptor);
        int regionCount = config.getRegionCount();
        if (regionCount <= 1) {
            admin.createTable(tableDescriptor);
        } else {
            admin.createTable(tableDescriptor, getStartKey(regionCount), getEndKey(regionCount), regionCount);
        }
    } catch (IOException e) {
        LOGGER.error("Could not create table " + tableName, e);
        throw e;
    }
}
 
Example 22
Source Project: kylin   Source File: HFileOutputFormat3.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs inside the task to rebuild the column-family-to-Bloom-type map that was serialized
 * into the configuration.
 *
 * @param conf to read the serialized values from
 * @return a map, ordered by {@code Bytes.BYTES_COMPARATOR}, from column family to the
 *         configured bloom filter type
 */
@VisibleForTesting
static Map<byte[], BloomType> createFamilyBloomTypeMap(Configuration conf) {
    Map<byte[], String> namesByFamily =
            createFamilyConfValueMap(conf, BLOOM_TYPE_FAMILIES_CONF_KEY);
    Map<byte[], BloomType> typesByFamily =
            new TreeMap<byte[], BloomType>(Bytes.BYTES_COMPARATOR);
    for (Map.Entry<byte[], String> entry : namesByFamily.entrySet()) {
        // valueOf rejects any name that is not a BloomType constant
        typesByFamily.put(entry.getKey(), BloomType.valueOf(entry.getValue()));
    }
    return typesByFamily;
}
 
Example 23
Source Project: hbase   Source File: ThriftUtilities.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Maps a Thrift Bloom filter type to the corresponding {@code BloomType} by its numeric
 * value; values other than 0, 2 and 3 (including 1) map to {@code BloomType.ROW}.
 *
 * @param in the Thrift Bloom filter type
 * @return the matching {@code BloomType}
 */
public static BloomType bloomFilterFromThrift(TBloomFilterType in) {
  final BloomType converted;
  switch (in.getValue()) {
    case 0:
      converted = BloomType.NONE;
      break;
    case 2:
      converted = BloomType.ROWCOL;
      break;
    case 3:
      converted = BloomType.ROWPREFIX_FIXED_LENGTH;
      break;
    case 1:
    default:
      converted = BloomType.ROW;
      break;
  }
  return converted;
}
 
Example 24
Source Project: phoenix   Source File: MultiHfileOutputFormat.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs inside the task to rebuild, for one table, the column-family-to-Bloom-type map that
 * was serialized into the configuration.
 *
 * @param conf to read the serialized values from
 * @param tableName the table whose configuration entries are looked up
 * @return a map from column family to the configured bloom filter type; empty when no
 *         configuration is found for the table
 */
@VisibleForTesting
static Map<byte[], BloomType> createFamilyBloomTypeMap(Configuration conf, final String tableName) {
    final Map<byte[], BloomType> typesByFamily =
            new TreeMap<byte[], BloomType>(Bytes.BYTES_COMPARATOR);
    final Map<String, String> tableConfigs = getTableConfigurations(conf, tableName);
    if (tableConfigs != null) {
        Map<byte[], String> namesByFamily =
                createFamilyConfValueMap(tableConfigs, BLOOM_TYPE_FAMILIES_CONF_KEY);
        for (Map.Entry<byte[], String> entry : namesByFamily.entrySet()) {
            typesByFamily.put(entry.getKey(), BloomType.valueOf(entry.getValue()));
        }
    }
    return typesByFamily;
}
 
Example 25
Source Project: hbase   Source File: CachedMobFile.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Wraps the file at {@code path} in a new {@code HStoreFile} (Bloom type NONE) and returns a
 * {@code CachedMobFile} around it.
 *
 * @param fs file system holding the file
 * @param path path of the file
 * @param conf configuration passed to the store file
 * @param cacheConf cache configuration passed to the store file
 * @return the new cached MOB file
 */
public static CachedMobFile create(FileSystem fs, Path path, Configuration conf,
    CacheConfig cacheConf) throws IOException {
  // XXX: primaryReplica is only used for constructing the key of block cache so it is not a
  // critical problem if we pass the wrong value, so here we always pass true. Need to fix later.
  final boolean primaryReplica = true;
  return new CachedMobFile(
      new HStoreFile(fs, path, conf, cacheConf, BloomType.NONE, primaryReplica));
}
 
Example 26
Source Project: hbase   Source File: BloomFilterUtil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the extra parameter required by the given Bloom filter type, read from the
 * configuration.
 * <p>
 * Only {@code ROWPREFIX_FIXED_LENGTH} needs one: the prefix length stored under
 * {@code PREFIX_LENGTH_KEY}, which must be an integer greater than 0 and not larger than
 * {@code HConstants.MAX_ROW_LENGTH}.
 *
 * @param bloomFilterType the Bloom filter type
 * @param conf the configuration to read the prefix length from
 * @return the prefix length encoded with {@code Bytes.toBytes}, or {@code null} for every
 *         other Bloom filter type
 * @throws IllegalArgumentException if the prefix length is missing, not a number or out of
 *         range
 */
public static byte[] getBloomFilterParam(BloomType bloomFilterType, Configuration conf)
    throws IllegalArgumentException {
  if (!bloomFilterType.equals(ROWPREFIX_FIXED_LENGTH)) {
    return null;
  }

  String message = "Bloom filter type is " + bloomFilterType + ", ";
  String prefixLengthString = conf.get(PREFIX_LENGTH_KEY);
  if (prefixLengthString == null) {
    message += PREFIX_LENGTH_KEY + " not specified.";
    throw new IllegalArgumentException(message);
  }

  int prefixLength;
  try {
    prefixLength = Integer.parseInt(prefixLengthString);
  } catch (NumberFormatException nfe) {
    message = "Number format exception when parsing " + PREFIX_LENGTH_KEY + " for BloomType " +
        bloomFilterType.toString() + ":" + prefixLengthString;
    throw new IllegalArgumentException(message, nfe);
  }

  if (prefixLength <= 0 || prefixLength > HConstants.MAX_ROW_LENGTH) {
    // The message now states the bounds that are actually enforced (it used to claim
    // ">=0 and <", contradicting the check above).
    message +=
        "the value of " + PREFIX_LENGTH_KEY + " must be > 0 and <= " + HConstants.MAX_ROW_LENGTH;
    throw new IllegalArgumentException(message);
  }
  return Bytes.toBytes(prefixLength);
}
 
Example 27
Source Project: hbase   Source File: BloomFilterFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a new general (Row or RowCol) Bloom filter at the time of
 * {@link org.apache.hadoop.hbase.regionserver.HStoreFile} writing and registers it with the
 * writer as an inline block writer.
 *
 * @param conf configuration supplying the error rate, fold limit, block size and hash type
 * @param cacheConf supplies whether Bloom blocks are cached on write
 * @param bloomType the Bloom type of the column family
 * @param maxKeys an estimate of the number of keys we expect to insert.
 *        Irrelevant if compound Bloom filters are enabled.
 * @param writer the HFile writer
 * @return the new Bloom filter, or null in case Bloom filters are disabled
 *         or when failed to create one.
 */
public static BloomFilterWriter createGeneralBloomAtWrite(Configuration conf,
    CacheConfig cacheConf, BloomType bloomType, int maxKeys,
    HFile.Writer writer) {
  if (!isGeneralBloomEnabled(conf)) {
    String nullConfNote = conf == null ? " (configuration is null)" : "";
    LOG.trace("Bloom filters are disabled by configuration for "
        + writer.getPath()
        + nullConfNote);
    return null;
  }
  if (bloomType == BloomType.NONE) {
    LOG.trace("Bloom filter is turned off for the column family");
    return null;
  }

  final boolean rowCol = bloomType == BloomType.ROWCOL;

  // In case of row/column Bloom filter lookups, each lookup is an OR of two
  // separate lookups. Therefore, if each lookup's false positive rate is p,
  // the resulting false positive rate is err = 1 - (1 - p)^2, and
  // p = 1 - sqrt(1 - err).
  float errorRate = getErrorRate(conf);
  if (rowCol) {
    errorRate = (float) (1 - Math.sqrt(1 - errorRate));
  }

  int maxFold = conf.getInt(IO_STOREFILE_BLOOM_MAX_FOLD,
      MAX_ALLOWED_FOLD_FACTOR);

  // Do we support compound bloom filters?
  // In case of compound Bloom filters we ignore the maxKeys hint.
  CompoundBloomFilterWriter compoundWriter = new CompoundBloomFilterWriter(
      getBloomBlockSize(conf), errorRate, Hash.getHashType(conf), maxFold,
      cacheConf.shouldCacheBloomsOnWrite(),
      rowCol ? CellComparatorImpl.COMPARATOR : null, bloomType);
  writer.addInlineBlockWriter(compoundWriter);
  return compoundWriter;
}
 
Example 28
/**
 * Builds the descriptor of the {@code SIConstants.DEFAULT_FAMILY_BYTES} column family:
 * unlimited versions, no compression, in memory, block cache enabled, row Bloom filter.
 *
 * @return the configured column family descriptor
 */
public static HColumnDescriptor createDataFamily() {
    final HColumnDescriptor dataFamily =
            new HColumnDescriptor(SIConstants.DEFAULT_FAMILY_BYTES);

    // Storage settings
    dataFamily.setMaxVersions(Integer.MAX_VALUE);
    dataFamily.setCompressionType(Compression.Algorithm.NONE);

    // Read-path settings
    dataFamily.setInMemory(true);
    dataFamily.setBlockCacheEnabled(true);
    dataFamily.setBloomFilterType(BloomType.ROW);

    return dataFamily;
}
 
Example 29
Source Project: hbase   Source File: CompoundBloomFilterWriter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * @param chunkByteSizeHint
 *          each chunk's size in bytes. The real chunk size might be different
 *          as required by the fold factor.
 * @param errorRate
 *          target false positive rate
 * @param hashType
 *          hash function type to use
 * @param maxFold
 *          maximum degree of folding allowed
 * @param bloomType
 *          the bloom type
 */
public CompoundBloomFilterWriter(int chunkByteSizeHint, float errorRate,
    int hashType, int maxFold, boolean cacheOnWrite,
    CellComparator comparator, BloomType bloomType) {
  chunkByteSize = BloomFilterUtil.computeFoldableByteSize(
      chunkByteSizeHint * 8L, maxFold);

  this.errorRate = errorRate;
  this.hashType = hashType;
  this.maxFold = maxFold;
  this.cacheOnWrite = cacheOnWrite;
  this.comparator = comparator;
  this.bloomType = bloomType;
}
 
Example 30
Source Project: hbase   Source File: TestBulkLoadHFiles.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs the simple bulk-load scenario with two HFile key ranges, Bloom filters off and the
 * map-based variant enabled.
 */
@Test
public void testSimpleLoadWithMap() throws Exception {
  byte[][] firstRange = { Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") };
  byte[][] secondRange = { Bytes.toBytes("ddd"), Bytes.toBytes("ooo") };
  byte[][][] hfileRanges = { firstRange, secondRange };
  runTest("testSimpleLoadWithMap", BloomType.NONE, hfileRanges, true);
}