Java Code Examples for org.apache.hadoop.util.hash.Hash

The following examples show how to use org.apache.hadoop.util.hash.Hash. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: hadoop   Source File: HashFunction.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a hash function whose results are constrained by an upper bound
 * and a fixed number of returned values.
 *
 * @param maxValue the highest value that may be returned; must be positive.
 * @param nbHash the number of hashed values produced; must be positive.
 * @param hashType type of the hashing function (see {@link Hash}).
 * @throws IllegalArgumentException if {@code maxValue} or {@code nbHash} is
 *         not positive, or if {@code Hash.getInstance} returns {@code null}
 *         for {@code hashType}.
 */
public HashFunction(int maxValue, int nbHash, int hashType) {
  if (maxValue < 1) {
    throw new IllegalArgumentException("maxValue must be > 0");
  }
  if (nbHash < 1) {
    throw new IllegalArgumentException("nbHash must be > 0");
  }

  this.maxValue = maxValue;
  this.nbHash = nbHash;
  this.hashFunction = Hash.getInstance(hashType);
  if (null == this.hashFunction) {
    // getInstance signals an unrecognised type by returning null
    throw new IllegalArgumentException("hashType must be known");
  }
}
 
Example 2
Source Project: hadoop   Source File: TestBloomFilters.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs the shared tester strategies against a {@link DynamicBloomFilter}
 * built with the Jenkins hash, then checks that its string form is not null.
 */
@Test
public void testDynamicBloomFilter() {
  final int hashId = Hash.JENKINS_HASH;
  final Filter filter =
      new DynamicBloomFilter(bitSize, hashFunctionNumber, hashId, 3);

  BloomFilterCommonTester.of(hashId, numInsertions)
      .withFilterInstance(filter)
      .withTestCases(ImmutableSet.of(
          BloomFilterTestStrategy.KEY_TEST_STRATEGY,
          BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
          BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
          BloomFilterTestStrategy.WRITE_READ_STRATEGY,
          BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY))
      .test();

  assertNotNull("testDynamicBloomFilter error ", filter.toString());
}
 
Example 3
Source Project: hadoop   Source File: TestBloomFilters.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs the shared tester strategies against a plain and a retouched Bloom
 * filter, both built with the Jenkins hash.
 */
@Test
public void testFiltersWithJenkinsHash() {
  final int hashId = Hash.JENKINS_HASH;

  BloomFilterCommonTester.of(hashId, numInsertions)
      .withFilterInstance(
          new BloomFilter(bitSize, hashFunctionNumber, hashId))
      .withFilterInstance(
          new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId))
      .withTestCases(ImmutableSet.of(
          BloomFilterTestStrategy.KEY_TEST_STRATEGY,
          BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
          BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
          BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY,
          BloomFilterTestStrategy.WRITE_READ_STRATEGY,
          BloomFilterTestStrategy.FILTER_OR_STRATEGY,
          BloomFilterTestStrategy.FILTER_AND_STRATEGY,
          BloomFilterTestStrategy.FILTER_XOR_STRATEGY))
      .test();
}
 
Example 4
Source Project: hadoop   Source File: TestBloomFilters.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs the shared tester strategies against a plain and a retouched Bloom
 * filter, both built with the Murmur hash.
 */
@Test
public void testFiltersWithMurmurHash() {
  final int hashId = Hash.MURMUR_HASH;

  BloomFilterCommonTester.of(hashId, numInsertions)
      .withFilterInstance(
          new BloomFilter(bitSize, hashFunctionNumber, hashId))
      .withFilterInstance(
          new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId))
      .withTestCases(ImmutableSet.of(
          BloomFilterTestStrategy.KEY_TEST_STRATEGY,
          BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
          BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
          BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY,
          BloomFilterTestStrategy.WRITE_READ_STRATEGY,
          BloomFilterTestStrategy.FILTER_OR_STRATEGY,
          BloomFilterTestStrategy.FILTER_AND_STRATEGY,
          BloomFilterTestStrategy.FILTER_XOR_STRATEGY))
      .test();
}
 
Example 5
Source Project: hadoop   Source File: BloomFilterCommonTester.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a tester bound to one hash type and a number of insertions, and
 * installs the helper that lists the false positives expected per hash type.
 *
 * @param hashId hash type identifier, stored as {@code hashType}.
 * @param numInsertions number of insertions, stored unchanged.
 */
private BloomFilterCommonTester(int hashId, int numInsertions) {
  this.hashType = hashId;
  this.numInsertions = numInsertions;

  this.preAssertionHelper = new PreAssertionHelper() {

    /**
     * Returns the known false positives for the given hash type and fails
     * the test for any type other than Jenkins or Murmur.
     */
    @Override
    public ImmutableSet<Integer> falsePositives(int hashId) {
      switch (hashId) {
      case Hash.JENKINS_HASH: {
        // false positives for odd and even under 1000
        return ImmutableSet.of(99, 963);
      }
      case Hash.MURMUR_HASH: {
        // false positives for odd and even under 1000
        return ImmutableSet.of(769, 772, 810, 874);
      }
      default: {
        // fail fast on an unknown hash type
        Assert.fail("unknown hash error");
        // not reached: fail() always throws, but the compiler needs a return
        return ImmutableSet.of();
      }
      }
    }
  };
}
 
Example 6
Source Project: big-c   Source File: HashFunction.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a hash function whose results are constrained by an upper bound
 * and a fixed number of returned values.
 *
 * @param maxValue the highest value that may be returned; must be positive.
 * @param nbHash the number of hashed values produced; must be positive.
 * @param hashType type of the hashing function (see {@link Hash}).
 * @throws IllegalArgumentException if {@code maxValue} or {@code nbHash} is
 *         not positive, or if {@code Hash.getInstance} returns {@code null}
 *         for {@code hashType}.
 */
public HashFunction(int maxValue, int nbHash, int hashType) {
  if (maxValue < 1) {
    throw new IllegalArgumentException("maxValue must be > 0");
  }
  if (nbHash < 1) {
    throw new IllegalArgumentException("nbHash must be > 0");
  }

  this.maxValue = maxValue;
  this.nbHash = nbHash;
  this.hashFunction = Hash.getInstance(hashType);
  if (null == this.hashFunction) {
    // getInstance signals an unrecognised type by returning null
    throw new IllegalArgumentException("hashType must be known");
  }
}
 
Example 7
Source Project: big-c   Source File: TestBloomFilters.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs the shared tester strategies against a {@link DynamicBloomFilter}
 * built with the Jenkins hash, then checks that its string form is not null.
 */
@Test
public void testDynamicBloomFilter() {
  final int hashId = Hash.JENKINS_HASH;
  final Filter filter =
      new DynamicBloomFilter(bitSize, hashFunctionNumber, hashId, 3);

  BloomFilterCommonTester.of(hashId, numInsertions)
      .withFilterInstance(filter)
      .withTestCases(ImmutableSet.of(
          BloomFilterTestStrategy.KEY_TEST_STRATEGY,
          BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
          BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
          BloomFilterTestStrategy.WRITE_READ_STRATEGY,
          BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY))
      .test();

  assertNotNull("testDynamicBloomFilter error ", filter.toString());
}
 
Example 8
Source Project: big-c   Source File: TestBloomFilters.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs the shared tester strategies against a plain and a retouched Bloom
 * filter, both built with the Jenkins hash.
 */
@Test
public void testFiltersWithJenkinsHash() {
  final int hashId = Hash.JENKINS_HASH;

  BloomFilterCommonTester.of(hashId, numInsertions)
      .withFilterInstance(
          new BloomFilter(bitSize, hashFunctionNumber, hashId))
      .withFilterInstance(
          new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId))
      .withTestCases(ImmutableSet.of(
          BloomFilterTestStrategy.KEY_TEST_STRATEGY,
          BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
          BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
          BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY,
          BloomFilterTestStrategy.WRITE_READ_STRATEGY,
          BloomFilterTestStrategy.FILTER_OR_STRATEGY,
          BloomFilterTestStrategy.FILTER_AND_STRATEGY,
          BloomFilterTestStrategy.FILTER_XOR_STRATEGY))
      .test();
}
 
Example 9
Source Project: big-c   Source File: TestBloomFilters.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs the shared tester strategies against a plain and a retouched Bloom
 * filter, both built with the Murmur hash.
 */
@Test
public void testFiltersWithMurmurHash() {
  final int hashId = Hash.MURMUR_HASH;

  BloomFilterCommonTester.of(hashId, numInsertions)
      .withFilterInstance(
          new BloomFilter(bitSize, hashFunctionNumber, hashId))
      .withFilterInstance(
          new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId))
      .withTestCases(ImmutableSet.of(
          BloomFilterTestStrategy.KEY_TEST_STRATEGY,
          BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
          BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
          BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY,
          BloomFilterTestStrategy.WRITE_READ_STRATEGY,
          BloomFilterTestStrategy.FILTER_OR_STRATEGY,
          BloomFilterTestStrategy.FILTER_AND_STRATEGY,
          BloomFilterTestStrategy.FILTER_XOR_STRATEGY))
      .test();
}
 
Example 10
Source Project: big-c   Source File: BloomFilterCommonTester.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a tester bound to one hash type and a number of insertions, and
 * installs the helper that lists the false positives expected per hash type.
 *
 * @param hashId hash type identifier, stored as {@code hashType}.
 * @param numInsertions number of insertions, stored unchanged.
 */
private BloomFilterCommonTester(int hashId, int numInsertions) {
  this.hashType = hashId;
  this.numInsertions = numInsertions;

  this.preAssertionHelper = new PreAssertionHelper() {

    /**
     * Returns the known false positives for the given hash type and fails
     * the test for any type other than Jenkins or Murmur.
     */
    @Override
    public ImmutableSet<Integer> falsePositives(int hashId) {
      switch (hashId) {
      case Hash.JENKINS_HASH: {
        // false positives for odd and even under 1000
        return ImmutableSet.of(99, 963);
      }
      case Hash.MURMUR_HASH: {
        // false positives for odd and even under 1000
        return ImmutableSet.of(769, 772, 810, 874);
      }
      default: {
        // fail fast on an unknown hash type
        Assert.fail("unknown hash error");
        // not reached: fail() always throws, but the compiler needs a return
        return ImmutableSet.of();
      }
      }
    }
  };
}
 
Example 11
Source Project: streaminer   Source File: HashFunction.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a hash function whose results are constrained by an upper bound
 * and a fixed number of returned values.
 *
 * @param maxValue the highest value that may be returned; must be positive.
 * @param nbHash the number of hashed values produced; must be positive.
 * @param hashType type of the hashing function (see {@link Hash}).
 * @throws IllegalArgumentException if {@code maxValue} or {@code nbHash} is
 *         not positive, or if {@code Hash.getInstance} returns {@code null}
 *         for {@code hashType}.
 */
public HashFunction(int maxValue, int nbHash, int hashType) {
  if (maxValue < 1) {
    throw new IllegalArgumentException("maxValue must be > 0");
  }
  if (nbHash < 1) {
    throw new IllegalArgumentException("nbHash must be > 0");
  }

  this.maxValue = maxValue;
  this.nbHash = nbHash;
  this.hashFunction = Hash.getInstance(hashType);
  if (null == this.hashFunction) {
    // getInstance signals an unrecognised type by returning null
    throw new IllegalArgumentException("hashType must be known");
  }
}
 
Example 12
Source Project: RDFS   Source File: HashFunction.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a hash function whose results are constrained by an upper bound
 * and a fixed number of returned values.
 *
 * @param maxValue the highest value that may be returned; must be positive.
 * @param nbHash the number of hashed values produced; must be positive.
 * @param hashType type of the hashing function (see {@link Hash}).
 * @throws IllegalArgumentException if {@code maxValue} or {@code nbHash} is
 *         not positive, or if {@code Hash.getInstance} returns {@code null}
 *         for {@code hashType}.
 */
public HashFunction(int maxValue, int nbHash, int hashType) {
  if (maxValue < 1) {
    throw new IllegalArgumentException("maxValue must be > 0");
  }
  if (nbHash < 1) {
    throw new IllegalArgumentException("nbHash must be > 0");
  }

  this.maxValue = maxValue;
  this.nbHash = nbHash;
  this.hashFunction = Hash.getInstance(hashType);
  if (null == this.hashFunction) {
    // getInstance signals an unrecognised type by returning null
    throw new IllegalArgumentException("hashType must be known");
  }
}
 
Example 13
Source Project: hadoop-gpu   Source File: HashFunction.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a hash function whose results are constrained by an upper bound
 * and a fixed number of returned values.
 *
 * @param maxValue the highest value that may be returned; must be positive.
 * @param nbHash the number of hashed values produced; must be positive.
 * @param hashType type of the hashing function (see {@link Hash}).
 * @throws IllegalArgumentException if {@code maxValue} or {@code nbHash} is
 *         not positive, or if {@code Hash.getInstance} returns {@code null}
 *         for {@code hashType}.
 */
public HashFunction(int maxValue, int nbHash, int hashType) {
  if (maxValue < 1) {
    throw new IllegalArgumentException("maxValue must be > 0");
  }
  if (nbHash < 1) {
    throw new IllegalArgumentException("nbHash must be > 0");
  }

  this.maxValue = maxValue;
  this.nbHash = nbHash;
  this.hashFunction = Hash.getInstance(hashType);
  if (null == this.hashFunction) {
    // getInstance signals an unrecognised type by returning null
    throw new IllegalArgumentException("hashType must be known");
  }
}
 
Example 14
Source Project: hadoop   Source File: Filter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Restores this filter's parameters from {@code in}.
 * <p>
 * The first int read is either a positive hash count (old unversioned
 * format, for which the Jenkins hash is used) or {@code VERSION}, which is
 * followed by the hash count (int) and the hash type (byte). The vector
 * size (int) is read next and the hash function is rebuilt from the three
 * values.
 *
 * @param in source of the serialized fields.
 * @throws IOException if reading fails, or if the first int is neither
 *         positive nor equal to {@code VERSION}.
 */
@Override
public void readFields(DataInput in) throws IOException {
  final int ver = in.readInt();
  if (ver <= 0 && ver != VERSION) {
    throw new IOException("Unsupported version: " + ver);
  }

  if (ver > 0) {
    // old unversioned format: the leading int is the hash count itself
    nbHash = ver;
    hashType = Hash.JENKINS_HASH;
  } else {
    nbHash = in.readInt();
    hashType = in.readByte();
  }

  vectorSize = in.readInt();
  hash = new HashFunction(vectorSize, nbHash, hashType);
}
 
Example 15
Source Project: hadoop   Source File: BloomMapFile.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Sizes and creates the dynamic Bloom filter from configuration.
 * <p>
 * Reads the key count ({@code io.mapfile.bloom.size}, default 1024 * 1024)
 * and the desired maximum error rate ({@code io.mapfile.bloom.error.rate},
 * default 0.005, i.e. 0.5%). The vector size is
 * <code>-kn / (ln(1 - c^(1/k)))</code> bits, where <code>k</code> is the
 * number of hash functions, <code>n</code> is the number of keys and
 * <code>c</code> is the desired max. error rate.
 *
 * @param conf configuration supplying the sizing parameters and hash type.
 */
private synchronized void initBloomFilter(Configuration conf) {
  numKeys = conf.getInt("io.mapfile.bloom.size", 1024 * 1024);
  float errorRate = conf.getFloat("io.mapfile.bloom.error.rate", 0.005f);
  // Multiply in double: the int product -HASH_COUNT * numKeys wraps around
  // for large configured key counts and would yield a bogus vector size.
  double numerator = -((double) HASH_COUNT) * numKeys;
  vectorSize = (int) Math.ceil(numerator /
      Math.log(1.0 - Math.pow(errorRate, 1.0 / HASH_COUNT)));
  bloomFilter = new DynamicBloomFilter(vectorSize, HASH_COUNT,
      Hash.getHashType(conf), numKeys);
}
 
Example 16
Source Project: hadoop   Source File: TestBloomFilters.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks add/delete bookkeeping of a {@link CountingBloomFilter} on a single
 * key, then runs the shared tester strategies against the same filter.
 */
@Test
public void testCountingBloomFilter() {
  final int hashId = Hash.JENKINS_HASH;
  final String membershipMsg = "CountingBloomFilter.membership error ";
  final String countMsg = "CountingBloomFilter.approximateCount error";

  final CountingBloomFilter filter =
      new CountingBloomFilter(bitSize, hashFunctionNumber, hashId);
  final Key key = new Key(new byte[] { 48, 48 });

  // first insertion: key is a member with a count of one
  filter.add(key);
  assertTrue(membershipMsg, filter.membershipTest(key));
  assertTrue(countMsg, 1 == filter.approximateCount(key));

  // second insertion: count goes to two
  filter.add(key);
  assertTrue(countMsg, 2 == filter.approximateCount(key));

  // one deletion leaves the key present
  filter.delete(key);
  assertTrue(membershipMsg, filter.membershipTest(key));

  // second deletion removes it entirely
  filter.delete(key);
  assertFalse(membershipMsg, filter.membershipTest(key));
  assertTrue(countMsg, 0 == filter.approximateCount(key));

  BloomFilterCommonTester.of(hashId, numInsertions)
      .withFilterInstance(filter)
      .withTestCases(ImmutableSet.of(
          BloomFilterTestStrategy.KEY_TEST_STRATEGY,
          BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
          BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
          BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY,
          BloomFilterTestStrategy.WRITE_READ_STRATEGY,
          BloomFilterTestStrategy.FILTER_OR_STRATEGY,
          BloomFilterTestStrategy.FILTER_XOR_STRATEGY))
      .test();
}
 
Example 17
Source Project: hadoop   Source File: TestBloomFilters.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * For both the Murmur and Jenkins hashes, runs the absent-false-positive
 * check for odd and even digits under each of the MAXIMUM_FP, MINIMUM_FN
 * and RATIO remove schemes, and-ing the filter with a freshly constructed
 * one after every check.
 */
@Test
public void testRetouchedBloomFilterSpecific() {
  int numInsertions = 1000;
  int hashFunctionNumber = 5;

  ImmutableSet<Integer> hashes =
      ImmutableSet.of(Hash.MURMUR_HASH, Hash.JENKINS_HASH);

  for (int hashId : hashes) {
    RetouchedBloomFilter filter =
        new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId);

    // scheme: MAXIMUM_FP
    checkOnAbsentFalsePositive(
        hashId, numInsertions, filter, Digits.ODD, RemoveScheme.MAXIMUM_FP);
    filter.and(new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId));
    checkOnAbsentFalsePositive(
        hashId, numInsertions, filter, Digits.EVEN, RemoveScheme.MAXIMUM_FP);
    filter.and(new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId));

    // scheme: MINIMUM_FN
    checkOnAbsentFalsePositive(
        hashId, numInsertions, filter, Digits.ODD, RemoveScheme.MINIMUM_FN);
    filter.and(new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId));
    checkOnAbsentFalsePositive(
        hashId, numInsertions, filter, Digits.EVEN, RemoveScheme.MINIMUM_FN);
    filter.and(new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId));

    // scheme: RATIO
    checkOnAbsentFalsePositive(
        hashId, numInsertions, filter, Digits.ODD, RemoveScheme.RATIO);
    filter.and(new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId));
    checkOnAbsentFalsePositive(
        hashId, numInsertions, filter, Digits.EVEN, RemoveScheme.RATIO);
    filter.and(new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId));
  }
}
 
Example 18
Source Project: hadoop   Source File: TestBloomFilters.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Seeds an 8-bit filter with the pattern 0x95, calls {@code not()}, and
 * checks that no bit set beforehand is still set afterwards.
 */
@Test
public void testNot() {
  final BloomFilter bf = new BloomFilter(8, 1, Hash.JENKINS_HASH);
  final byte[] pattern = { (byte) 0x95 };
  bf.bits = BitSet.valueOf(pattern);
  final BitSet before = (BitSet) bf.bits.clone();

  bf.not();

  assertFalse("BloomFilter#not should have inverted all bits",
      bf.bits.intersects(before));
}
 
Example 19
Source Project: big-c   Source File: Filter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Restores this filter's parameters from {@code in}.
 * <p>
 * The first int read is either a positive hash count (old unversioned
 * format, for which the Jenkins hash is used) or {@code VERSION}, which is
 * followed by the hash count (int) and the hash type (byte). The vector
 * size (int) is read next and the hash function is rebuilt from the three
 * values.
 *
 * @param in source of the serialized fields.
 * @throws IOException if reading fails, or if the first int is neither
 *         positive nor equal to {@code VERSION}.
 */
@Override
public void readFields(DataInput in) throws IOException {
  final int ver = in.readInt();
  if (ver <= 0 && ver != VERSION) {
    throw new IOException("Unsupported version: " + ver);
  }

  if (ver > 0) {
    // old unversioned format: the leading int is the hash count itself
    nbHash = ver;
    hashType = Hash.JENKINS_HASH;
  } else {
    nbHash = in.readInt();
    hashType = in.readByte();
  }

  vectorSize = in.readInt();
  hash = new HashFunction(vectorSize, nbHash, hashType);
}
 
Example 20
Source Project: big-c   Source File: BloomMapFile.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Sizes and creates the dynamic Bloom filter from configuration.
 * <p>
 * Reads the key count ({@code io.mapfile.bloom.size}, default 1024 * 1024)
 * and the desired maximum error rate ({@code io.mapfile.bloom.error.rate},
 * default 0.005, i.e. 0.5%). The vector size is
 * <code>-kn / (ln(1 - c^(1/k)))</code> bits, where <code>k</code> is the
 * number of hash functions, <code>n</code> is the number of keys and
 * <code>c</code> is the desired max. error rate.
 *
 * @param conf configuration supplying the sizing parameters and hash type.
 */
private synchronized void initBloomFilter(Configuration conf) {
  numKeys = conf.getInt("io.mapfile.bloom.size", 1024 * 1024);
  float errorRate = conf.getFloat("io.mapfile.bloom.error.rate", 0.005f);
  // Multiply in double: the int product -HASH_COUNT * numKeys wraps around
  // for large configured key counts and would yield a bogus vector size.
  double numerator = -((double) HASH_COUNT) * numKeys;
  vectorSize = (int) Math.ceil(numerator /
      Math.log(1.0 - Math.pow(errorRate, 1.0 / HASH_COUNT)));
  bloomFilter = new DynamicBloomFilter(vectorSize, HASH_COUNT,
      Hash.getHashType(conf), numKeys);
}
 
Example 21
Source Project: big-c   Source File: TestBloomFilters.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks add/delete bookkeeping of a {@link CountingBloomFilter} on a single
 * key, then runs the shared tester strategies against the same filter.
 */
@Test
public void testCountingBloomFilter() {
  final int hashId = Hash.JENKINS_HASH;
  final String membershipMsg = "CountingBloomFilter.membership error ";
  final String countMsg = "CountingBloomFilter.approximateCount error";

  final CountingBloomFilter filter =
      new CountingBloomFilter(bitSize, hashFunctionNumber, hashId);
  final Key key = new Key(new byte[] { 48, 48 });

  // first insertion: key is a member with a count of one
  filter.add(key);
  assertTrue(membershipMsg, filter.membershipTest(key));
  assertTrue(countMsg, 1 == filter.approximateCount(key));

  // second insertion: count goes to two
  filter.add(key);
  assertTrue(countMsg, 2 == filter.approximateCount(key));

  // one deletion leaves the key present
  filter.delete(key);
  assertTrue(membershipMsg, filter.membershipTest(key));

  // second deletion removes it entirely
  filter.delete(key);
  assertFalse(membershipMsg, filter.membershipTest(key));
  assertTrue(countMsg, 0 == filter.approximateCount(key));

  BloomFilterCommonTester.of(hashId, numInsertions)
      .withFilterInstance(filter)
      .withTestCases(ImmutableSet.of(
          BloomFilterTestStrategy.KEY_TEST_STRATEGY,
          BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
          BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
          BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY,
          BloomFilterTestStrategy.WRITE_READ_STRATEGY,
          BloomFilterTestStrategy.FILTER_OR_STRATEGY,
          BloomFilterTestStrategy.FILTER_XOR_STRATEGY))
      .test();
}
 
Example 22
Source Project: big-c   Source File: TestBloomFilters.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * For both the Murmur and Jenkins hashes, runs the absent-false-positive
 * check for odd and even digits under each of the MAXIMUM_FP, MINIMUM_FN
 * and RATIO remove schemes, and-ing the filter with a freshly constructed
 * one after every check.
 */
@Test
public void testRetouchedBloomFilterSpecific() {
  int numInsertions = 1000;
  int hashFunctionNumber = 5;

  ImmutableSet<Integer> hashes =
      ImmutableSet.of(Hash.MURMUR_HASH, Hash.JENKINS_HASH);

  for (int hashId : hashes) {
    RetouchedBloomFilter filter =
        new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId);

    // scheme: MAXIMUM_FP
    checkOnAbsentFalsePositive(
        hashId, numInsertions, filter, Digits.ODD, RemoveScheme.MAXIMUM_FP);
    filter.and(new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId));
    checkOnAbsentFalsePositive(
        hashId, numInsertions, filter, Digits.EVEN, RemoveScheme.MAXIMUM_FP);
    filter.and(new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId));

    // scheme: MINIMUM_FN
    checkOnAbsentFalsePositive(
        hashId, numInsertions, filter, Digits.ODD, RemoveScheme.MINIMUM_FN);
    filter.and(new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId));
    checkOnAbsentFalsePositive(
        hashId, numInsertions, filter, Digits.EVEN, RemoveScheme.MINIMUM_FN);
    filter.and(new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId));

    // scheme: RATIO
    checkOnAbsentFalsePositive(
        hashId, numInsertions, filter, Digits.ODD, RemoveScheme.RATIO);
    filter.and(new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId));
    checkOnAbsentFalsePositive(
        hashId, numInsertions, filter, Digits.EVEN, RemoveScheme.RATIO);
    filter.and(new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId));
  }
}
 
Example 23
Source Project: big-c   Source File: TestBloomFilters.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Seeds an 8-bit filter with the pattern 0x95, calls {@code not()}, and
 * checks that no bit set beforehand is still set afterwards.
 */
@Test
public void testNot() {
  final BloomFilter bf = new BloomFilter(8, 1, Hash.JENKINS_HASH);
  final byte[] pattern = { (byte) 0x95 };
  bf.bits = BitSet.valueOf(pattern);
  final BitSet before = (BitSet) bf.bits.clone();

  bf.not();

  assertFalse("BloomFilter#not should have inverted all bits",
      bf.bits.intersects(before));
}
 
Example 24
Source Project: incubator-hivemall   Source File: BloomFilterUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a Murmur-hashed {@link BloomFilter} sized for the given load.
 * <p>
 * The vector size is `-kn / (ln(1 - c^(1/k)))` bits, where `k` is the number
 * of hash functions, `n` is the number of keys and `c` is the desired max
 * error rate.
 *
 * @param expectedNumberOfElements `n`, the number of keys to size for
 * @param errorRate `c`, the desired max error rate
 * @param nbHash `k`, the number of hash functions
 * @return a new filter with the computed vector size
 */
@Nonnull
public static BloomFilter newBloomFilter(@Nonnegative final int expectedNumberOfElements,
        @Nonnegative final float errorRate, @Nonnegative final int nbHash) {
    // Multiply in double: the int product -nbHash * expectedNumberOfElements
    // wraps around for large element counts and would corrupt the size.
    final double numerator = -((double) nbHash) * expectedNumberOfElements;
    final int vectorSize =
            (int) Math.ceil(numerator / Math.log(1.d - Math.pow(errorRate, 1.d / nbHash)));
    return new BloomFilter(vectorSize, nbHash, Hash.MURMUR_HASH);
}
 
Example 25
Source Project: incubator-hivemall   Source File: BloomFilterUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a Murmur-hashed {@link DynamicBloomFilter} sized for the given
 * load.
 * <p>
 * The vector size is `-kn / (ln(1 - c^(1/k)))` bits, where `k` is the number
 * of hash functions, `n` is the number of keys and `c` is the desired max
 * error rate. The element count is also passed to the filter as its last
 * constructor argument.
 *
 * @param expectedNumberOfElements `n`, the number of keys to size for
 * @param errorRate `c`, the desired max error rate
 * @param nbHash `k`, the number of hash functions
 * @return a new dynamic filter with the computed vector size
 */
@Nonnull
public static DynamicBloomFilter newDynamicBloomFilter(
        @Nonnegative final int expectedNumberOfElements, @Nonnegative final float errorRate,
        @Nonnegative final int nbHash) {
    // Multiply in double: the int product -nbHash * expectedNumberOfElements
    // wraps around for large element counts and would corrupt the size.
    final double numerator = -((double) nbHash) * expectedNumberOfElements;
    final int vectorSize =
            (int) Math.ceil(numerator / Math.log(1.d - Math.pow(errorRate, 1.d / nbHash)));
    return new DynamicBloomFilter(vectorSize, nbHash, Hash.MURMUR_HASH,
        expectedNumberOfElements);
}
 
Example 26
Source Project: hudi   Source File: InternalFilter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Restores this filter's parameters from {@code in}.
 * <p>
 * The first int read is either a positive hash count (old unversioned
 * format, for which the Jenkins hash is used) or {@code VERSION}, which is
 * followed by the hash count (int) and the hash type (byte). The vector
 * size (int) is read next and the hash function is rebuilt from the three
 * values.
 *
 * @param in source of the serialized fields.
 * @throws IOException if reading fails, or if the first int is neither
 *         positive nor equal to {@code VERSION}.
 */
@Override
public void readFields(DataInput in) throws IOException {
  final int ver = in.readInt();
  if (ver <= 0 && ver != VERSION) {
    throw new IOException("Unsupported version: " + ver);
  }

  if (ver > 0) {
    // old unversioned format: the leading int is the hash count itself
    nbHash = ver;
    hashType = Hash.JENKINS_HASH;
  } else {
    nbHash = in.readInt();
    hashType = in.readByte();
  }

  vectorSize = in.readInt();
  hash = new HashFunction(vectorSize, nbHash, hashType);
}
 
Example 27
Source Project: hudi   Source File: BloomFilterFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds the {@link BloomFilter} implementation selected by a type code.
 * The code is matched case-insensitively against the names of
 * {@code BloomFilterTypeCode.SIMPLE} and {@code BloomFilterTypeCode.DYNAMIC_V0};
 * both variants use the Murmur hash.
 *
 * @param numEntries          total number of entries
 * @param errorRate           max allowed error rate
 * @param maxNumberOfEntries  passed as the last constructor argument of the
 *                            dynamic filter; not used for the simple filter
 * @param bloomFilterTypeCode bloom filter type code
 * @return the {@link BloomFilter} thus created
 * @throws IllegalArgumentException if the type code matches neither variant
 */
public static BloomFilter createBloomFilter(int numEntries, double errorRate, int maxNumberOfEntries,
                                            String bloomFilterTypeCode) {
  if (bloomFilterTypeCode.equalsIgnoreCase(BloomFilterTypeCode.SIMPLE.name())) {
    return new SimpleBloomFilter(numEntries, errorRate, Hash.MURMUR_HASH);
  }
  if (bloomFilterTypeCode.equalsIgnoreCase(BloomFilterTypeCode.DYNAMIC_V0.name())) {
    return new HoodieDynamicBoundedBloomFilter(numEntries, errorRate, Hash.MURMUR_HASH, maxNumberOfEntries);
  }
  throw new IllegalArgumentException("Bloom Filter type code not recognizable " + bloomFilterTypeCode);
}
 
Example 28
Source Project: hudi   Source File: TestInternalDynamicBloomFilter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Adds random keys in batches and checks the serialized length after each
 * batch: it must grow through batch index 3 and stay constant afterwards.
 */
@Test
public void testBoundedSize() {
  final int[] batchSizes = {1000, 10000, 10000, 100000, 100000, 10000};
  final int indexForMaxGrowth = 3;
  final int maxSize = batchSizes[0] * 100;
  final BloomFilter filter =
      new HoodieDynamicBoundedBloomFilter(batchSizes[0], 0.000001, Hash.MURMUR_HASH, maxSize);

  int lastKnownBloomSize = 0;
  for (int index = 0; index < batchSizes.length; index++) {
    for (int added = 0; added < batchSizes[index]; added++) {
      filter.add(UUID.randomUUID().toString());
    }

    final int curLength = filter.serializeToString().length();
    if (index > indexForMaxGrowth) {
      assertEquals(curLength, lastKnownBloomSize, "Length should not increase after hitting max entries");
    } else if (index != 0) {
      // the first batch has no previous length to compare against
      assertTrue(curLength > lastKnownBloomSize, "Length should increase until max entries are reached");
    }
    lastKnownBloomSize = curLength;
  }
}
 
Example 29
Source Project: compiler   Source File: DistinctAggregator.java    License: Apache License 2.0 5 votes vote down vote up
/** {@inheritDoc} */
@Override
public void start(final EmitKey key) {
	super.start(key);

	// Replace the filter with a new Murmur-hashed dynamic Bloom filter;
	// its last constructor argument is the aggregator argument cast to int.
	filter = new DynamicBloomFilter(vectorSize, HASH_COUNT, Hash.MURMUR_HASH, (int) getArg());
}
 
Example 30
Source Project: spork   Source File: BuildBloomBase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Maps a textual hash name onto the matching {@link Hash} constant.
 * Matching is case-insensitive and by substring: a name containing
 * "jenkins" selects the Jenkins hash, otherwise one containing "murmur"
 * selects the Murmur hash.
 *
 * @param hashType textual hash name; must not be null
 * @return {@code Hash.JENKINS_HASH} or {@code Hash.MURMUR_HASH}
 * @throws RuntimeException if the name contains neither keyword
 */
private int convertHashType(String hashType) {
    // Lower-case once with a fixed locale: the default-locale variant turns
    // "I" into a dotless i under Turkish/Azeri locales, so "JENKINS" would
    // no longer contain "jenkins".
    final String normalized = hashType.toLowerCase(java.util.Locale.ROOT);
    if (normalized.contains("jenkins")) {
        return Hash.JENKINS_HASH;
    }
    if (normalized.contains("murmur")) {
        return Hash.MURMUR_HASH;
    }
    throw new RuntimeException("Unknown hash type " + hashType +
        ".  Valid values are jenkins and murmur.");
}