Java Code Examples for org.apache.hadoop.util.hash.Hash#MURMUR_HASH

The following examples show how to use org.apache.hadoop.util.hash.Hash#MURMUR_HASH. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestBloomFilters.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Exercises {@code BloomFilter} and {@code RetouchedBloomFilter}, both configured
 * with the Murmur hash, against the test strategies listed below.
 */
@Test
public void testFiltersWithMurmurHash() {
  final int murmur = Hash.MURMUR_HASH;

  // Strategies the common tester is asked to run for each filter instance.
  final ImmutableSet<BloomFilterTestStrategy> strategies = ImmutableSet.of(
      BloomFilterTestStrategy.KEY_TEST_STRATEGY,
      BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
      BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
      BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY,
      BloomFilterTestStrategy.WRITE_READ_STRATEGY,
      BloomFilterTestStrategy.FILTER_OR_STRATEGY,
      BloomFilterTestStrategy.FILTER_AND_STRATEGY,
      BloomFilterTestStrategy.FILTER_XOR_STRATEGY);

  BloomFilterCommonTester.of(murmur, numInsertions)
      .withFilterInstance(new BloomFilter(bitSize, hashFunctionNumber, murmur))
      .withFilterInstance(new RetouchedBloomFilter(bitSize, hashFunctionNumber, murmur))
      .withTestCases(strategies)
      .test();
}
 
Example 2
Source File: BloomFilterCommonTester.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Stores the hash type and insertion count, and installs the helper that
 * reports the known false positives for each supported hash function.
 *
 * @param hashId        hash function identifier (compared against {@code Hash} constants)
 * @param numInsertions number of insertions, kept in {@code this.numInsertions}
 */
private BloomFilterCommonTester(int hashId, int numInsertions) {
  this.numInsertions = numInsertions;
  this.hashType = hashId;

  this.preAssertionHelper = new PreAssertionHelper() {

    @Override
    public ImmutableSet<Integer> falsePositives(int requestedHash) {
      if (requestedHash == Hash.JENKINS_HASH) {
        // false positives for odd and even under 1000
        return ImmutableSet.of(99, 963);
      }
      if (requestedHash == Hash.MURMUR_HASH) {
        // false positives for odd and even under 1000
        return ImmutableSet.of(769, 772, 810, 874);
      }
      // Unknown hash id: fail fast; the return only satisfies the compiler.
      Assert.assertFalse("unknown hash error", true);
      return ImmutableSet.of();
    }
  };
}
 
Example 3
Source File: TestBloomFilters.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Exercises {@code BloomFilter} and {@code RetouchedBloomFilter}, both configured
 * with the Murmur hash, against the test strategies listed below.
 */
@Test
public void testFiltersWithMurmurHash() {
  final int murmur = Hash.MURMUR_HASH;

  // Strategies the common tester is asked to run for each filter instance.
  final ImmutableSet<BloomFilterTestStrategy> strategies = ImmutableSet.of(
      BloomFilterTestStrategy.KEY_TEST_STRATEGY,
      BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
      BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
      BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY,
      BloomFilterTestStrategy.WRITE_READ_STRATEGY,
      BloomFilterTestStrategy.FILTER_OR_STRATEGY,
      BloomFilterTestStrategy.FILTER_AND_STRATEGY,
      BloomFilterTestStrategy.FILTER_XOR_STRATEGY);

  BloomFilterCommonTester.of(murmur, numInsertions)
      .withFilterInstance(new BloomFilter(bitSize, hashFunctionNumber, murmur))
      .withFilterInstance(new RetouchedBloomFilter(bitSize, hashFunctionNumber, murmur))
      .withTestCases(strategies)
      .test();
}
 
Example 4
Source File: BloomFilterCommonTester.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Stores the hash type and insertion count, and installs the helper that
 * reports the known false positives for each supported hash function.
 *
 * @param hashId        hash function identifier (compared against {@code Hash} constants)
 * @param numInsertions number of insertions, kept in {@code this.numInsertions}
 */
private BloomFilterCommonTester(int hashId, int numInsertions) {
  this.numInsertions = numInsertions;
  this.hashType = hashId;

  this.preAssertionHelper = new PreAssertionHelper() {

    @Override
    public ImmutableSet<Integer> falsePositives(int requestedHash) {
      if (requestedHash == Hash.JENKINS_HASH) {
        // false positives for odd and even under 1000
        return ImmutableSet.of(99, 963);
      }
      if (requestedHash == Hash.MURMUR_HASH) {
        // false positives for odd and even under 1000
        return ImmutableSet.of(769, 772, 810, 874);
      }
      // Unknown hash id: fail fast; the return only satisfies the compiler.
      Assert.assertFalse("unknown hash error", true);
      return ImmutableSet.of();
    }
  };
}
 
Example 5
Source File: BloomFilterUtils.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@link BloomFilter} using the Murmur hash, sized from the expected
 * number of keys and the desired error rate.
 *
 * <p>The vector size is {@code ceil(-k*n / ln(1 - c^(1/k)))} bits, where {@code k}
 * is the number of hash functions, {@code n} is the number of keys and {@code c}
 * is the desired max error rate.
 *
 * @param expectedNumberOfElements expected number of keys ({@code n})
 * @param errorRate desired max error rate ({@code c})
 * @param nbHash number of hash functions ({@code k})
 * @return a new Bloom filter with the computed vector size
 */
@Nonnull
public static BloomFilter newBloomFilter(@Nonnegative final int expectedNumberOfElements,
        @Nonnegative final float errorRate, @Nonnegative final int nbHash) {
    // Multiply in double precision: the int product nbHash * expectedNumberOfElements
    // silently wraps around for large inputs and would yield a bogus vector size.
    final double negatedKn = -((double) nbHash) * expectedNumberOfElements;
    final double logTerm = Math.log(1.d - Math.pow(errorRate, 1.d / nbHash));
    final int vectorSize = (int) Math.ceil(negatedKn / logTerm);
    return new BloomFilter(vectorSize, nbHash, Hash.MURMUR_HASH);
}
 
Example 6
Source File: BloomFilterUtils.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@link DynamicBloomFilter} using the Murmur hash, sized from the
 * expected number of keys and the desired error rate.
 *
 * <p>The vector size is {@code ceil(-k*n / ln(1 - c^(1/k)))}, where {@code k} is
 * {@code nbHash}, {@code n} is {@code expectedNumberOfElements} and {@code c} is
 * {@code errorRate}. {@code expectedNumberOfElements} is also passed to the
 * filter as its last constructor argument.
 *
 * @param expectedNumberOfElements expected number of keys ({@code n})
 * @param errorRate desired max error rate ({@code c})
 * @param nbHash number of hash functions ({@code k})
 * @return a new dynamic Bloom filter with the computed vector size
 */
@Nonnull
public static DynamicBloomFilter newDynamicBloomFilter(
        @Nonnegative final int expectedNumberOfElements, @Nonnegative final float errorRate,
        @Nonnegative final int nbHash) {
    // Multiply in double precision: the int product nbHash * expectedNumberOfElements
    // silently wraps around for large inputs and would yield a bogus vector size.
    final double negatedKn = -((double) nbHash) * expectedNumberOfElements;
    final double logTerm = Math.log(1.d - Math.pow(errorRate, 1.d / nbHash));
    final int vectorSize = (int) Math.ceil(negatedKn / logTerm);
    return new DynamicBloomFilter(vectorSize, nbHash, Hash.MURMUR_HASH,
        expectedNumberOfElements);
}
 
Example 7
Source File: BloomFilterFactory.java    From hudi with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the {@link BloomFilter} implementation selected by the given type code.
 * The code is compared case-insensitively against the {@code BloomFilterTypeCode}
 * names; all filters are created with the Murmur hash.
 *
 * @param numEntries          total number of entries
 * @param errorRate           max allowed error rate
 * @param maxNumberOfEntries  upper bound handed to the dynamic filter; not used for the simple filter
 * @param bloomFilterTypeCode bloom filter type code
 * @return the {@link BloomFilter} thus created
 * @throws IllegalArgumentException if the type code matches no known filter type
 */
public static BloomFilter createBloomFilter(int numEntries, double errorRate, int maxNumberOfEntries,
                                            String bloomFilterTypeCode) {
  final String simpleCode = BloomFilterTypeCode.SIMPLE.name();
  if (bloomFilterTypeCode.equalsIgnoreCase(simpleCode)) {
    return new SimpleBloomFilter(numEntries, errorRate, Hash.MURMUR_HASH);
  }

  final String dynamicCode = BloomFilterTypeCode.DYNAMIC_V0.name();
  if (bloomFilterTypeCode.equalsIgnoreCase(dynamicCode)) {
    return new HoodieDynamicBoundedBloomFilter(numEntries, errorRate, Hash.MURMUR_HASH, maxNumberOfEntries);
  }

  throw new IllegalArgumentException("Bloom Filter type code not recognizable " + bloomFilterTypeCode);
}
 
Example 8
Source File: TestInternalDynamicBloomFilter.java    From hudi with Apache License 2.0 5 votes vote down vote up
/**
 * Adds batches of random keys to a bounded dynamic bloom filter and checks that
 * its serialized length grows for batches up to {@code indexForMaxGrowth} and
 * stays unchanged for the batches after it.
 */
@Test
public void testBoundedSize() {
  final int[] batchSizes = {1000, 10000, 10000, 100000, 100000, 10000};
  final int indexForMaxGrowth = 3;
  final int maxSize = batchSizes[0] * 100;
  final BloomFilter filter =
      new HoodieDynamicBoundedBloomFilter(batchSizes[0], 0.000001, Hash.MURMUR_HASH, maxSize);

  int previousLength = 0;
  for (int batch = 0; batch < batchSizes.length; batch++) {
    for (int added = 0; added < batchSizes[batch]; added++) {
      filter.add(UUID.randomUUID().toString());
    }

    final int currentLength = filter.serializeToString().length();
    // The first batch has nothing to compare against, so it is only recorded.
    if (batch > indexForMaxGrowth) {
      assertEquals(currentLength, previousLength, "Length should not increase after hitting max entries");
    } else if (batch != 0) {
      assertTrue(currentLength > previousLength, "Length should increase until max entries are reached");
    }
    previousLength = currentLength;
  }
}
 
Example 9
Source File: DistinctAggregator.java    From compiler with Apache License 2.0 5 votes vote down vote up
/** {@inheritDoc} */
@Override
public void start(final EmitKey key) {
	super.start(key);

	// Replace the filter with a new Murmur-hashed dynamic bloom filter on every start;
	// the aggregator argument is truncated to int and passed as the last constructor argument.
	filter = new DynamicBloomFilter(
			vectorSize,
			HASH_COUNT,
			Hash.MURMUR_HASH,
			(int) getArg());
}
 
Example 10
Source File: BuildBloomBase.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Maps a hash type name to the corresponding {@code Hash} constant.
 * Matching is case-insensitive and by substring: any name containing
 * "jenkins" selects the Jenkins hash, otherwise any name containing
 * "murmur" selects the Murmur hash.
 *
 * @param hashType name of the hash type
 * @return {@code Hash.JENKINS_HASH} or {@code Hash.MURMUR_HASH}
 * @throws RuntimeException if the name contains neither "jenkins" nor "murmur"
 */
private int convertHashType(String hashType) {
    // Lower-case once, with a fixed locale: with the default locale set to
    // e.g. Turkish, "JENKINS" lower-cases to a dotless i and would never match.
    final String normalized = hashType.toLowerCase(java.util.Locale.ROOT);
    if (normalized.contains("jenkins")) {
        return Hash.JENKINS_HASH;
    } else if (normalized.contains("murmur")) {
        return Hash.MURMUR_HASH;
    } else {
        throw new RuntimeException("Unknown hash type " + hashType +
            ".  Valid values are jenkins and murmur.");
    }
}