org.apache.hadoop.util.hash.Hash Java Examples

The following examples show how to use org.apache.hadoop.util.hash.Hash. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: HashFunction.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a hash function bound to one of the {@link Hash} implementations.
 * <p>
 * Both numeric arguments must be strictly positive, and {@code hashType}
 * must be an identifier that {@code Hash.getInstance} can resolve.
 *
 * @param maxValue the maximum highest returned value; must be &gt; 0.
 * @param nbHash the number of resulting hashed values; must be &gt; 0.
 * @param hashType type of the hashing function (see {@link Hash}).
 * @throws IllegalArgumentException if {@code maxValue} or {@code nbHash} is
 *         not positive, or if {@code hashType} resolves to no implementation.
 */
public HashFunction(int maxValue, int nbHash, int hashType) {
  if (maxValue <= 0) {
    throw new IllegalArgumentException("maxValue must be > 0");
  }
  if (nbHash <= 0) {
    throw new IllegalArgumentException("nbHash must be > 0");
  }

  // Resolve the implementation first; a null result means the id is unknown.
  this.hashFunction = Hash.getInstance(hashType);
  if (this.hashFunction == null) {
    throw new IllegalArgumentException("hashType must be known");
  }
  this.nbHash = nbHash;
  this.maxValue = maxValue;
}
 
Example #2
Source File: HashFunction.java    From hadoop-gpu with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a hash function bound to one of the {@link Hash} implementations.
 * <p>
 * Both numeric arguments must be strictly positive, and {@code hashType}
 * must be an identifier that {@code Hash.getInstance} can resolve.
 *
 * @param maxValue the maximum highest returned value; must be &gt; 0.
 * @param nbHash the number of resulting hashed values; must be &gt; 0.
 * @param hashType type of the hashing function (see {@link Hash}).
 * @throws IllegalArgumentException if {@code maxValue} or {@code nbHash} is
 *         not positive, or if {@code hashType} resolves to no implementation.
 */
public HashFunction(int maxValue, int nbHash, int hashType) {
  if (maxValue <= 0) {
    throw new IllegalArgumentException("maxValue must be > 0");
  }
  if (nbHash <= 0) {
    throw new IllegalArgumentException("nbHash must be > 0");
  }

  // Resolve the implementation first; a null result means the id is unknown.
  this.hashFunction = Hash.getInstance(hashType);
  if (this.hashFunction == null) {
    throw new IllegalArgumentException("hashType must be known");
  }
  this.nbHash = nbHash;
  this.maxValue = maxValue;
}
 
Example #3
Source File: HashFunction.java    From RDFS with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a hash function bound to one of the {@link Hash} implementations.
 * <p>
 * Both numeric arguments must be strictly positive, and {@code hashType}
 * must be an identifier that {@code Hash.getInstance} can resolve.
 *
 * @param maxValue the maximum highest returned value; must be &gt; 0.
 * @param nbHash the number of resulting hashed values; must be &gt; 0.
 * @param hashType type of the hashing function (see {@link Hash}).
 * @throws IllegalArgumentException if {@code maxValue} or {@code nbHash} is
 *         not positive, or if {@code hashType} resolves to no implementation.
 */
public HashFunction(int maxValue, int nbHash, int hashType) {
  if (maxValue <= 0) {
    throw new IllegalArgumentException("maxValue must be > 0");
  }
  if (nbHash <= 0) {
    throw new IllegalArgumentException("nbHash must be > 0");
  }

  // Resolve the implementation first; a null result means the id is unknown.
  this.hashFunction = Hash.getInstance(hashType);
  if (this.hashFunction == null) {
    throw new IllegalArgumentException("hashType must be known");
  }
  this.nbHash = nbHash;
  this.maxValue = maxValue;
}
 
Example #4
Source File: HashFunction.java    From streaminer with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a hash function bound to one of the {@link Hash} implementations.
 * <p>
 * Both numeric arguments must be strictly positive, and {@code hashType}
 * must be an identifier that {@code Hash.getInstance} can resolve.
 *
 * @param maxValue the maximum highest returned value; must be &gt; 0.
 * @param nbHash the number of resulting hashed values; must be &gt; 0.
 * @param hashType type of the hashing function (see {@link Hash}).
 * @throws IllegalArgumentException if {@code maxValue} or {@code nbHash} is
 *         not positive, or if {@code hashType} resolves to no implementation.
 */
public HashFunction(int maxValue, int nbHash, int hashType) {
  if (maxValue <= 0) {
    throw new IllegalArgumentException("maxValue must be > 0");
  }
  if (nbHash <= 0) {
    throw new IllegalArgumentException("nbHash must be > 0");
  }

  // Resolve the implementation first; a null result means the id is unknown.
  this.hashFunction = Hash.getInstance(hashType);
  if (this.hashFunction == null) {
    throw new IllegalArgumentException("hashType must be known");
  }
  this.nbHash = nbHash;
  this.maxValue = maxValue;
}
 
Example #5
Source File: BloomFilterCommonTester.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Stores the hash type and insertion count and installs the helper that
 * supplies the expected false positives for each supported hash type.
 *
 * @param hashId hash type identifier (a {@link Hash} constant).
 * @param numInsertions number of insertions the tester is configured with.
 */
private BloomFilterCommonTester(int hashId, int numInsertions) {
  this.hashType = hashId;
  this.numInsertions = numInsertions;

  this.preAssertionHelper = new PreAssertionHelper() {

    @Override
    public ImmutableSet<Integer> falsePositives(int hashId) {
      // false positives for odd and even under 1000, per hash type
      if (hashId == Hash.JENKINS_HASH) {
        return ImmutableSet.of(99, 963);
      }
      if (hashId == Hash.MURMUR_HASH) {
        return ImmutableSet.of(769, 772, 810, 874);
      }
      // Fail fast on an unknown hash type; the return only satisfies the compiler.
      Assert.assertFalse("unknown hash error", true);
      return ImmutableSet.of();
    }
  };
}
 
Example #6
Source File: TestBloomFilters.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the common tester against a {@code BloomFilter} and a
 * {@code RetouchedBloomFilter}, both built with the Murmur hash.
 */
@Test
public void testFiltersWithMurmurHash() {
  final int hashId = Hash.MURMUR_HASH;

  BloomFilterCommonTester.of(hashId, numInsertions)
      // filters under test
      .withFilterInstance(
          new BloomFilter(bitSize, hashFunctionNumber, hashId))
      .withFilterInstance(
          new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId))
      // strategies applied to each registered filter
      .withTestCases(ImmutableSet.of(
          BloomFilterTestStrategy.KEY_TEST_STRATEGY,
          BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
          BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
          BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY,
          BloomFilterTestStrategy.WRITE_READ_STRATEGY,
          BloomFilterTestStrategy.FILTER_OR_STRATEGY,
          BloomFilterTestStrategy.FILTER_AND_STRATEGY,
          BloomFilterTestStrategy.FILTER_XOR_STRATEGY))
      .test();
}
 
Example #7
Source File: TestBloomFilters.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the common tester against a {@code BloomFilter} and a
 * {@code RetouchedBloomFilter}, both built with the Jenkins hash.
 */
@Test
public void testFiltersWithJenkinsHash() {
  final int hashId = Hash.JENKINS_HASH;

  BloomFilterCommonTester.of(hashId, numInsertions)
      // filters under test
      .withFilterInstance(
          new BloomFilter(bitSize, hashFunctionNumber, hashId))
      .withFilterInstance(
          new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId))
      // strategies applied to each registered filter
      .withTestCases(ImmutableSet.of(
          BloomFilterTestStrategy.KEY_TEST_STRATEGY,
          BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
          BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
          BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY,
          BloomFilterTestStrategy.WRITE_READ_STRATEGY,
          BloomFilterTestStrategy.FILTER_OR_STRATEGY,
          BloomFilterTestStrategy.FILTER_AND_STRATEGY,
          BloomFilterTestStrategy.FILTER_XOR_STRATEGY))
      .test();
}
 
Example #8
Source File: TestBloomFilters.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the common tester against a {@code DynamicBloomFilter} built with the
 * Jenkins hash, then checks that its string form is not null.
 */
@Test
public void testDynamicBloomFilter() {
  final int hashId = Hash.JENKINS_HASH;
  final Filter filter =
      new DynamicBloomFilter(bitSize, hashFunctionNumber, hashId, 3);

  BloomFilterCommonTester.of(hashId, numInsertions)
      .withFilterInstance(filter)
      .withTestCases(ImmutableSet.of(
          BloomFilterTestStrategy.KEY_TEST_STRATEGY,
          BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
          BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
          BloomFilterTestStrategy.WRITE_READ_STRATEGY,
          BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY))
      .test();

  assertNotNull("testDynamicBloomFilter error ", filter.toString());
}
 
Example #9
Source File: HashFunction.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a hash function bound to one of the {@link Hash} implementations.
 * <p>
 * Both numeric arguments must be strictly positive, and {@code hashType}
 * must be an identifier that {@code Hash.getInstance} can resolve.
 *
 * @param maxValue the maximum highest returned value; must be &gt; 0.
 * @param nbHash the number of resulting hashed values; must be &gt; 0.
 * @param hashType type of the hashing function (see {@link Hash}).
 * @throws IllegalArgumentException if {@code maxValue} or {@code nbHash} is
 *         not positive, or if {@code hashType} resolves to no implementation.
 */
public HashFunction(int maxValue, int nbHash, int hashType) {
  if (maxValue <= 0) {
    throw new IllegalArgumentException("maxValue must be > 0");
  }
  if (nbHash <= 0) {
    throw new IllegalArgumentException("nbHash must be > 0");
  }

  // Resolve the implementation first; a null result means the id is unknown.
  this.hashFunction = Hash.getInstance(hashType);
  if (this.hashFunction == null) {
    throw new IllegalArgumentException("hashType must be known");
  }
  this.nbHash = nbHash;
  this.maxValue = maxValue;
}
 
Example #10
Source File: TestBloomFilters.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the common tester against a {@code BloomFilter} and a
 * {@code RetouchedBloomFilter}, both built with the Murmur hash.
 */
@Test
public void testFiltersWithMurmurHash() {
  final int hashId = Hash.MURMUR_HASH;

  BloomFilterCommonTester.of(hashId, numInsertions)
      // filters under test
      .withFilterInstance(
          new BloomFilter(bitSize, hashFunctionNumber, hashId))
      .withFilterInstance(
          new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId))
      // strategies applied to each registered filter
      .withTestCases(ImmutableSet.of(
          BloomFilterTestStrategy.KEY_TEST_STRATEGY,
          BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
          BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
          BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY,
          BloomFilterTestStrategy.WRITE_READ_STRATEGY,
          BloomFilterTestStrategy.FILTER_OR_STRATEGY,
          BloomFilterTestStrategy.FILTER_AND_STRATEGY,
          BloomFilterTestStrategy.FILTER_XOR_STRATEGY))
      .test();
}
 
Example #11
Source File: TestBloomFilters.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the common tester against a {@code BloomFilter} and a
 * {@code RetouchedBloomFilter}, both built with the Jenkins hash.
 */
@Test
public void testFiltersWithJenkinsHash() {
  final int hashId = Hash.JENKINS_HASH;

  BloomFilterCommonTester.of(hashId, numInsertions)
      // filters under test
      .withFilterInstance(
          new BloomFilter(bitSize, hashFunctionNumber, hashId))
      .withFilterInstance(
          new RetouchedBloomFilter(bitSize, hashFunctionNumber, hashId))
      // strategies applied to each registered filter
      .withTestCases(ImmutableSet.of(
          BloomFilterTestStrategy.KEY_TEST_STRATEGY,
          BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
          BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
          BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY,
          BloomFilterTestStrategy.WRITE_READ_STRATEGY,
          BloomFilterTestStrategy.FILTER_OR_STRATEGY,
          BloomFilterTestStrategy.FILTER_AND_STRATEGY,
          BloomFilterTestStrategy.FILTER_XOR_STRATEGY))
      .test();
}
 
Example #12
Source File: TestBloomFilters.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the common tester against a {@code DynamicBloomFilter} built with the
 * Jenkins hash, then checks that its string form is not null.
 */
@Test
public void testDynamicBloomFilter() {
  final int hashId = Hash.JENKINS_HASH;
  final Filter filter =
      new DynamicBloomFilter(bitSize, hashFunctionNumber, hashId, 3);

  BloomFilterCommonTester.of(hashId, numInsertions)
      .withFilterInstance(filter)
      .withTestCases(ImmutableSet.of(
          BloomFilterTestStrategy.KEY_TEST_STRATEGY,
          BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
          BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
          BloomFilterTestStrategy.WRITE_READ_STRATEGY,
          BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY))
      .test();

  assertNotNull("testDynamicBloomFilter error ", filter.toString());
}
 
Example #13
Source File: BloomFilterCommonTester.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Stores the hash type and insertion count and installs the helper that
 * supplies the expected false positives for each supported hash type.
 *
 * @param hashId hash type identifier (a {@link Hash} constant).
 * @param numInsertions number of insertions the tester is configured with.
 */
private BloomFilterCommonTester(int hashId, int numInsertions) {
  this.hashType = hashId;
  this.numInsertions = numInsertions;

  this.preAssertionHelper = new PreAssertionHelper() {

    @Override
    public ImmutableSet<Integer> falsePositives(int hashId) {
      // false positives for odd and even under 1000, per hash type
      if (hashId == Hash.JENKINS_HASH) {
        return ImmutableSet.of(99, 963);
      }
      if (hashId == Hash.MURMUR_HASH) {
        return ImmutableSet.of(769, 772, 810, 874);
      }
      // Fail fast on an unknown hash type; the return only satisfies the compiler.
      Assert.assertFalse("unknown hash error", true);
      return ImmutableSet.of();
    }
  };
}
 
Example #14
Source File: InternalFilter.java    From hudi with Apache License 2.0 5 votes vote down vote up
/**
 * Restores this filter's parameters from {@code in} and rebuilds its hash
 * function.
 * <p>
 * A positive leading int is treated as the old unversioned format, where the
 * value itself is the number of hashes and the Jenkins hash is used. A
 * leading int equal to {@code VERSION} is followed by the number of hashes
 * and a one-byte hash type. In both cases the vector size is read last.
 *
 * @param in source to read from.
 * @throws IOException if reading fails or the leading int is neither
 *         positive nor equal to {@code VERSION}.
 */
@Override
public void readFields(DataInput in) throws IOException {
  final int ver = in.readInt();
  if (ver <= 0 && ver != VERSION) {
    throw new IOException("Unsupported version: " + ver);
  }

  if (ver > 0) {
    // old unversioned format: the leading int is the hash count itself
    this.nbHash = ver;
    this.hashType = Hash.JENKINS_HASH;
  } else {
    // versioned format: hash count, then hash type as a single byte
    this.nbHash = in.readInt();
    this.hashType = in.readByte();
  }

  this.vectorSize = in.readInt();
  this.hash = new HashFunction(this.vectorSize, this.nbHash, this.hashType);
}
 
Example #15
Source File: BloomMapFile.java    From hadoop-gpu with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the expected key count and error rate from {@code conf}, derives the
 * bit-vector size from them, and creates the dynamic Bloom filter.
 *
 * @param conf configuration supplying {@code io.mapfile.bloom.size}
 *        (default 1024 * 1024), {@code io.mapfile.bloom.error.rate}
 *        (default 0.005) and the hash type.
 */
private synchronized void initBloomFilter(Configuration conf) {
  numKeys = conf.getInt("io.mapfile.bloom.size", 1024 * 1024);
  // vector size should be <code>-kn / (ln(1 - c^(1/k)))</code> bits for
  // single key, where <code>k</code> is the number of hash functions,
  // <code>n</code> is the number of keys and <code>c</code> is the desired
  // max. error rate.
  // Our desired error rate is by default 0.005, i.e. 0.5%
  float errorRate = conf.getFloat("io.mapfile.bloom.error.rate", 0.005f);
  // Form -kn in long arithmetic: the int product wraps around for large
  // configured key counts and would produce a wrong (even negative) size.
  final long negatedKn = -1L * HASH_COUNT * numKeys;
  vectorSize = (int)Math.ceil((double)negatedKn /
      Math.log(1.0 - Math.pow(errorRate, 1.0/HASH_COUNT)));
  bloomFilter = new DynamicBloomFilter(vectorSize, HASH_COUNT,
      Hash.getHashType(conf), numKeys);
}
 
Example #16
Source File: Filter.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Restores this filter's parameters from {@code in} and rebuilds its hash
 * function.
 * <p>
 * A positive leading int is treated as the old unversioned format, where the
 * value itself is the number of hashes and the Jenkins hash is used. A
 * leading int equal to {@code VERSION} is followed by the number of hashes
 * and a one-byte hash type. In both cases the vector size is read last.
 *
 * @param in source to read from.
 * @throws IOException if reading fails or the leading int is neither
 *         positive nor equal to {@code VERSION}.
 */
@Override
public void readFields(DataInput in) throws IOException {
  final int ver = in.readInt();
  if (ver <= 0 && ver != VERSION) {
    throw new IOException("Unsupported version: " + ver);
  }

  if (ver > 0) {
    // old unversioned format: the leading int is the hash count itself
    this.nbHash = ver;
    this.hashType = Hash.JENKINS_HASH;
  } else {
    // versioned format: hash count, then hash type as a single byte
    this.nbHash = in.readInt();
    this.hashType = in.readByte();
  }

  this.vectorSize = in.readInt();
  this.hash = new HashFunction(this.vectorSize, this.nbHash, this.hashType);
}
 
Example #17
Source File: Filter.java    From hadoop-gpu with Apache License 2.0 5 votes vote down vote up
/**
 * Restores this filter's parameters from {@code in} and rebuilds its hash
 * function.
 * <p>
 * A positive leading int is treated as the old unversioned format, where the
 * value itself is the number of hashes and the Jenkins hash is used. A
 * leading int equal to {@code VERSION} is followed by the number of hashes
 * and a one-byte hash type. In both cases the vector size is read last.
 *
 * @param in source to read from.
 * @throws IOException if reading fails or the leading int is neither
 *         positive nor equal to {@code VERSION}.
 */
public void readFields(DataInput in) throws IOException {
  final int ver = in.readInt();
  if (ver <= 0 && ver != VERSION) {
    throw new IOException("Unsupported version: " + ver);
  }

  if (ver > 0) {
    // old unversioned format: the leading int is the hash count itself
    this.nbHash = ver;
    this.hashType = Hash.JENKINS_HASH;
  } else {
    // versioned format: hash count, then hash type as a single byte
    this.nbHash = in.readInt();
    this.hashType = in.readByte();
  }

  this.vectorSize = in.readInt();
  this.hash = new HashFunction(this.vectorSize, this.nbHash, this.hashType);
}
 
Example #18
Source File: QueryParser.java    From accumulo-recipes with Apache License 2.0 5 votes vote down vote up
/**
 * Parses {@code query}, reusing a cached parse result when one is stored
 * under the query's Murmur hash.
 * <p>
 * Upper-case {@code AND}/{@code OR}/{@code NOT} operators surrounded by
 * whitespace are lower-cased before hashing and parsing. The cache is keyed
 * by the hash value only.
 *
 * @param query the query text to parse.
 * @throws ParseException if the parser rejects the query.
 */
public void execute(String query) throws ParseException {
    reset();
    query = query.replaceAll("\\s+AND\\s+", " and ")
            .replaceAll("\\s+OR\\s+", " or ")
            .replaceAll("\\s+NOT\\s+", " not ");

    // Look the normalized query up in the cache by its hash
    final Hash hasher = MurmurHash.getInstance();
    this.hashVal = hasher.hash(query.getBytes(), SEED);
    CacheEntry cached;
    synchronized (cache) {
        cached = (CacheEntry) cache.get(hashVal);
    }

    if (cached == null) {
        // Cache miss: parse, let this visitor collect the terms, then store the result.
        final Parser parser = new Parser(new StringReader(";"));
        rootNode = parser.parse(new StringReader(query), null);
        rootNode.childrenAccept(this, null);
        tree = new TreeBuilder(rootNode).getRootNode();
        final CacheEntry fresh = new CacheEntry(this.negatedTerms, this.andTerms, this.orTerms, this.literals, this.terms, rootNode, tree);
        synchronized (cache) {
            cache.put(hashVal, fresh);
        }
        return;
    }

    // Cache hit: restore every parse artifact from the stored entry.
    this.negatedTerms = cached.getNegatedTerms();
    this.andTerms = cached.getAndTerms();
    this.orTerms = cached.getOrTerms();
    this.literals = cached.getLiterals();
    this.terms = cached.getTerms();
    this.rootNode = cached.getRootNode();
    this.tree = cached.getTree();
}
 
Example #19
Source File: BloomMapFile.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the expected key count and error rate from {@code conf}, derives the
 * bit-vector size from them, and creates the dynamic Bloom filter.
 *
 * @param conf configuration supplying {@code io.mapfile.bloom.size}
 *        (default 1024 * 1024), {@code io.mapfile.bloom.error.rate}
 *        (default 0.005) and the hash type.
 */
private synchronized void initBloomFilter(Configuration conf) {
  numKeys = conf.getInt("io.mapfile.bloom.size", 1024 * 1024);
  // vector size should be <code>-kn / (ln(1 - c^(1/k)))</code> bits for
  // single key, where <code>k</code> is the number of hash functions,
  // <code>n</code> is the number of keys and <code>c</code> is the desired
  // max. error rate.
  // Our desired error rate is by default 0.005, i.e. 0.5%
  float errorRate = conf.getFloat("io.mapfile.bloom.error.rate", 0.005f);
  // Form -kn in long arithmetic: the int product wraps around for large
  // configured key counts and would produce a wrong (even negative) size.
  final long negatedKn = -1L * HASH_COUNT * numKeys;
  vectorSize = (int)Math.ceil((double)negatedKn /
      Math.log(1.0 - Math.pow(errorRate, 1.0/HASH_COUNT)));
  bloomFilter = new DynamicBloomFilter(vectorSize, HASH_COUNT,
      Hash.getHashType(conf), numKeys);
}
 
Example #20
Source File: BloomMapFile.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the expected key count and error rate from {@code conf}, derives the
 * bit-vector size from them, and creates the dynamic Bloom filter.
 *
 * @param conf configuration supplying {@code io.mapfile.bloom.size}
 *        (default 1024 * 1024), {@code io.mapfile.bloom.error.rate}
 *        (default 0.005) and the hash type.
 */
private synchronized void initBloomFilter(Configuration conf) {
  numKeys = conf.getInt("io.mapfile.bloom.size", 1024 * 1024);
  // vector size should be <code>-kn / (ln(1 - c^(1/k)))</code> bits for
  // single key, where <code>k</code> is the number of hash functions,
  // <code>n</code> is the number of keys and <code>c</code> is the desired
  // max. error rate.
  // Our desired error rate is by default 0.005, i.e. 0.5%
  float errorRate = conf.getFloat("io.mapfile.bloom.error.rate", 0.005f);
  // Form -kn in long arithmetic: the int product wraps around for large
  // configured key counts and would produce a wrong (even negative) size.
  final long negatedKn = -1L * HASH_COUNT * numKeys;
  vectorSize = (int)Math.ceil((double)negatedKn /
      Math.log(1.0 - Math.pow(errorRate, 1.0/HASH_COUNT)));
  bloomFilter = new DynamicBloomFilter(vectorSize, HASH_COUNT,
      Hash.getHashType(conf), numKeys);
}
 
Example #21
Source File: Filter.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Restores this filter's parameters from {@code in} and rebuilds its hash
 * function.
 * <p>
 * A positive leading int is treated as the old unversioned format, where the
 * value itself is the number of hashes and the Jenkins hash is used. A
 * leading int equal to {@code VERSION} is followed by the number of hashes
 * and a one-byte hash type. In both cases the vector size is read last.
 *
 * @param in source to read from.
 * @throws IOException if reading fails or the leading int is neither
 *         positive nor equal to {@code VERSION}.
 */
public void readFields(DataInput in) throws IOException {
  final int ver = in.readInt();
  if (ver <= 0 && ver != VERSION) {
    throw new IOException("Unsupported version: " + ver);
  }

  if (ver > 0) {
    // old unversioned format: the leading int is the hash count itself
    this.nbHash = ver;
    this.hashType = Hash.JENKINS_HASH;
  } else {
    // versioned format: hash count, then hash type as a single byte
    this.nbHash = in.readInt();
    this.hashType = in.readByte();
  }

  this.vectorSize = in.readInt();
  this.hash = new HashFunction(this.vectorSize, this.nbHash, this.hashType);
}
 
Example #22
Source File: BuildBloomBase.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Maps a textual hash name onto the matching {@link Hash} constant.
 * <p>
 * Matching is case-insensitive and by substring: any value containing
 * "jenkins" selects the Jenkins hash, otherwise any value containing
 * "murmur" selects the Murmur hash.
 *
 * @param hashType user-supplied hash name.
 * @return {@code Hash.JENKINS_HASH} or {@code Hash.MURMUR_HASH}.
 * @throws RuntimeException if the value names neither hash.
 */
private int convertHashType(String hashType) {
    // Lower-case with a fixed locale: under e.g. a Turkish default locale
    // "JENKINS" lower-cases to "jenk\u0131ns" and would never match.
    final String normalized = hashType.toLowerCase(java.util.Locale.ROOT);
    if (normalized.contains("jenkins")) {
        return Hash.JENKINS_HASH;
    }
    if (normalized.contains("murmur")) {
        return Hash.MURMUR_HASH;
    }
    throw new RuntimeException("Unknown hash type " + hashType +
        ".  Valid values are jenkins and murmur.");
}
 
Example #23
Source File: DistinctAggregator.java    From compiler with Apache License 2.0 5 votes vote down vote up
/** {@inheritDoc} */
@Override
public void start(final EmitKey key) {
	super.start(key);

	// Start each key with a new Murmur-hashed dynamic filter; the aggregator
	// argument is narrowed to int for the filter's last constructor parameter.
	final int filterArg = (int) this.getArg();
	this.filter = new DynamicBloomFilter(this.vectorSize, HASH_COUNT, Hash.MURMUR_HASH, filterArg);
}
 
Example #24
Source File: TestInternalDynamicBloomFilter.java    From hudi with Apache License 2.0 5 votes vote down vote up
/**
 * Adds random keys in batches and checks that the serialized filter keeps
 * growing through batch index {@code indexForMaxGrowth} and stays the same
 * length for every batch after it.
 */
@Test
public void testBoundedSize() {

  final int[] batchSizes = {1000, 10000, 10000, 100000, 100000, 10000};
  final int indexForMaxGrowth = 3;
  final int maxSize = batchSizes[0] * 100;
  BloomFilter filter = new HoodieDynamicBoundedBloomFilter(batchSizes[0], 0.000001, Hash.MURMUR_HASH, maxSize);

  int lastKnownBloomSize = 0;
  for (int index = 0; index < batchSizes.length; index++) {
    for (int added = 0; added < batchSizes[index]; added++) {
      filter.add(UUID.randomUUID().toString());
    }

    final int curLength = filter.serializeToString().length();
    // The first batch only establishes the baseline length.
    if (index != 0) {
      if (index > indexForMaxGrowth) {
        assertEquals(curLength, lastKnownBloomSize, "Length should not increase after hitting max entries");
      } else {
        assertTrue(curLength > lastKnownBloomSize, "Length should increase until max entries are reached");
      }
    }
    lastKnownBloomSize = curLength;
  }
}
 
Example #25
Source File: BloomFilterFactory.java    From hudi with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a new {@link BloomFilter} of the kind named by the type code.
 * <p>
 * The type code is compared case-insensitively against the names of
 * {@code BloomFilterTypeCode.SIMPLE} and {@code BloomFilterTypeCode.DYNAMIC_V0}.
 *
 * @param numEntries          total number of entries
 * @param errorRate           max allowed error rate
 * @param maxNumberOfEntries  entry bound passed to the dynamic filter; not used for the simple filter
 * @param bloomFilterTypeCode bloom filter type code
 * @return the {@link BloomFilter} thus created
 * @throws IllegalArgumentException if the type code matches neither supported kind
 */
public static BloomFilter createBloomFilter(int numEntries, double errorRate, int maxNumberOfEntries,
                                            String bloomFilterTypeCode) {
  if (bloomFilterTypeCode.equalsIgnoreCase(BloomFilterTypeCode.SIMPLE.name())) {
    return new SimpleBloomFilter(numEntries, errorRate, Hash.MURMUR_HASH);
  }
  if (bloomFilterTypeCode.equalsIgnoreCase(BloomFilterTypeCode.DYNAMIC_V0.name())) {
    return new HoodieDynamicBoundedBloomFilter(numEntries, errorRate, Hash.MURMUR_HASH, maxNumberOfEntries);
  }
  throw new IllegalArgumentException("Bloom Filter type code not recognizable " + bloomFilterTypeCode);
}
 
Example #26
Source File: BloomFilterUtils.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a Murmur-hashed {@link DynamicBloomFilter} sized for the expected
 * number of elements and the desired error rate.
 *
 * @param expectedNumberOfElements expected number of keys
 * @param errorRate desired max error rate
 * @param nbHash number of hash functions
 * @return the newly created filter
 */
@Nonnull
public static DynamicBloomFilter newDynamicBloomFilter(
        @Nonnegative final int expectedNumberOfElements, @Nonnegative final float errorRate,
        @Nonnegative final int nbHash) {
    // vector size should be `-kn / (ln(1 - c^(1/k)))` bits, where `k` is the
    // number of hash functions, `n` the number of keys and `c` the error rate.
    // Form -kn in long arithmetic: the int product wraps around for large inputs.
    final long negatedKn = -1L * nbHash * expectedNumberOfElements;
    int vectorSize = (int) Math.ceil(negatedKn
            / Math.log(1.d - Math.pow(errorRate, 1.d / nbHash)));
    return new DynamicBloomFilter(vectorSize, nbHash, Hash.MURMUR_HASH,
        expectedNumberOfElements);
}
 
Example #27
Source File: BloomFilterUtils.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a Murmur-hashed {@link BloomFilter} sized for the expected number
 * of elements and the desired error rate.
 *
 * @param expectedNumberOfElements expected number of keys
 * @param errorRate desired max error rate
 * @param nbHash number of hash functions
 * @return the newly created filter
 */
@Nonnull
public static BloomFilter newBloomFilter(@Nonnegative final int expectedNumberOfElements,
        @Nonnegative final float errorRate, @Nonnegative final int nbHash) {
    // vector size should be `-kn / (ln(1 - c^(1/k)))` bits for
    // single key, where `k` is the number of hash functions,
    // `n` is the number of keys and `c` is the desired max error rate.
    // Form -kn in long arithmetic: the int product wraps around for large inputs.
    final long negatedKn = -1L * nbHash * expectedNumberOfElements;
    int vectorSize = (int) Math.ceil(negatedKn
            / Math.log(1.d - Math.pow(errorRate, 1.d / nbHash)));
    return new BloomFilter(vectorSize, nbHash, Hash.MURMUR_HASH);
}
 
Example #28
Source File: TestBloomFilters.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Checks add/delete bookkeeping of a {@code CountingBloomFilter} on a single
 * key, then runs the common tester strategies against the same filter.
 */
@Test
public void testCountingBloomFilter() {
  final int hashId = Hash.JENKINS_HASH;
  final CountingBloomFilter filter =
      new CountingBloomFilter(bitSize, hashFunctionNumber, hashId);
  final Key key = new Key(new byte[] { 48, 48 });

  // first add: key is present with a count of one
  filter.add(key);
  assertTrue("CountingBloomFilter.membership error ",
      filter.membershipTest(key));
  assertTrue("CountingBloomFilter.approximateCount error",
      filter.approximateCount(key) == 1);

  // second add: count goes to two
  filter.add(key);
  assertTrue("CountingBloomFilter.approximateCount error",
      filter.approximateCount(key) == 2);

  // first delete: key is still reported as a member
  filter.delete(key);
  assertTrue("CountingBloomFilter.membership error ",
      filter.membershipTest(key));

  // second delete: key is gone and its count is zero
  filter.delete(key);
  assertFalse("CountingBloomFilter.membership error ",
      filter.membershipTest(key));
  assertTrue("CountingBloomFilter.approximateCount error",
      filter.approximateCount(key) == 0);

  BloomFilterCommonTester.of(hashId, numInsertions)
      .withFilterInstance(filter)
      .withTestCases(ImmutableSet.of(
          BloomFilterTestStrategy.KEY_TEST_STRATEGY,
          BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
          BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
          BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY,
          BloomFilterTestStrategy.WRITE_READ_STRATEGY,
          BloomFilterTestStrategy.FILTER_OR_STRATEGY,
          BloomFilterTestStrategy.FILTER_XOR_STRATEGY))
      .test();
}
 
Example #29
Source File: TestBloomFilters.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code BloomFilter#not} leaves no bit in common with the bit
 * set the filter held before the call.
 */
@Test
public void testNot() {
  final BloomFilter bf = new BloomFilter(8, 1, Hash.JENKINS_HASH);
  bf.bits = BitSet.valueOf(new byte[] { (byte) 0x95 });

  // Snapshot the bits before inverting so the two states can be compared.
  final BitSet before = (BitSet) bf.bits.clone();
  bf.not();

  assertFalse("BloomFilter#not should have inverted all bits",
              bf.bits.intersects(before));
}
 
Example #30
Source File: TestBloomFilters.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * For both the Murmur and Jenkins hashes, runs the absent-false-positive
 * check on a {@code RetouchedBloomFilter} for odd and even digits under the
 * MAXIMUM_FP, MINIMUM_FN and RATIO remove schemes, in that order.
 */
@Test
public void testRetouchedBloomFilterSpecific() {
  final int insertions = 1000;
  final int hashCount = 5;

  final ImmutableSet<Integer> hashTypes =
      ImmutableSet.of(Hash.MURMUR_HASH, Hash.JENKINS_HASH);

  for (int hashId : hashTypes) {
    final RetouchedBloomFilter filter =
        new RetouchedBloomFilter(bitSize, hashCount, hashId);

    // After every check the filter is and-ed with a newly built filter
    // (presumably to clear it for the next round -- confirm).

    // scheme: MAXIMUM_FP
    checkOnAbsentFalsePositive(hashId, insertions, filter, Digits.ODD,
        RemoveScheme.MAXIMUM_FP);
    filter.and(new RetouchedBloomFilter(bitSize, hashCount, hashId));
    checkOnAbsentFalsePositive(hashId, insertions, filter, Digits.EVEN,
        RemoveScheme.MAXIMUM_FP);
    filter.and(new RetouchedBloomFilter(bitSize, hashCount, hashId));

    // scheme: MINIMUM_FN
    checkOnAbsentFalsePositive(hashId, insertions, filter, Digits.ODD,
        RemoveScheme.MINIMUM_FN);
    filter.and(new RetouchedBloomFilter(bitSize, hashCount, hashId));
    checkOnAbsentFalsePositive(hashId, insertions, filter, Digits.EVEN,
        RemoveScheme.MINIMUM_FN);
    filter.and(new RetouchedBloomFilter(bitSize, hashCount, hashId));

    // scheme: RATIO
    checkOnAbsentFalsePositive(hashId, insertions, filter, Digits.ODD,
        RemoveScheme.RATIO);
    filter.and(new RetouchedBloomFilter(bitSize, hashCount, hashId));
    checkOnAbsentFalsePositive(hashId, insertions, filter, Digits.EVEN,
        RemoveScheme.RATIO);
    filter.and(new RetouchedBloomFilter(bitSize, hashCount, hashId));
  }
}